{ "cells": [ { "cell_type": "markdown", "id": "01a17c2e", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Strings and Custom Functions in Pandas\n", "\n", "Feng Li\n", "\n", "School of Statistics and Mathematics\n", "\n", "Central University of Finance and Economics\n", "\n", "[feng.li@cufe.edu.cn](mailto:feng.li@cufe.edu.cn)\n", "\n", "[https://feng.li/python](https://feng.li/python)" ] }, { "cell_type": "markdown", "id": "funny-reputation", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "\n", "# Working with text columns\n", "\n", "## String methods in pandas" ] }, { "cell_type": "code", "execution_count": 1, "id": "09ffba64", "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "import pandas as pd\n", "ledger_df = pd.read_excel('data/Q1Sales.xlsx') # read the first sheet" ] }, { "cell_type": "code", "execution_count": 2, "id": "median-craft", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 SHOPPE.COM\n", "1 WALCART\n", "2 BULLSEYE\n", "3 BULLSEYE\n", "4 BULLSEYE\n", " ... \n", "14049 BULLSEYE\n", "14050 BULLSEYE\n", "14051 UNDERSTOCK.COM\n", "14052 IBAY.COM\n", "14053 UNDERSTOCK.COM\n", "Name: Channel, Length: 14054, dtype: object" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Channel'].str.upper()" ] }, { "cell_type": "code", "execution_count": 4, "id": "interstate-deficit", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Cannon Water Bomb Balloons 100 Pack\n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102\n", "2 NaN\n", "3 Transformers Age of Extinction Generations Del...\n", "4 Transformers Age of Extinction Generations Del...\n", " ... \n", "14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G...\n", "14050 Cisco Systems Gigabit VPN Router (RV320K9NA)\n", "14051 Philips AJ3116M/37 Digital Tuning Clock Radio ...\n", "14052 NaN\n", "14053 Sirius Satellite Radio XADH2 Home Access Kit f...\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name']" ] }, { "cell_type": "code", "execution_count": 5, "id": "matched-syndication", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 CANNON WATER BOMB BALLOONS 100 PACK\n", "1 LEGO NINJA TURTLES STEALTH SHELL IN PURSUIT 79102\n", "2 NaN\n", "3 TRANSFORMERS AGE OF EXTINCTION GENERATIONS DEL...\n", "4 TRANSFORMERS AGE OF EXTINCTION GENERATIONS DEL...\n", " ... \n", "14049 AC ADAPTER/POWER SUPPLY&CORD FOR LENOVO 3000 G...\n", "14050 CISCO SYSTEMS GIGABIT VPN ROUTER (RV320K9NA)\n", "14051 PHILIPS AJ3116M/37 DIGITAL TUNING CLOCK RADIO ...\n", "14052 NaN\n", "14053 SIRIUS SATELLITE RADIO XADH2 HOME ACCESS KIT F...\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name'].str.upper()" ] }, { "cell_type": "code", "execution_count": 6, "id": "italian-acrobat", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 True\n", "2 NaN\n", "3 False\n", "4 False\n", " ... \n", "14049 False\n", "14050 False\n", "14051 False\n", "14052 NaN\n", "14053 False\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name'].str.contains('LEGO')" ] }, { "cell_type": "code", "execution_count": 8, "id": "possible-hypothesis", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", " | InvoiceNo | \n", "Channel | \n", "Product Name | \n", "ProductID | \n", "Account | \n", "AccountNo | \n", "Date | \n", "Deadline | \n", "Currency | \n", "Unit Price | \n", "Quantity | \n", "Total | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", "1533 | \n", "Walcart | \n", "LEGO Ninja Turtles Stealth Shell in Pursuit 79102 | \n", "T&G/LEG-37777 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "06/15/20 | \n", "USD | \n", "6.70 | \n", "1 | \n", "6.70 | \n", "
43 | \n", "1575 | \n", "iBay.com | \n", "LEGO Star Wars Clone Troopers vs Droidekas 75000 | \n", "T&G/LEG-16040 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "3-22-20 | \n", "USD | \n", "14.68 | \n", "2 | \n", "29.36 | \n", "
105 | \n", "1637 | \n", "Bullseye | \n", "LEGO LOTR 79006 The Council of Elrond | \n", "T&G/LEG-76682 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "2-19-20 | \n", "USD | \n", "7.67 | \n", "6 | \n", "46.02 | \n", "
176 | \n", "1708 | \n", "Shoppe.com | \n", "LEGO City Fire Chief Car 60001 | \n", "T&G/LEG-89613 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "3-28-20 | \n", "USD | \n", "24.95 | \n", "1 | \n", "24.95 | \n", "
228 | \n", "1608 | \n", "iBay.com | \n", "LEGO Star Wars Clone Troopers vs Droidekas 75000 | \n", "T&G/LEG-16040 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "3-22-20 | \n", "USD | \n", "14.68 | \n", "2 | \n", "29.36 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
13525 | \n", "15007 | \n", "Understock.com | \n", "LEGO City Trains High-speed Passenger Train 60... | \n", "T&G/LEG-51950 | \n", "Sales | \n", "5004 | \n", "2020-01-30 | \n", "4-24-20 | \n", "USD | \n", "7.41 | \n", "18 | \n", "133.38 | \n", "
13550 | \n", "15082 | \n", "Walcart | \n", "LEGO City Fire Chief Car 60001 | \n", "T&G/LEG-89613 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "3-22-20 | \n", "USD | \n", "25.11 | \n", "8 | \n", "200.88 | \n", "
13731 | \n", "15131 | \n", "Walcart | \n", "LEGO City Fire Chief Car 60001 | \n", "T&G/LEG-89613 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "3-22-20 | \n", "USD | \n", "25.11 | \n", "8 | \n", "200.88 | \n", "
13753 | \n", "15285 | \n", "Understock.com | \n", "LEGO Star Wars Clone Troopers vs Droidekas 75000 | \n", "T&G/LEG-16040 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "7-28-20 | \n", "USD | \n", "14.84 | \n", "2 | \n", "29.68 | \n", "
14031 | \n", "15468 | \n", "Understock.com | \n", "LEGO Star Wars Clone Troopers vs Droidekas 75000 | \n", "T&G/LEG-16040 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "7-28-20 | \n", "USD | \n", "14.84 | \n", "2 | \n", "29.68 | \n", "
200 rows × 12 columns
\n", "\n", " | Product Name | \n", "ProductID | \n", "Unit Price | \n", "Quantity | \n", "Total | \n", "
---|---|---|---|---|---|
0 | \n", "Cannon Water Bomb Balloons 100 Pack | \n", "T&G/CAN-97509 | \n", "20.11 | \n", "14 | \n", "281.54 | \n", "
1 | \n", "LEGO Ninja Turtles Stealth Shell in Pursuit 79102 | \n", "T&G/LEG-37777 | \n", "6.70 | \n", "1 | \n", "6.70 | \n", "
2 | \n", "NaN | \n", "T&G/PET-14209 | \n", "11.67 | \n", "5 | \n", "58.35 | \n", "
3 | \n", "Transformers Age of Extinction Generations Del... | \n", "T&G/TRA-20170 | \n", "13.46 | \n", "6 | \n", "80.76 | \n", "
4 | \n", "Transformers Age of Extinction Generations Del... | \n", "T&G/TRA-20170 | \n", "13.46 | \n", "6 | \n", "80.76 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
14049 | \n", "AC Adapter/Power Supply&Cord for Lenovo 3000 G... | \n", "E/AC-63975 | \n", "28.72 | \n", "8 | \n", "229.76 | \n", "
14050 | \n", "Cisco Systems Gigabit VPN Router (RV320K9NA) | \n", "E/CIS-74992 | \n", "33.39 | \n", "1 | \n", "33.39 | \n", "
14051 | \n", "Philips AJ3116M/37 Digital Tuning Clock Radio ... | \n", "E/PHI-08100 | \n", "4.18 | \n", "1 | \n", "4.18 | \n", "
14052 | \n", "NaN | \n", "E/POL-61164 | \n", "4.78 | \n", "25 | \n", "119.50 | \n", "
14053 | \n", "Sirius Satellite Radio XADH2 Home Access Kit f... | \n", "E/SIR-83381 | \n", "33.16 | \n", "2 | \n", "66.32 | \n", "
14054 rows × 5 columns
\n", "\n", " | 0 | \n", "1 | \n", "
---|---|---|
0 | \n", "T&G | \n", "CAN-97509 | \n", "
1 | \n", "T&G | \n", "LEG-37777 | \n", "
2 | \n", "T&G | \n", "PET-14209 | \n", "
3 | \n", "T&G | \n", "TRA-20170 | \n", "
4 | \n", "T&G | \n", "TRA-20170 | \n", "
... | \n", "... | \n", "... | \n", "
14049 | \n", "E | \n", "AC-63975 | \n", "
14050 | \n", "E | \n", "CIS-74992 | \n", "
14051 | \n", "E | \n", "PHI-08100 | \n", "
14052 | \n", "E | \n", "POL-61164 | \n", "
14053 | \n", "E | \n", "SIR-83381 | \n", "
14054 rows × 2 columns
\n", "\n", " | InvoiceNo | \n", "Channel | \n", "Product Name | \n", "ProductID | \n", "Account | \n", "AccountNo | \n", "Date | \n", "Deadline | \n", "Currency | \n", "Unit Price | \n", "Quantity | \n", "Total | \n", "CategoryID | \n", "ItemID | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1532 | \n", "Shoppe.com | \n", "Cannon Water Bomb Balloons 100 Pack | \n", "T&G/CAN-97509 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "11/23/19 | \n", "USD | \n", "20.11 | \n", "14 | \n", "281.54 | \n", "T&G | \n", "CAN-97509 | \n", "
1 | \n", "1533 | \n", "Walcart | \n", "LEGO Ninja Turtles Stealth Shell in Pursuit 79102 | \n", "T&G/LEG-37777 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "06/15/20 | \n", "USD | \n", "6.70 | \n", "1 | \n", "6.70 | \n", "T&G | \n", "LEG-37777 | \n", "
2 | \n", "1534 | \n", "Bullseye | \n", "NaN | \n", "T&G/PET-14209 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "05/07/20 | \n", "USD | \n", "11.67 | \n", "5 | \n", "58.35 | \n", "T&G | \n", "PET-14209 | \n", "
3 | \n", "1535 | \n", "Bullseye | \n", "Transformers Age of Extinction Generations Del... | \n", "T&G/TRA-20170 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "12/22/19 | \n", "USD | \n", "13.46 | \n", "6 | \n", "80.76 | \n", "T&G | \n", "TRA-20170 | \n", "
4 | \n", "1535 | \n", "Bullseye | \n", "Transformers Age of Extinction Generations Del... | \n", "T&G/TRA-20170 | \n", "Sales | \n", "5004 | \n", "2020-01-01 | \n", "12/22/19 | \n", "USD | \n", "13.46 | \n", "6 | \n", "80.76 | \n", "T&G | \n", "TRA-20170 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
14049 | \n", "15581 | \n", "Bullseye | \n", "AC Adapter/Power Supply&Cord for Lenovo 3000 G... | \n", "E/AC-63975 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "February 23 2020 | \n", "USD | \n", "28.72 | \n", "8 | \n", "229.76 | \n", "E | \n", "AC-63975 | \n", "
14050 | \n", "15582 | \n", "Bullseye | \n", "Cisco Systems Gigabit VPN Router (RV320K9NA) | \n", "E/CIS-74992 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "January 21 2020 | \n", "USD | \n", "33.39 | \n", "1 | \n", "33.39 | \n", "E | \n", "CIS-74992 | \n", "
14051 | \n", "15583 | \n", "Understock.com | \n", "Philips AJ3116M/37 Digital Tuning Clock Radio ... | \n", "E/PHI-08100 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "March 22 2020 | \n", "USD | \n", "4.18 | \n", "1 | \n", "4.18 | \n", "E | \n", "PHI-08100 | \n", "
14052 | \n", "15584 | \n", "iBay.com | \n", "NaN | \n", "E/POL-61164 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "June 25 2020 | \n", "USD | \n", "4.78 | \n", "25 | \n", "119.50 | \n", "E | \n", "POL-61164 | \n", "
14053 | \n", "15585 | \n", "Understock.com | \n", "Sirius Satellite Radio XADH2 Home Access Kit f... | \n", "E/SIR-83381 | \n", "Sales | \n", "5004 | \n", "2020-01-31 | \n", "February 01 2020 | \n", "USD | \n", "33.16 | \n", "2 | \n", "66.32 | \n", "E | \n", "SIR-83381 | \n", "
14054 rows × 14 columns
\n", "\n", " | ProductID | \n", "Product Name | \n", "Total | \n", "
---|---|---|---|
66 | \n", "C&P/KID-94587 | \n", "Kidz Digital Camera | \n", "275.64 | \n", "
117 | \n", "C&P/KOD-01305 | \n", "Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... | \n", "64.1 | \n", "
154 | \n", "C&P/FOS-95687 | \n", "Foscam FI8910W White Wireless IP Cameras 2-pack | \n", "669.2 | \n", "
265 | \n", "C&P/Q-S-31839 | \n", "Q-See QSC414D Outdoor Dome Color CCD Camera wi... | \n", "7.82 | \n", "
287 | \n", "C&P/KOD-01305 | \n", "Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... | \n", "64.1 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
13916 | \n", "C&P/NEE-31972 | \n", "NEEWER® 10x25 Zoom LCD Binoculars Built-in Dig... | \n", "37.68 | \n", "
13950 | \n", "T&G/LIG-86589 | \n", "Lights,Camera,Action Decor | \n", "23.02 | \n", "
13954 | \n", "C&P/DAH-04621 | \n", "Dahua IPC-HFW2100 S 1.3MP Weatherproof HD IP S... | \n", "126.0 | \n", "
13986 | \n", "C&P/NIK-92147 | \n", "Nikon D3100 14.2MP Digital SLR Camera with 18-... | \n", "54.1 | \n", "
13996 | \n", "C&P/SEC-57209 | \n", "Securityman Wi-Fi Interference Free Wireless O... | \n", "17.44 | \n", "
443 rows × 3 columns
\n", "\n", " | ProductID | \n", "Product Name | \n", "Total | \n", "
---|---|---|---|
117 | \n", "C&P/KOD-01305 | \n", "Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... | \n", "64.1 | \n", "
287 | \n", "C&P/KOD-01305 | \n", "Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... | \n", "64.1 | \n", "
616 | \n", "C&P/KOD-32137 | \n", "Kodak EasyShare Z990 12 MP Digital Camera with... | \n", "166.3 | \n", "
2151 | \n", "C&P/CAN-50721 | \n", "Canon PowerShot SX50 HS 12MP Digital Camera wi... | \n", "6.45 | \n", "
2280 | \n", "C&P/CAN-50721 | \n", "Canon PowerShot SX50 HS 12MP Digital Camera wi... | \n", "6.45 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
12459 | \n", "C&P/CAN-12514 | \n", "Canon EOS Rebel T2i DSLR Camera (Body Only) | \n", "35.4 | \n", "
12522 | \n", "C&P/CAN-12514 | \n", "Canon EOS Rebel T2i DSLR Camera (Body Only) | \n", "35.4 | \n", "
12905 | \n", "C&P/KOD-32137 | \n", "Kodak EasyShare Z990 12 MP Digital Camera with... | \n", "332.6 | \n", "
13594 | \n", "C&P/NIK-92147 | \n", "Nikon D3100 14.2MP Digital SLR Camera with 18-... | \n", "54.1 | \n", "
13986 | \n", "C&P/NIK-92147 | \n", "Nikon D3100 14.2MP Digital SLR Camera with 18-... | \n", "54.1 | \n", "
77 rows × 3 columns
\n", "\n", " | ProductID | \n", "Product Name | \n", "Total | \n", "
---|---|---|---|
117 | \n", "C&P/KOD-01305 | \n", "Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... | \n", "64.1 | \n", "
287 | \n", "C&P/KOD-01305 | \n", "Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... | \n", "64.1 | \n", "
616 | \n", "C&P/KOD-32137 | \n", "Kodak EasyShare Z990 12 MP Digital Camera with... | \n", "166.3 | \n", "
2151 | \n", "C&P/CAN-50721 | \n", "Canon PowerShot SX50 HS 12MP Digital Camera wi... | \n", "6.45 | \n", "
2280 | \n", "C&P/CAN-50721 | \n", "Canon PowerShot SX50 HS 12MP Digital Camera wi... | \n", "6.45 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "
12459 | \n", "C&P/CAN-12514 | \n", "Canon EOS Rebel T2i DSLR Camera (Body Only) | \n", "35.4 | \n", "
12522 | \n", "C&P/CAN-12514 | \n", "Canon EOS Rebel T2i DSLR Camera (Body Only) | \n", "35.4 | \n", "
12905 | \n", "C&P/KOD-32137 | \n", "Kodak EasyShare Z990 12 MP Digital Camera with... | \n", "332.6 | \n", "
13594 | \n", "C&P/NIK-92147 | \n", "Nikon D3100 14.2MP Digital SLR Camera with 18-... | \n", "54.1 | \n", "
13986 | \n", "C&P/NIK-92147 | \n", "Nikon D3100 14.2MP Digital SLR Camera with 18-... | \n", "54.1 | \n", "
77 rows × 3 columns
\n", "\n", " | 0 | \n", "1 | \n", "
---|---|---|
117 | \n", "Kodak | \n", "ZM1 | \n", "
287 | \n", "Kodak | \n", "ZM1 | \n", "
616 | \n", "Kodak | \n", "EasyShare | \n", "
2151 | \n", "Canon | \n", "PowerShot | \n", "
2280 | \n", "Canon | \n", "PowerShot | \n", "
... | \n", "... | \n", "... | \n", "
12459 | \n", "Canon | \n", "EOS | \n", "
12522 | \n", "Canon | \n", "EOS | \n", "
12905 | \n", "Kodak | \n", "EasyShare | \n", "
13594 | \n", "Nikon | \n", "D3100 | \n", "
13986 | \n", "Nikon | \n", "D3100 | \n", "
77 rows × 2 columns
\n", "