{ "cells": [ { "cell_type": "markdown", "id": "01a17c2e", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Strings and Custom Functions in Pandas\n", "\n", "Feng Li\n", "\n", "School of Statistics and Mathematics\n", "\n", "Central University of Finance and Economics\n", "\n", "[feng.li@cufe.edu.cn](mailto:feng.li@cufe.edu.cn)\n", "\n", "[https://feng.li/python](https://feng.li/python)" ] }, { "cell_type": "markdown", "id": "funny-reputation", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "\n", "# Working with text columns\n", "\n", "## String methods in pandas" ] }, { "cell_type": "code", "execution_count": 1, "id": "09ffba64", "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "import pandas as pd\n", "ledger_df = pd.read_excel('data/Q1Sales.xlsx') # read the first sheet" ] }, { "cell_type": "code", "execution_count": 2, "id": "median-craft", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 SHOPPE.COM\n", "1 WALCART\n", "2 BULLSEYE\n", "3 BULLSEYE\n", "4 BULLSEYE\n", " ... \n", "14049 BULLSEYE\n", "14050 BULLSEYE\n", "14051 UNDERSTOCK.COM\n", "14052 IBAY.COM\n", "14053 UNDERSTOCK.COM\n", "Name: Channel, Length: 14054, dtype: object" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Channel'].str.upper()" ] }, { "cell_type": "code", "execution_count": 4, "id": "interstate-deficit", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Cannon Water Bomb Balloons 100 Pack\n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102\n", "2 NaN\n", "3 Transformers Age of Extinction Generations Del...\n", "4 Transformers Age of Extinction Generations Del...\n", " ... \n", "14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G...\n", "14050 Cisco Systems Gigabit VPN Router (RV320K9NA)\n", "14051 Philips AJ3116M/37 Digital Tuning Clock Radio ...\n", "14052 NaN\n", "14053 Sirius Satellite Radio XADH2 Home Access Kit f...\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name']" ] }, { "cell_type": "code", "execution_count": 5, "id": "matched-syndication", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 CANNON WATER BOMB BALLOONS 100 PACK\n", "1 LEGO NINJA TURTLES STEALTH SHELL IN PURSUIT 79102\n", "2 NaN\n", "3 TRANSFORMERS AGE OF EXTINCTION GENERATIONS DEL...\n", "4 TRANSFORMERS AGE OF EXTINCTION GENERATIONS DEL...\n", " ... \n", "14049 AC ADAPTER/POWER SUPPLY&CORD FOR LENOVO 3000 G...\n", "14050 CISCO SYSTEMS GIGABIT VPN ROUTER (RV320K9NA)\n", "14051 PHILIPS AJ3116M/37 DIGITAL TUNING CLOCK RADIO ...\n", "14052 NaN\n", "14053 SIRIUS SATELLITE RADIO XADH2 HOME ACCESS KIT F...\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name'].str.upper()" ] }, { "cell_type": "code", "execution_count": 6, "id": "italian-acrobat", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 False\n", "1 True\n", "2 NaN\n", "3 False\n", "4 False\n", " ... \n", "14049 False\n", "14050 False\n", "14051 False\n", "14052 NaN\n", "14053 False\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name'].str.contains('LEGO')" ] }, { "cell_type": "code", "execution_count": 8, "id": "possible-hypothesis", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
InvoiceNoChannelProduct NameProductIDAccountAccountNoDateDeadlineCurrencyUnit PriceQuantityTotal
11533WalcartLEGO Ninja Turtles Stealth Shell in Pursuit 79102T&G/LEG-37777Sales50042020-01-0106/15/20USD6.7016.70
431575iBay.comLEGO Star Wars Clone Troopers vs Droidekas 75000T&G/LEG-16040Sales50042020-01-013-22-20USD14.68229.36
1051637BullseyeLEGO LOTR 79006 The Council of ElrondT&G/LEG-76682Sales50042020-01-012-19-20USD7.67646.02
1761708Shoppe.comLEGO City Fire Chief Car 60001T&G/LEG-89613Sales50042020-01-013-28-20USD24.95124.95
2281608iBay.comLEGO Star Wars Clone Troopers vs Droidekas 75000T&G/LEG-16040Sales50042020-01-013-22-20USD14.68229.36
.......................................
1352515007Understock.comLEGO City Trains High-speed Passenger Train 60...T&G/LEG-51950Sales50042020-01-304-24-20USD7.4118133.38
1355015082WalcartLEGO City Fire Chief Car 60001T&G/LEG-89613Sales50042020-01-313-22-20USD25.118200.88
1373115131WalcartLEGO City Fire Chief Car 60001T&G/LEG-89613Sales50042020-01-313-22-20USD25.118200.88
1375315285Understock.comLEGO Star Wars Clone Troopers vs Droidekas 75000T&G/LEG-16040Sales50042020-01-317-28-20USD14.84229.68
1403115468Understock.comLEGO Star Wars Clone Troopers vs Droidekas 75000T&G/LEG-16040Sales50042020-01-317-28-20USD14.84229.68
\n", "

200 rows × 12 columns

\n", "
" ], "text/plain": [ " InvoiceNo Channel \\\n", "1 1533 Walcart \n", "43 1575 iBay.com \n", "105 1637 Bullseye \n", "176 1708 Shoppe.com \n", "228 1608 iBay.com \n", "... ... ... \n", "13525 15007 Understock.com \n", "13550 15082 Walcart \n", "13731 15131 Walcart \n", "13753 15285 Understock.com \n", "14031 15468 Understock.com \n", "\n", " Product Name ProductID \\\n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102 T&G/LEG-37777 \n", "43 LEGO Star Wars Clone Troopers vs Droidekas 75000 T&G/LEG-16040 \n", "105 LEGO LOTR 79006 The Council of Elrond T&G/LEG-76682 \n", "176 LEGO City Fire Chief Car 60001 T&G/LEG-89613 \n", "228 LEGO Star Wars Clone Troopers vs Droidekas 75000 T&G/LEG-16040 \n", "... ... ... \n", "13525 LEGO City Trains High-speed Passenger Train 60... T&G/LEG-51950 \n", "13550 LEGO City Fire Chief Car 60001 T&G/LEG-89613 \n", "13731 LEGO City Fire Chief Car 60001 T&G/LEG-89613 \n", "13753 LEGO Star Wars Clone Troopers vs Droidekas 75000 T&G/LEG-16040 \n", "14031 LEGO Star Wars Clone Troopers vs Droidekas 75000 T&G/LEG-16040 \n", "\n", " Account AccountNo Date Deadline Currency Unit Price Quantity \\\n", "1 Sales 5004 2020-01-01 06/15/20 USD 6.70 1 \n", "43 Sales 5004 2020-01-01 3-22-20 USD 14.68 2 \n", "105 Sales 5004 2020-01-01 2-19-20 USD 7.67 6 \n", "176 Sales 5004 2020-01-01 3-28-20 USD 24.95 1 \n", "228 Sales 5004 2020-01-01 3-22-20 USD 14.68 2 \n", "... ... ... ... ... ... ... ... \n", "13525 Sales 5004 2020-01-30 4-24-20 USD 7.41 18 \n", "13550 Sales 5004 2020-01-31 3-22-20 USD 25.11 8 \n", "13731 Sales 5004 2020-01-31 3-22-20 USD 25.11 8 \n", "13753 Sales 5004 2020-01-31 7-28-20 USD 14.84 2 \n", "14031 Sales 5004 2020-01-31 7-28-20 USD 14.84 2 \n", "\n", " Total \n", "1 6.70 \n", "43 29.36 \n", "105 46.02 \n", "176 24.95 \n", "228 29.36 \n", "... ... \n", "13525 133.38 \n", "13550 200.88 \n", "13731 200.88 \n", "13753 29.68 \n", "14031 29.68 \n", "\n", "[200 rows x 12 columns]" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df[ledger_df['Product Name'].str.contains('LEGO').fillna(False)]" ] }, { "cell_type": "markdown", "id": "partial-flour", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Replacing parts of text" ] }, { "cell_type": "code", "execution_count": 9, "id": "compact-elevation", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_34612/3620803427.py:1: FutureWarning: The default value of regex will change from True to False in a future version.\n", " ledger_df['Channel'].str.replace('.com', '')\n" ] }, { "data": { "text/plain": [ "0 Shoppe\n", "1 Walcart\n", "2 Bullseye\n", "3 Bullseye\n", "4 Bullseye\n", " ... \n", "14049 Bullseye\n", "14050 Bullseye\n", "14051 Understock\n", "14052 iBay\n", "14053 Understock\n", "Name: Channel, Length: 14054, dtype: object" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Channel'].str.replace('.com', '')" ] }, { "cell_type": "code", "execution_count": 10, "id": "collectible-defensive", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Shoppe.com\n", "1 Walcart\n", "2 Bullseye\n", "3 Bullseye\n", "4 Bullseye\n", " ... \n", "14049 Bullseye\n", "14050 Bullseye\n", "14051 Understock.com\n", "14052 Anazon.com\n", "14053 Understock.com\n", "Name: Channel, Length: 14054, dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Channel'].replace('iBay.com', 'Anazon.com')" ] }, { "cell_type": "markdown", "id": "sensitive-shame", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Splitting text values into multiple columns" ] }, { "cell_type": "code", "execution_count": 11, "id": "systematic-compatibility", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Product NameProductIDUnit PriceQuantityTotal
0Cannon Water Bomb Balloons 100 PackT&G/CAN-9750920.1114281.54
1LEGO Ninja Turtles Stealth Shell in Pursuit 79102T&G/LEG-377776.7016.70
2NaNT&G/PET-1420911.67558.35
3Transformers Age of Extinction Generations Del...T&G/TRA-2017013.46680.76
4Transformers Age of Extinction Generations Del...T&G/TRA-2017013.46680.76
..................
14049AC Adapter/Power Supply&Cord for Lenovo 3000 G...E/AC-6397528.728229.76
14050Cisco Systems Gigabit VPN Router (RV320K9NA)E/CIS-7499233.39133.39
14051Philips AJ3116M/37 Digital Tuning Clock Radio ...E/PHI-081004.1814.18
14052NaNE/POL-611644.7825119.50
14053Sirius Satellite Radio XADH2 Home Access Kit f...E/SIR-8338133.16266.32
\n", "

14054 rows × 5 columns

\n", "
" ], "text/plain": [ " Product Name ProductID \\\n", "0 Cannon Water Bomb Balloons 100 Pack T&G/CAN-97509 \n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102 T&G/LEG-37777 \n", "2 NaN T&G/PET-14209 \n", "3 Transformers Age of Extinction Generations Del... T&G/TRA-20170 \n", "4 Transformers Age of Extinction Generations Del... T&G/TRA-20170 \n", "... ... ... \n", "14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G... E/AC-63975 \n", "14050 Cisco Systems Gigabit VPN Router (RV320K9NA) E/CIS-74992 \n", "14051 Philips AJ3116M/37 Digital Tuning Clock Radio ... E/PHI-08100 \n", "14052 NaN E/POL-61164 \n", "14053 Sirius Satellite Radio XADH2 Home Access Kit f... E/SIR-83381 \n", "\n", " Unit Price Quantity Total \n", "0 20.11 14 281.54 \n", "1 6.70 1 6.70 \n", "2 11.67 5 58.35 \n", "3 13.46 6 80.76 \n", "4 13.46 6 80.76 \n", "... ... ... ... \n", "14049 28.72 8 229.76 \n", "14050 33.39 1 33.39 \n", "14051 4.18 1 4.18 \n", "14052 4.78 25 119.50 \n", "14053 33.16 2 66.32 \n", "\n", "[14054 rows x 5 columns]" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df[['Product Name', 'ProductID', 'Unit Price', 'Quantity', 'Total']]" ] }, { "cell_type": "code", "execution_count": 12, "id": "foster-wings", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 [T&G, CAN-97509]\n", "1 [T&G, LEG-37777]\n", "2 [T&G, PET-14209]\n", "3 [T&G, TRA-20170]\n", "4 [T&G, TRA-20170]\n", " ... \n", "14049 [E, AC-63975]\n", "14050 [E, CIS-74992]\n", "14051 [E, PHI-08100]\n", "14052 [E, POL-61164]\n", "14053 [E, SIR-83381]\n", "Name: ProductID, Length: 14054, dtype: object" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['ProductID'].str.split('/')" ] }, { "cell_type": "code", "execution_count": 13, "id": "logical-missouri", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
0T&GCAN-97509
1T&GLEG-37777
2T&GPET-14209
3T&GTRA-20170
4T&GTRA-20170
.........
14049EAC-63975
14050ECIS-74992
14051EPHI-08100
14052EPOL-61164
14053ESIR-83381
\n", "

14054 rows × 2 columns

\n", "
" ], "text/plain": [ " 0 1\n", "0 T&G CAN-97509\n", "1 T&G LEG-37777\n", "2 T&G PET-14209\n", "3 T&G TRA-20170\n", "4 T&G TRA-20170\n", "... ... ...\n", "14049 E AC-63975\n", "14050 E CIS-74992\n", "14051 E PHI-08100\n", "14052 E POL-61164\n", "14053 E SIR-83381\n", "\n", "[14054 rows x 2 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['ProductID'].str.split('/', expand=True)" ] }, { "cell_type": "code", "execution_count": 14, "id": "satellite-kingdom", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
InvoiceNoChannelProduct NameProductIDAccountAccountNoDateDeadlineCurrencyUnit PriceQuantityTotalCategoryIDItemID
01532Shoppe.comCannon Water Bomb Balloons 100 PackT&G/CAN-97509Sales50042020-01-0111/23/19USD20.1114281.54T&GCAN-97509
11533WalcartLEGO Ninja Turtles Stealth Shell in Pursuit 79102T&G/LEG-37777Sales50042020-01-0106/15/20USD6.7016.70T&GLEG-37777
21534BullseyeNaNT&G/PET-14209Sales50042020-01-0105/07/20USD11.67558.35T&GPET-14209
31535BullseyeTransformers Age of Extinction Generations Del...T&G/TRA-20170Sales50042020-01-0112/22/19USD13.46680.76T&GTRA-20170
41535BullseyeTransformers Age of Extinction Generations Del...T&G/TRA-20170Sales50042020-01-0112/22/19USD13.46680.76T&GTRA-20170
.............................................
1404915581BullseyeAC Adapter/Power Supply&Cord for Lenovo 3000 G...E/AC-63975Sales50042020-01-31February 23 2020USD28.728229.76EAC-63975
1405015582BullseyeCisco Systems Gigabit VPN Router (RV320K9NA)E/CIS-74992Sales50042020-01-31January 21 2020USD33.39133.39ECIS-74992
1405115583Understock.comPhilips AJ3116M/37 Digital Tuning Clock Radio ...E/PHI-08100Sales50042020-01-31March 22 2020USD4.1814.18EPHI-08100
1405215584iBay.comNaNE/POL-61164Sales50042020-01-31June 25 2020USD4.7825119.50EPOL-61164
1405315585Understock.comSirius Satellite Radio XADH2 Home Access Kit f...E/SIR-83381Sales50042020-01-31February 01 2020USD33.16266.32ESIR-83381
\n", "

14054 rows × 14 columns

\n", "
" ], "text/plain": [ " InvoiceNo Channel \\\n", "0 1532 Shoppe.com \n", "1 1533 Walcart \n", "2 1534 Bullseye \n", "3 1535 Bullseye \n", "4 1535 Bullseye \n", "... ... ... \n", "14049 15581 Bullseye \n", "14050 15582 Bullseye \n", "14051 15583 Understock.com \n", "14052 15584 iBay.com \n", "14053 15585 Understock.com \n", "\n", " Product Name ProductID \\\n", "0 Cannon Water Bomb Balloons 100 Pack T&G/CAN-97509 \n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102 T&G/LEG-37777 \n", "2 NaN T&G/PET-14209 \n", "3 Transformers Age of Extinction Generations Del... T&G/TRA-20170 \n", "4 Transformers Age of Extinction Generations Del... T&G/TRA-20170 \n", "... ... ... \n", "14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G... E/AC-63975 \n", "14050 Cisco Systems Gigabit VPN Router (RV320K9NA) E/CIS-74992 \n", "14051 Philips AJ3116M/37 Digital Tuning Clock Radio ... E/PHI-08100 \n", "14052 NaN E/POL-61164 \n", "14053 Sirius Satellite Radio XADH2 Home Access Kit f... E/SIR-83381 \n", "\n", " Account AccountNo Date Deadline Currency Unit Price \\\n", "0 Sales 5004 2020-01-01 11/23/19 USD 20.11 \n", "1 Sales 5004 2020-01-01 06/15/20 USD 6.70 \n", "2 Sales 5004 2020-01-01 05/07/20 USD 11.67 \n", "3 Sales 5004 2020-01-01 12/22/19 USD 13.46 \n", "4 Sales 5004 2020-01-01 12/22/19 USD 13.46 \n", "... ... ... ... ... ... ... \n", "14049 Sales 5004 2020-01-31 February 23 2020 USD 28.72 \n", "14050 Sales 5004 2020-01-31 January 21 2020 USD 33.39 \n", "14051 Sales 5004 2020-01-31 March 22 2020 USD 4.18 \n", "14052 Sales 5004 2020-01-31 June 25 2020 USD 4.78 \n", "14053 Sales 5004 2020-01-31 February 01 2020 USD 33.16 \n", "\n", " Quantity Total CategoryID ItemID \n", "0 14 281.54 T&G CAN-97509 \n", "1 1 6.70 T&G LEG-37777 \n", "2 5 58.35 T&G PET-14209 \n", "3 6 80.76 T&G TRA-20170 \n", "4 6 80.76 T&G TRA-20170 \n", "... ... ... ... ... \n", "14049 8 229.76 E AC-63975 \n", "14050 1 33.39 E CIS-74992 \n", "14051 1 4.18 E PHI-08100 \n", "14052 25 119.50 E POL-61164 \n", "14053 2 66.32 E SIR-83381 \n", "\n", "[14054 rows x 14 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df[['CategoryID', 'ItemID']] = ledger_df['ProductID'].str.split('/', expand=True)\n", "\n", "ledger_df" ] }, { "cell_type": "markdown", "id": "realistic-kenya", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Concatenating text columns" ] }, { "cell_type": "code", "execution_count": 15, "id": "minus-lightning", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Category ID is: T&G\n", "1 Category ID is: T&G\n", "2 Category ID is: T&G\n", "3 Category ID is: T&G\n", "4 Category ID is: T&G\n", " ... \n", "14049 Category ID is: E\n", "14050 Category ID is: E\n", "14051 Category ID is: E\n", "14052 Category ID is: E\n", "14053 Category ID is: E\n", "Name: CategoryID, Length: 14054, dtype: object" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "'Category ID is: ' + ledger_df['CategoryID']" ] }, { "cell_type": "code", "execution_count": 16, "id": "boring-wagon", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 T&G/CAN-97509\n", "1 T&G/LEG-37777\n", "2 T&G/PET-14209\n", "3 T&G/TRA-20170\n", "4 T&G/TRA-20170\n", " ... \n", "14049 E/AC-63975\n", "14050 E/CIS-74992\n", "14051 E/PHI-08100\n", "14052 E/POL-61164\n", "14053 E/SIR-83381\n", "Length: 14054, dtype: object" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['CategoryID'] + '/' + ledger_df['ItemID']" ] }, { "cell_type": "code", "execution_count": 21, "id": "oriented-hometown", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 1532/5004\n", "1 1533/5004\n", "2 1534/5004\n", "3 1535/5004\n", "4 1535/5004\n", " ... \n", "14049 15581/5004\n", "14050 15582/5004\n", "14051 15583/5004\n", "14052 15584/5004\n", "14053 15585/5004\n", "Length: 14054, dtype: string" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['InvoiceNo'].astype('string') + '/' + ledger_df['AccountNo'].astype('string')" ] }, { "cell_type": "markdown", "id": "unable-wagner", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## String data types in pandas" ] }, { "cell_type": "code", "execution_count": 24, "id": "turned-extension", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 14054 entries, 0 to 14053\n", "Data columns (total 12 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 InvoiceNo 14054 non-null Int64 \n", " 1 Channel 14054 non-null string \n", " 2 Product Name 12362 non-null string \n", " 3 ProductID 14054 non-null string \n", " 4 Account 14054 non-null string \n", " 5 AccountNo 14054 non-null Int64 \n", " 6 Date 14054 non-null datetime64[ns]\n", " 7 Deadline 14054 non-null string \n", " 8 Currency 14054 non-null string \n", " 9 Unit Price 14054 non-null Float64 \n", " 10 Quantity 14054 non-null Int64 \n", " 11 Total 14054 non-null Float64 \n", "dtypes: Float64(2), Int64(3), datetime64[ns](1), string(6)\n", "memory usage: 1.4 MB\n" ] } ], "source": [ "ledger_df = pd.read_excel('data/Q1Sales.xlsx').convert_dtypes()\n", "\n", "ledger_df.info()" ] }, { "cell_type": "code", "execution_count": 25, "id": "embedded-validity", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Cannon Water Bomb Balloons 100 Pack\n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102\n", "2 \n", "3 Transformers Age of Extinction Generations Del...\n", "4 Transformers Age of Extinction Generations Del...\n", " ... \n", "14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G...\n", "14050 Cisco Systems Gigabit VPN Router (RV320K9NA)\n", "14051 Philips AJ3116M/37 Digital Tuning Clock Radio ...\n", "14052 \n", "14053 Sirius Satellite Radio XADH2 Home Access Kit f...\n", "Name: Product Name, Length: 14054, dtype: string" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name'].astype('string')" ] }, { "cell_type": "code", "execution_count": 26, "id": "decent-parent", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 1011\n", "1 1320\n", "2 980\n", "3 645\n", "4 340\n", "dtype: object" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.Series([1011, '1320', \"980\", 645, 340])" ] }, { "cell_type": "code", "execution_count": 27, "id": "golden-humor", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 NaN\n", "1 1320\n", "2 980\n", "3 NaN\n", "4 NaN\n", "dtype: object" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.Series([1011, '1320', \"980\", 645, 340]).str.strip('')" ] }, { "cell_type": "code", "execution_count": 28, "id": "unusual-english", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 1011\n", "1 1320\n", "2 980\n", "3 645\n", "4 340\n", "dtype: string" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.Series([1011, '1320', \"980\", 645, 340]).astype('string').str.strip('')" ] }, { "cell_type": "markdown", "id": "major-sensitivity", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Overthinking: Regular expressions" ] }, { "cell_type": "code", "execution_count": 29, "id": "desperate-memory", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "pattern = '(Nikon).*(Camera)'" ] }, { "cell_type": "code", "execution_count": 30, "id": "turkish-assessment", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameTotal
66C&P/KID-94587Kidz Digital Camera275.64
117C&P/KOD-01305Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit...64.1
154C&P/FOS-95687Foscam FI8910W White Wireless IP Cameras 2-pack669.2
265C&P/Q-S-31839Q-See QSC414D Outdoor Dome Color CCD Camera wi...7.82
287C&P/KOD-01305Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit...64.1
............
13916C&P/NEE-31972NEEWER® 10x25 Zoom LCD Binoculars Built-in Dig...37.68
13950T&G/LIG-86589Lights,Camera,Action Decor23.02
13954C&P/DAH-04621Dahua IPC-HFW2100 S 1.3MP Weatherproof HD IP S...126.0
13986C&P/NIK-92147Nikon D3100 14.2MP Digital SLR Camera with 18-...54.1
13996C&P/SEC-57209Securityman Wi-Fi Interference Free Wireless O...17.44
\n", "

443 rows × 3 columns

\n", "
" ], "text/plain": [ " ProductID Product Name \\\n", "66 C&P/KID-94587 Kidz Digital Camera \n", "117 C&P/KOD-01305 Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... \n", "154 C&P/FOS-95687 Foscam FI8910W White Wireless IP Cameras 2-pack \n", "265 C&P/Q-S-31839 Q-See QSC414D Outdoor Dome Color CCD Camera wi... \n", "287 C&P/KOD-01305 Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... \n", "... ... ... \n", "13916 C&P/NEE-31972 NEEWER® 10x25 Zoom LCD Binoculars Built-in Dig... \n", "13950 T&G/LIG-86589 Lights,Camera,Action Decor \n", "13954 C&P/DAH-04621 Dahua IPC-HFW2100 S 1.3MP Weatherproof HD IP S... \n", "13986 C&P/NIK-92147 Nikon D3100 14.2MP Digital SLR Camera with 18-... \n", "13996 C&P/SEC-57209 Securityman Wi-Fi Interference Free Wireless O... \n", "\n", " Total \n", "66 275.64 \n", "117 64.1 \n", "154 669.2 \n", "265 7.82 \n", "287 64.1 \n", "... ... \n", "13916 37.68 \n", "13950 23.02 \n", "13954 126.0 \n", "13986 54.1 \n", "13996 17.44 \n", "\n", "[443 rows x 3 columns]" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "is_camera = ledger_df['Product Name'].str.contains('camera', case=False).fillna(False)\n", "\n", "cameras_df = ledger_df[is_camera]\n", "cameras_df = cameras_df[['ProductID', 'Product Name', 'Total']]\n", "\n", "cameras_df" ] }, { "cell_type": "code", "execution_count": 31, "id": "straight-church", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/tmp/ipykernel_34612/751983995.py:2: UserWarning: This pattern has match groups. To actually get the groups, use str.extract.\n", " cameras_df = cameras_df[cameras_df['Product Name'].str.contains(pattern)]\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameTotal
117C&P/KOD-01305Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit...64.1
287C&P/KOD-01305Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit...64.1
616C&P/KOD-32137Kodak EasyShare Z990 12 MP Digital Camera with...166.3
2151C&P/CAN-50721Canon PowerShot SX50 HS 12MP Digital Camera wi...6.45
2280C&P/CAN-50721Canon PowerShot SX50 HS 12MP Digital Camera wi...6.45
............
12459C&P/CAN-12514Canon EOS Rebel T2i DSLR Camera (Body Only)35.4
12522C&P/CAN-12514Canon EOS Rebel T2i DSLR Camera (Body Only)35.4
12905C&P/KOD-32137Kodak EasyShare Z990 12 MP Digital Camera with...332.6
13594C&P/NIK-92147Nikon D3100 14.2MP Digital SLR Camera with 18-...54.1
13986C&P/NIK-92147Nikon D3100 14.2MP Digital SLR Camera with 18-...54.1
\n", "

77 rows × 3 columns

\n", "
" ], "text/plain": [ " ProductID Product Name Total\n", "117 C&P/KOD-01305 Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... 64.1\n", "287 C&P/KOD-01305 Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... 64.1\n", "616 C&P/KOD-32137 Kodak EasyShare Z990 12 MP Digital Camera with... 166.3\n", "2151 C&P/CAN-50721 Canon PowerShot SX50 HS 12MP Digital Camera wi... 6.45\n", "2280 C&P/CAN-50721 Canon PowerShot SX50 HS 12MP Digital Camera wi... 6.45\n", "... ... ... ...\n", "12459 C&P/CAN-12514 Canon EOS Rebel T2i DSLR Camera (Body Only) 35.4\n", "12522 C&P/CAN-12514 Canon EOS Rebel T2i DSLR Camera (Body Only) 35.4\n", "12905 C&P/KOD-32137 Kodak EasyShare Z990 12 MP Digital Camera with... 332.6\n", "13594 C&P/NIK-92147 Nikon D3100 14.2MP Digital SLR Camera with 18-... 54.1\n", "13986 C&P/NIK-92147 Nikon D3100 14.2MP Digital SLR Camera with 18-... 54.1\n", "\n", "[77 rows x 3 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pattern = '(Nikon|Canon|Kodak)'\n", "cameras_df = cameras_df[cameras_df['Product Name'].str.contains(pattern)]\n", "\n", "cameras_df" ] }, { "cell_type": "code", "execution_count": 32, "id": "instrumental-colony", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameTotal
117C&P/KOD-01305Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit...64.1
287C&P/KOD-01305Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit...64.1
616C&P/KOD-32137Kodak EasyShare Z990 12 MP Digital Camera with...166.3
2151C&P/CAN-50721Canon PowerShot SX50 HS 12MP Digital Camera wi...6.45
2280C&P/CAN-50721Canon PowerShot SX50 HS 12MP Digital Camera wi...6.45
............
12459C&P/CAN-12514Canon EOS Rebel T2i DSLR Camera (Body Only)35.4
12522C&P/CAN-12514Canon EOS Rebel T2i DSLR Camera (Body Only)35.4
12905C&P/KOD-32137Kodak EasyShare Z990 12 MP Digital Camera with...332.6
13594C&P/NIK-92147Nikon D3100 14.2MP Digital SLR Camera with 18-...54.1
13986C&P/NIK-92147Nikon D3100 14.2MP Digital SLR Camera with 18-...54.1
\n", "

77 rows × 3 columns

\n", "
" ], "text/plain": [ " ProductID Product Name Total\n", "117 C&P/KOD-01305 Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... 64.1\n", "287 C&P/KOD-01305 Kodak ZM1-NM 1 MP 1-Inch LCD CMOS Sensor Digit... 64.1\n", "616 C&P/KOD-32137 Kodak EasyShare Z990 12 MP Digital Camera with... 166.3\n", "2151 C&P/CAN-50721 Canon PowerShot SX50 HS 12MP Digital Camera wi... 6.45\n", "2280 C&P/CAN-50721 Canon PowerShot SX50 HS 12MP Digital Camera wi... 6.45\n", "... ... ... ...\n", "12459 C&P/CAN-12514 Canon EOS Rebel T2i DSLR Camera (Body Only) 35.4\n", "12522 C&P/CAN-12514 Canon EOS Rebel T2i DSLR Camera (Body Only) 35.4\n", "12905 C&P/KOD-32137 Kodak EasyShare Z990 12 MP Digital Camera with... 332.6\n", "13594 C&P/NIK-92147 Nikon D3100 14.2MP Digital SLR Camera with 18-... 54.1\n", "13986 C&P/NIK-92147 Nikon D3100 14.2MP Digital SLR Camera with 18-... 54.1\n", "\n", "[77 rows x 3 columns]" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cameras_df[\n", " (cameras_df['Product Name'].str.contains('Nikon'))\n", " | (cameras_df['Product Name'].str.contains('Canon'))\n", " | (cameras_df['Product Name'].str.contains('Kodak'))\n", "]" ] }, { "cell_type": "code", "execution_count": 33, "id": "hired-success", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
01
117KodakZM1
287KodakZM1
616KodakEasyShare
2151CanonPowerShot
2280CanonPowerShot
.........
12459CanonEOS
12522CanonEOS
12905KodakEasyShare
13594NikonD3100
13986NikonD3100
\n", "

77 rows × 2 columns

\n", "
" ], "text/plain": [ " 0 1\n", "117 Kodak ZM1\n", "287 Kodak ZM1\n", "616 Kodak EasyShare\n", "2151 Canon PowerShot\n", "2280 Canon PowerShot\n", "... ... ...\n", "12459 Canon EOS\n", "12522 Canon EOS\n", "12905 Kodak EasyShare\n", "13594 Nikon D3100\n", "13986 Nikon D3100\n", "\n", "[77 rows x 2 columns]" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pattern = '(Nikon|Canon|Kodak)\\s+(\\w*)'\n", "\n", "cameras_df['Product Name'].str.extract(pattern)" ] }, { "cell_type": "markdown", "id": "clean-dictionary", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "\n", "# Applying custom functions\n", "\n", "## Applying functions to columns" ] }, { "cell_type": "code", "execution_count": 34, "id": "practical-taxation", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "def process_channel(channel):\n", " return 'Name: ' + channel.upper()" ] }, { "cell_type": "code", "execution_count": 35, "id": "naughty-causing", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "'Name: SHOPPE.COM'" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "process_channel('Shoppe.com')" ] }, { "cell_type": "code", "execution_count": 36, "id": "infrared-object", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "'Name: BULLSEYE'" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "process_channel('Bullseye')" ] }, { "cell_type": "code", "execution_count": 37, "id": "distinguished-robinson", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "for channel in ledger_df['Channel']:\n", " process_channel(channel)" ] }, { "cell_type": "code", "execution_count": 38, "id": "interior-invite", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Name: SHOPPE.COM\n", "1 Name: WALCART\n", "2 Name: BULLSEYE\n", "3 Name: BULLSEYE\n", "4 Name: BULLSEYE\n", " ... \n", "14049 Name: BULLSEYE\n", "14050 Name: BULLSEYE\n", "14051 Name: UNDERSTOCK.COM\n", "14052 Name: IBAY.COM\n", "14053 Name: UNDERSTOCK.COM\n", "Name: Channel, Length: 14054, dtype: object" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Channel'].apply(process_channel)" ] }, { "cell_type": "code", "execution_count": 41, "id": "least-collins", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "def process_product(product):\n", " if pd.isna(product):\n", " return 'EMPTY PRODUCT NAME'\n", " else:\n", " return 'Product: ' + product.upper()\n", " " ] }, { "cell_type": "code", "execution_count": 42, "id": "least-andrew", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Product: CANNON WATER BOMB BALLOONS 100 PACK\n", "1 Product: LEGO NINJA TURTLES STEALTH SHELL IN P...\n", "2 EMPTY PRODUCT NAME\n", "3 Product: TRANSFORMERS AGE OF EXTINCTION GENERA...\n", "4 Product: TRANSFORMERS AGE OF EXTINCTION GENERA...\n", " ... \n", "14049 Product: AC ADAPTER/POWER SUPPLY&CORD FOR LENO...\n", "14050 Product: CISCO SYSTEMS GIGABIT VPN ROUTER (RV3...\n", "14051 Product: PHILIPS AJ3116M/37 DIGITAL TUNING CLO...\n", "14052 EMPTY PRODUCT NAME\n", "14053 Product: SIRIUS SATELLITE RADIO XADH2 HOME ACC...\n", "Name: Product Name, Length: 14054, dtype: object" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Product Name'].apply(process_product)" ] }, { "cell_type": "markdown", "id": "introductory-romantic", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Overthinking: Functions without a name" ] }, { "cell_type": "code", "execution_count": 43, "id": "victorian-flooring", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 Name: SHOPPE.COM\n", "1 Name: WALCART\n", "2 Name: BULLSEYE\n", "3 Name: BULLSEYE\n", "4 Name: BULLSEYE\n", " ... \n", "14049 Name: BULLSEYE\n", "14050 Name: BULLSEYE\n", "14051 Name: UNDERSTOCK.COM\n", "14052 Name: IBAY.COM\n", "14053 Name: UNDERSTOCK.COM\n", "Name: Channel, Length: 14054, dtype: object" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df['Channel'].apply(lambda channel: 'Name: ' + channel.upper())" ] }, { "cell_type": "code", "execution_count": 44, "id": "productive-dancing", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "(channel)>" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# regular function definition\n", "def process_channel(channel):\n", " return 'Name: ' + channel.upper()\n", "\n", "# anonymous function definition\n", "lambda channel: 'Name: ' + channel.upper()" ] }, { "cell_type": "markdown", "id": "electronic-greensboro", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Applying functions to rows" ] }, { "cell_type": "code", "execution_count": 45, "id": "static-villa", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "InvoiceNo 1532\n", "Channel Shoppe.com\n", "Product Name Cannon Water Bomb Balloons 100 Pack\n", "ProductID T&G/CAN-97509\n", "Account Sales\n", "AccountNo 5004\n", "Date 2020-01-01 00:00:00\n", "Deadline 11/23/19\n", "Currency USD\n", "Unit Price 20.11\n", "Quantity 14\n", "Total 281.54\n", "Name: 0, dtype: object" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_row = ledger_df.iloc[0]\n", "\n", "first_row" ] }, { "cell_type": "code", "execution_count": 46, "id": "physical-container", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "281.54" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_row['Total']" ] }, { "cell_type": "code", "execution_count": 47, "id": "amateur-literacy", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "'Shoppe.com'" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "first_row['Channel']" ] }, { "cell_type": "code", "execution_count": 48, "id": "invalid-ideal", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "def calculate_tax(row):\n", " if row['Channel'] == 'Shoppe.com':\n", " return row['Total'] * (16 / 100)\n", " elif row['Channel'] == 'iBay.com':\n", " return row['Total'] * (11 / 100)\n", " elif row['Channel'] == 'Understock.com':\n", " return row['Total'] * (9 / 100)\n", " else:\n", " return 0" ] }, { "cell_type": "code", "execution_count": 49, "id": "unexpected-honor", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "45.046400000000006" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calculate_tax(first_row)" ] }, { "cell_type": "code", "execution_count": 50, "id": "painful-region", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calculate_tax(ledger_df.iloc[10])" ] }, { "cell_type": "code", "execution_count": 51, "id": "functional-guess", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "3.355" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "calculate_tax(ledger_df.iloc[100])" ] }, { "cell_type": "code", "execution_count": 52, "id": "arctic-button", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 45.0464\n", "1 0.0000\n", "2 0.0000\n", "3 0.0000\n", "4 0.0000\n", " ... \n", "14049 0.0000\n", "14050 0.0000\n", "14051 0.3762\n", "14052 13.1450\n", "14053 5.9688\n", "Length: 14054, dtype: float64" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df.apply(calculate_tax, axis='columns')" ] }, { "cell_type": "markdown", "id": "partial-september", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Overthinking: Other function parameters" ] }, { "cell_type": "code", "execution_count": 53, "id": "blank-estate", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "def calculate_tax(row, levels={}):\n", " channel = row['Channel']\n", " total = row['Total']\n", " \n", " tax = 0\n", " if channel in levels:\n", " tax = levels[channel] \n", "\n", " return total * tax" ] }, { "cell_type": "code", "execution_count": 54, "id": "loaded-lender", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 0.0\n", "1 0.0\n", "2 0.0\n", "3 0.0\n", "4 0.0\n", " ... \n", "14049 0.0\n", "14050 0.0\n", "14051 0.0\n", "14052 0.0\n", "14053 0.0\n", "Length: 14054, dtype: float64" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df.apply(calculate_tax, axis='columns')" ] }, { "cell_type": "code", "execution_count": 55, "id": "sapphire-sodium", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 45.0464\n", "1 0.0000\n", "2 0.0000\n", "3 0.0000\n", "4 0.0000\n", " ... \n", "14049 0.0000\n", "14050 0.0000\n", "14051 0.3762\n", "14052 13.1450\n", "14053 5.9688\n", "Length: 14054, dtype: float64" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df.apply(\n", " calculate_tax, \n", " levels={\n", " 'Shoppe.com': (16 / 100),\n", " 'iBay.com': (11 / 100),\n", " 'Understock.com': (9 / 100),\n", " }, \n", " axis='columns'\n", ")" ] }, { "cell_type": "code", "execution_count": 56, "id": "grave-speaker", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 45.0464\n", "1 0.2680\n", "2 3.5010\n", "3 4.8456\n", "4 4.8456\n", " ... \n", "14049 13.7856\n", "14050 2.0034\n", "14051 0.3762\n", "14052 13.1450\n", "14053 5.9688\n", "Length: 14054, dtype: float64" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df.apply(\n", " calculate_tax, \n", " levels={\n", " 'Shoppe.com': (16 / 100),\n", " 'iBay.com': (11 / 100),\n", " 'Understock.com': (9 / 100),\n", " 'Bullseye': (6 / 100),\n", " 'Walcart': (4 / 100),\n", " }, \n", " axis='columns'\n", ")" ] } ], "metadata": { "celltoolbar": "Slideshow", "jupytext": { "formats": "ipynb" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "rise": { "auto_select": "first", "autolaunch": false, "enable_chalkboard": true, "start_slideshow_at": "selected", "theme": "black" } }, "nbformat": 4, "nbformat_minor": 5 }