{ "cells": [ { "cell_type": "markdown", "id": "36fae451", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "# Groups and pivot tables with Pandas\n", "\n", "Feng Li\n", "\n", "School of Statistics and Mathematics\n", "\n", "Central University of Finance and Economics\n", "\n", "[feng.li@cufe.edu.cn](mailto:feng.li@cufe.edu.cn)\n", "\n", "[https://feng.li/python](https://feng.li/python)" ] }, { "cell_type": "code", "execution_count": 4, "id": "24c38d8a", "metadata": { "slideshow": { "slide_type": "slide" } }, "outputs": [], "source": [ "import pandas as pd\n", "ledger_df_info = pd.ExcelFile('data/Q1Sales.xlsx')\n", "ledger_df_info.sheet_names\n", "ledger_df = pd.read_excel('data/Q1Sales.xlsx') # read the first sheet" ] }, { "cell_type": "code", "execution_count": 5, "id": "monthly-facial", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "columns = ['ProductID', 'Product Name', 'Channel', 'Unit Price', 'Quantity', 'Total']\n", "sample_df = ledger_df[columns].tail(10)" ] }, { "cell_type": "code", "execution_count": 6, "id": "affected-briefs", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameChannelUnit PriceQuantityTotal
14044MI/SEN-01085Sennheiser EW 112P G3-B omni-directional EW sy...Understock.com18.586111.48
14045I&S/WIH-08645Wiha 26598 Nut Driver Set, Metric In Canvas Po...Shoppe.com16.56621026.72
14046H&K/KIK-91404Kikkerland Magnetic Decision MakeriBay.com3.641554.60
14047T&G/YU--76445Yu-Gi-Oh! - Light-Imprisoning Mirror (GLAS-EN0...Understock.com4.50418.00
14048T&G/LAU-88048Lauri Toddler ToteiBay.com14.46114.46
14049E/AC-63975AC Adapter/Power Supply&Cord for Lenovo 3000 G...Bullseye28.728229.76
14050E/CIS-74992Cisco Systems Gigabit VPN Router (RV320K9NA)Bullseye33.39133.39
14051E/PHI-08100Philips AJ3116M/37 Digital Tuning Clock Radio ...Understock.com4.1814.18
14052E/POL-61164NaNiBay.com4.7825119.50
14053E/SIR-83381Sirius Satellite Radio XADH2 Home Access Kit f...Understock.com33.16266.32
\n", "
" ], "text/plain": [ " ProductID Product Name \\\n", "14044 MI/SEN-01085 Sennheiser EW 112P G3-B omni-directional EW sy... \n", "14045 I&S/WIH-08645 Wiha 26598 Nut Driver Set, Metric In Canvas Po... \n", "14046 H&K/KIK-91404 Kikkerland Magnetic Decision Maker \n", "14047 T&G/YU--76445 Yu-Gi-Oh! - Light-Imprisoning Mirror (GLAS-EN0... \n", "14048 T&G/LAU-88048 Lauri Toddler Tote \n", "14049 E/AC-63975 AC Adapter/Power Supply&Cord for Lenovo 3000 G... \n", "14050 E/CIS-74992 Cisco Systems Gigabit VPN Router (RV320K9NA) \n", "14051 E/PHI-08100 Philips AJ3116M/37 Digital Tuning Clock Radio ... \n", "14052 E/POL-61164 NaN \n", "14053 E/SIR-83381 Sirius Satellite Radio XADH2 Home Access Kit f... \n", "\n", " Channel Unit Price Quantity Total \n", "14044 Understock.com 18.58 6 111.48 \n", "14045 Shoppe.com 16.56 62 1026.72 \n", "14046 iBay.com 3.64 15 54.60 \n", "14047 Understock.com 4.50 4 18.00 \n", "14048 iBay.com 14.46 1 14.46 \n", "14049 Bullseye 28.72 8 229.76 \n", "14050 Bullseye 33.39 1 33.39 \n", "14051 Understock.com 4.18 1 4.18 \n", "14052 iBay.com 4.78 25 119.50 \n", "14053 Understock.com 33.16 2 66.32 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_df" ] }, { "cell_type": "code", "execution_count": 7, "id": "lined-grounds", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total
Channel
Bullseye263.15
Shoppe.com1026.72
Understock.com199.98
iBay.com188.56
\n", "
" ], "text/plain": [ " Total\n", "Channel \n", "Bullseye 263.15\n", "Shoppe.com 1026.72\n", "Understock.com 199.98\n", "iBay.com 188.56" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_df.groupby('Channel').agg({'Total': 'sum'})" ] }, { "cell_type": "markdown", "id": "portuguese-complaint", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### The pandas group object" ] }, { "cell_type": "code", "execution_count": 8, "id": "committed-copying", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [], "source": [ "groups = sample_df.groupby('Channel')" ] }, { "cell_type": "code", "execution_count": 9, "id": "broad-overhead", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "pandas.core.groupby.generic.DataFrameGroupBy" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(groups)" ] }, { "cell_type": "code", "execution_count": 10, "id": "secondary-geology", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "{'Bullseye': [14049, 14050], 'Shoppe.com': [14045], 'Understock.com': [14044, 14047, 14051, 14053], 'iBay.com': [14046, 14048, 14052]}" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups.groups" ] }, { "cell_type": "code", "execution_count": 11, "id": "sweet-settlement", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameChannelUnit PriceQuantityTotal
14049E/AC-63975AC Adapter/Power Supply&Cord for Lenovo 3000 G...Bullseye28.728229.76
14050E/CIS-74992Cisco Systems Gigabit VPN Router (RV320K9NA)Bullseye33.39133.39
\n", "
" ], "text/plain": [ " ProductID Product Name \\\n", "14049 E/AC-63975 AC Adapter/Power Supply&Cord for Lenovo 3000 G... \n", "14050 E/CIS-74992 Cisco Systems Gigabit VPN Router (RV320K9NA) \n", "\n", " Channel Unit Price Quantity Total \n", "14049 Bullseye 28.72 8 229.76 \n", "14050 Bullseye 33.39 1 33.39 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups.get_group('Bullseye')" ] }, { "cell_type": "code", "execution_count": 12, "id": "ruled-radical", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "14049 229.76\n", "14050 33.39\n", "Name: Total, dtype: float64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups['Total'].get_group('Bullseye')" ] }, { "cell_type": "code", "execution_count": 13, "id": "latest-investing", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
QuantityTotal
140498229.76
14050133.39
\n", "
" ], "text/plain": [ " Quantity Total\n", "14049 8 229.76\n", "14050 1 33.39" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups[['Quantity', 'Total']].get_group('Bullseye')" ] }, { "cell_type": "code", "execution_count": 14, "id": "celtic-england", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Channel\n", "Bullseye 263.15\n", "Shoppe.com 1026.72\n", "Understock.com 199.98\n", "iBay.com 188.56\n", "Name: Total, dtype: float64" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups['Total'].sum()" ] }, { "cell_type": "code", "execution_count": 15, "id": "federal-candy", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
QuantityTotal
Channel
Bullseye9263.15
Shoppe.com621026.72
Understock.com13199.98
iBay.com41188.56
\n", "
" ], "text/plain": [ " Quantity Total\n", "Channel \n", "Bullseye 9 263.15\n", "Shoppe.com 62 1026.72\n", "Understock.com 13 199.98\n", "iBay.com 41 188.56" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "groups[['Quantity', 'Total']].sum()" ] }, { "cell_type": "code", "execution_count": 16, "id": "sacred-removal", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Channel\n", "Bullseye 263.15\n", "Shoppe.com 1026.72\n", "Understock.com 199.98\n", "iBay.com 188.56\n", "Name: Total, dtype: float64" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_df.groupby('Channel')['Total'].sum()" ] }, { "cell_type": "markdown", "id": "south-retreat", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Aggregating group functions" ] }, { "cell_type": "code", "execution_count": 17, "id": "reduced-divide", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total
Channel
Bullseye263.15
Shoppe.com1026.72
Understock.com199.98
iBay.com188.56
\n", "
" ], "text/plain": [ " Total\n", "Channel \n", "Bullseye 263.15\n", "Shoppe.com 1026.72\n", "Understock.com 199.98\n", "iBay.com 188.56" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this works as well\n", "# sample_df.groupby('Channel')['Total'].agg('sum')\n", "sample_df.groupby('Channel').agg({'Total': 'sum'})" ] }, { "cell_type": "code", "execution_count": 18, "id": "tribal-rental", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TotalQuantity
summeansummaxmin
Channel
Bullseye263.15131.575000981
Shoppe.com1026.721026.720000626262
Understock.com199.9849.9950001361
iBay.com188.5662.85333341251
\n", "
" ], "text/plain": [ " Total Quantity \n", " sum mean sum max min\n", "Channel \n", "Bullseye 263.15 131.575000 9 8 1\n", "Shoppe.com 1026.72 1026.720000 62 62 62\n", "Understock.com 199.98 49.995000 13 6 1\n", "iBay.com 188.56 62.853333 41 25 1" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(sample_df\n", " .groupby('Channel').agg({\n", " 'Total': ['sum', 'mean'],\n", " 'Quantity': ['sum', 'max', 'min']\n", " })\n", ") " ] }, { "cell_type": "code", "execution_count": 19, "id": "mechanical-savage", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "Channel\n", "Bullseye 8\n", "Shoppe.com 62\n", "Understock.com 6\n", "iBay.com 25\n", "Name: (Quantity, max), dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# assigns the output above\n", "# to another variable \n", "aggregate_df = (\n", " sample_df\n", " .groupby('Channel').agg({\n", " 'Total': ['sum', 'mean'],\n", " 'Quantity': ['sum', 'max', 'min']\n", " })\n", ") \n", "\n", "# selects the max column under Quantity\n", "aggregate_df.loc[:, ('Quantity', 'max')]" ] }, { "cell_type": "code", "execution_count": 20, "id": "sensitive-tutorial", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
summaxmin
Channel
Bullseye981
Shoppe.com626262
Understock.com1361
iBay.com41251
\n", "
" ], "text/plain": [ " sum max min\n", "Channel \n", "Bullseye 9 8 1\n", "Shoppe.com 62 62 62\n", "Understock.com 13 6 1\n", "iBay.com 41 25 1" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "aggregate_df.loc[:, 'Quantity']" ] }, { "cell_type": "markdown", "id": "endless-assistant", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "#### Custom aggregating functions" ] }, { "cell_type": "code", "execution_count": 21, "id": "central-selection", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Total
minmaxtotal_diff
Channel
Bullseye33.39229.76196.37
Shoppe.com1026.721026.720.00
Understock.com4.18111.48107.30
iBay.com14.46119.50105.04
\n", "
" ], "text/plain": [ " Total \n", " min max total_diff\n", "Channel \n", "Bullseye 33.39 229.76 196.37\n", "Shoppe.com 1026.72 1026.72 0.00\n", "Understock.com 4.18 111.48 107.30\n", "iBay.com 14.46 119.50 105.04" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def total_diff(column):\n", " return column.max() - column.min() \n", "\n", "sample_df.groupby('Channel').agg({'Total': ['min', 'max', total_diff]})" ] }, { "cell_type": "markdown", "id": "sealed-airplane", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "### Overthinking: Other group functions" ] }, { "cell_type": "code", "execution_count": 23, "id": "dated-equality", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameChannelUnit PriceQuantityTotal
Channel
Bullseye14049E/AC-63975AC Adapter/Power Supply&Cord for Lenovo 3000 G...Bullseye28.728229.76
14050E/CIS-74992Cisco Systems Gigabit VPN Router (RV320K9NA)Bullseye33.39133.39
Shoppe.com14045I&S/WIH-08645Wiha 26598 Nut Driver Set, Metric In Canvas Po...Shoppe.com16.56621026.72
\n", "
" ], "text/plain": [ " ProductID \\\n", "Channel \n", "Bullseye 14049 E/AC-63975 \n", " 14050 E/CIS-74992 \n", "Shoppe.com 14045 I&S/WIH-08645 \n", "\n", " Product Name \\\n", "Channel \n", "Bullseye 14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G... \n", " 14050 Cisco Systems Gigabit VPN Router (RV320K9NA) \n", "Shoppe.com 14045 Wiha 26598 Nut Driver Set, Metric In Canvas Po... \n", "\n", " Channel Unit Price Quantity Total \n", "Channel \n", "Bullseye 14049 Bullseye 28.72 8 229.76 \n", " 14050 Bullseye 33.39 1 33.39 \n", "Shoppe.com 14045 Shoppe.com 16.56 62 1026.72 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def filter_group(group_df):\n", " return group_df if group_df['Total'].sum() > 200 else None\n", "\n", "sample_df.groupby('Channel').apply(filter_group)" ] }, { "cell_type": "code", "execution_count": 24, "id": "violent-jewel", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameChannelUnit PriceQuantityTotal
0E/AC-63975AC Adapter/Power Supply&Cord for Lenovo 3000 G...Bullseye28.728229.76
1E/CIS-74992Cisco Systems Gigabit VPN Router (RV320K9NA)Bullseye33.39133.39
2I&S/WIH-08645Wiha 26598 Nut Driver Set, Metric In Canvas Po...Shoppe.com16.56621026.72
\n", "
" ], "text/plain": [ " ProductID Product Name \\\n", "0 E/AC-63975 AC Adapter/Power Supply&Cord for Lenovo 3000 G... \n", "1 E/CIS-74992 Cisco Systems Gigabit VPN Router (RV320K9NA) \n", "2 I&S/WIH-08645 Wiha 26598 Nut Driver Set, Metric In Canvas Po... \n", "\n", " Channel Unit Price Quantity Total \n", "0 Bullseye 28.72 8 229.76 \n", "1 Bullseye 33.39 1 33.39 \n", "2 Shoppe.com 16.56 62 1026.72 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_df.groupby('Channel').apply(filter_group).reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 25, "id": "spectacular-costs", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDProduct NameChannelUnit PriceQuantityTotal% Group Total
14044MI/SEN-01085Sennheiser EW 112P G3-B omni-directional EW sy...Understock.com18.586111.4855.75
14045I&S/WIH-08645Wiha 26598 Nut Driver Set, Metric In Canvas Po...Shoppe.com16.56621026.72100.00
14046H&K/KIK-91404Kikkerland Magnetic Decision MakeriBay.com3.641554.6028.96
14047T&G/YU--76445Yu-Gi-Oh! - Light-Imprisoning Mirror (GLAS-EN0...Understock.com4.50418.009.00
14048T&G/LAU-88048Lauri Toddler ToteiBay.com14.46114.467.67
14049E/AC-63975AC Adapter/Power Supply&Cord for Lenovo 3000 G...Bullseye28.728229.7687.31
14050E/CIS-74992Cisco Systems Gigabit VPN Router (RV320K9NA)Bullseye33.39133.3912.69
14051E/PHI-08100Philips AJ3116M/37 Digital Tuning Clock Radio ...Understock.com4.1814.182.09
14052E/POL-61164NaNiBay.com4.7825119.5063.38
14053E/SIR-83381Sirius Satellite Radio XADH2 Home Access Kit f...Understock.com33.16266.3233.16
\n", "
" ], "text/plain": [ " ProductID Product Name \\\n", "14044 MI/SEN-01085 Sennheiser EW 112P G3-B omni-directional EW sy... \n", "14045 I&S/WIH-08645 Wiha 26598 Nut Driver Set, Metric In Canvas Po... \n", "14046 H&K/KIK-91404 Kikkerland Magnetic Decision Maker \n", "14047 T&G/YU--76445 Yu-Gi-Oh! - Light-Imprisoning Mirror (GLAS-EN0... \n", "14048 T&G/LAU-88048 Lauri Toddler Tote \n", "14049 E/AC-63975 AC Adapter/Power Supply&Cord for Lenovo 3000 G... \n", "14050 E/CIS-74992 Cisco Systems Gigabit VPN Router (RV320K9NA) \n", "14051 E/PHI-08100 Philips AJ3116M/37 Digital Tuning Clock Radio ... \n", "14052 E/POL-61164 NaN \n", "14053 E/SIR-83381 Sirius Satellite Radio XADH2 Home Access Kit f... \n", "\n", " Channel Unit Price Quantity Total % Group Total \n", "14044 Understock.com 18.58 6 111.48 55.75 \n", "14045 Shoppe.com 16.56 62 1026.72 100.00 \n", "14046 iBay.com 3.64 15 54.60 28.96 \n", "14047 Understock.com 4.50 4 18.00 9.00 \n", "14048 iBay.com 14.46 1 14.46 7.67 \n", "14049 Bullseye 28.72 8 229.76 87.31 \n", "14050 Bullseye 33.39 1 33.39 12.69 \n", "14051 Understock.com 4.18 1 4.18 2.09 \n", "14052 iBay.com 4.78 25 119.50 63.38 \n", "14053 Understock.com 33.16 2 66.32 33.16 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def percent_group_total(group_df): \n", " group_df['% Group Total'] = group_df['Total'] / group_df['Total'].sum() * 100\n", " group_df['% Group Total'] = group_df['% Group Total'].round(2)\n", " \n", " return group_df\n", "\n", "sample_df.groupby('Channel').apply(percent_group_total)" ] }, { "cell_type": "markdown", "id": "detected-beauty", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Stacking and unstacking" ] }, { "cell_type": "code", "execution_count": 26, "id": "relative-rochester", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDChannelTotal
0T&G/CAN-97509Shoppe.com281.54
1T&G/LEG-37777Walcart6.70
2T&G/PET-14209Bullseye58.35
3T&G/TRA-20170Bullseye80.76
4T&G/TRA-20170Bullseye80.76
\n", "
" ], "text/plain": [ " ProductID Channel Total\n", "0 T&G/CAN-97509 Shoppe.com 281.54\n", "1 T&G/LEG-37777 Walcart 6.70\n", "2 T&G/PET-14209 Bullseye 58.35\n", "3 T&G/TRA-20170 Bullseye 80.76\n", "4 T&G/TRA-20170 Bullseye 80.76" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "columns = ['ProductID', 'Channel', 'Total']\n", "sample_df = ledger_df[columns].head()\n", "\n", "sample_df" ] }, { "cell_type": "code", "execution_count": 27, "id": "toxic-grenada", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/plain": [ "0 ProductID T&G/CAN-97509\n", " Channel Shoppe.com\n", " Total 281.54\n", "1 ProductID T&G/LEG-37777\n", " Channel Walcart\n", " Total 6.7\n", "2 ProductID T&G/PET-14209\n", " Channel Bullseye\n", " Total 58.35\n", "3 ProductID T&G/TRA-20170\n", " Channel Bullseye\n", " Total 80.76\n", "4 ProductID T&G/TRA-20170\n", " Channel Bullseye\n", " Total 80.76\n", "dtype: object" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_df.stack()" ] }, { "cell_type": "code", "execution_count": 28, "id": "rocky-kuwait", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ProductIDChannelTotal
0T&G/CAN-97509Shoppe.com281.54
1T&G/LEG-37777Walcart6.7
2T&G/PET-14209Bullseye58.35
3T&G/TRA-20170Bullseye80.76
4T&G/TRA-20170Bullseye80.76
\n", "
" ], "text/plain": [ " ProductID Channel Total\n", "0 T&G/CAN-97509 Shoppe.com 281.54\n", "1 T&G/LEG-37777 Walcart 6.7\n", "2 T&G/PET-14209 Bullseye 58.35\n", "3 T&G/TRA-20170 Bullseye 80.76\n", "4 T&G/TRA-20170 Bullseye 80.76" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stacked_sample = sample_df.stack() \n", "\n", "stacked_sample.unstack()" ] }, { "cell_type": "markdown", "id": "center-ultimate", "metadata": { "slideshow": { "slide_type": "slide" } }, "source": [ "## Pivot tables" ] }, { "cell_type": "code", "execution_count": 31, "id": "framed-utilization", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
InvoiceNoChannelProduct NameProductIDAccountAccountNoDateDeadlineCurrencyUnit PriceQuantityTotalDeadline Quarter
01532Shoppe.comCannon Water Bomb Balloons 100 PackT&G/CAN-97509Sales50042020-01-012019-11-23USD20.1114281.542019Q4
11533WalcartLEGO Ninja Turtles Stealth Shell in Pursuit 79102T&G/LEG-37777Sales50042020-01-012020-06-15USD6.7016.702020Q2
21534BullseyeNaNT&G/PET-14209Sales50042020-01-012020-05-07USD11.67558.352020Q2
31535BullseyeTransformers Age of Extinction Generations Del...T&G/TRA-20170Sales50042020-01-012019-12-22USD13.46680.762019Q4
41535BullseyeTransformers Age of Extinction Generations Del...T&G/TRA-20170Sales50042020-01-012019-12-22USD13.46680.762019Q4
..........................................
1404915581BullseyeAC Adapter/Power Supply&Cord for Lenovo 3000 G...E/AC-63975Sales50042020-01-312020-02-23USD28.728229.762020Q1
1405015582BullseyeCisco Systems Gigabit VPN Router (RV320K9NA)E/CIS-74992Sales50042020-01-312020-01-21USD33.39133.392020Q1
1405115583Understock.comPhilips AJ3116M/37 Digital Tuning Clock Radio ...E/PHI-08100Sales50042020-01-312020-03-22USD4.1814.182020Q1
1405215584iBay.comNaNE/POL-61164Sales50042020-01-312020-06-25USD4.7825119.502020Q2
1405315585Understock.comSirius Satellite Radio XADH2 Home Access Kit f...E/SIR-83381Sales50042020-01-312020-02-01USD33.16266.322020Q1
\n", "

14054 rows × 13 columns

\n", "
" ], "text/plain": [ " InvoiceNo Channel \\\n", "0 1532 Shoppe.com \n", "1 1533 Walcart \n", "2 1534 Bullseye \n", "3 1535 Bullseye \n", "4 1535 Bullseye \n", "... ... ... \n", "14049 15581 Bullseye \n", "14050 15582 Bullseye \n", "14051 15583 Understock.com \n", "14052 15584 iBay.com \n", "14053 15585 Understock.com \n", "\n", " Product Name ProductID \\\n", "0 Cannon Water Bomb Balloons 100 Pack T&G/CAN-97509 \n", "1 LEGO Ninja Turtles Stealth Shell in Pursuit 79102 T&G/LEG-37777 \n", "2 NaN T&G/PET-14209 \n", "3 Transformers Age of Extinction Generations Del... T&G/TRA-20170 \n", "4 Transformers Age of Extinction Generations Del... T&G/TRA-20170 \n", "... ... ... \n", "14049 AC Adapter/Power Supply&Cord for Lenovo 3000 G... E/AC-63975 \n", "14050 Cisco Systems Gigabit VPN Router (RV320K9NA) E/CIS-74992 \n", "14051 Philips AJ3116M/37 Digital Tuning Clock Radio ... E/PHI-08100 \n", "14052 NaN E/POL-61164 \n", "14053 Sirius Satellite Radio XADH2 Home Access Kit f... E/SIR-83381 \n", "\n", " Account AccountNo Date Deadline Currency Unit Price Quantity \\\n", "0 Sales 5004 2020-01-01 2019-11-23 USD 20.11 14 \n", "1 Sales 5004 2020-01-01 2020-06-15 USD 6.70 1 \n", "2 Sales 5004 2020-01-01 2020-05-07 USD 11.67 5 \n", "3 Sales 5004 2020-01-01 2019-12-22 USD 13.46 6 \n", "4 Sales 5004 2020-01-01 2019-12-22 USD 13.46 6 \n", "... ... ... ... ... ... ... ... \n", "14049 Sales 5004 2020-01-31 2020-02-23 USD 28.72 8 \n", "14050 Sales 5004 2020-01-31 2020-01-21 USD 33.39 1 \n", "14051 Sales 5004 2020-01-31 2020-03-22 USD 4.18 1 \n", "14052 Sales 5004 2020-01-31 2020-06-25 USD 4.78 25 \n", "14053 Sales 5004 2020-01-31 2020-02-01 USD 33.16 2 \n", "\n", " Total Deadline Quarter \n", "0 281.54 2019Q4 \n", "1 6.70 2020Q2 \n", "2 58.35 2020Q2 \n", "3 80.76 2019Q4 \n", "4 80.76 2019Q4 \n", "... ... ... \n", "14049 229.76 2020Q1 \n", "14050 33.39 2020Q1 \n", "14051 4.18 2020Q1 \n", "14052 119.50 2020Q2 \n", "14053 66.32 2020Q1 \n", "\n", "[14054 rows x 13 columns]" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df = pd.read_excel('data/Q1Sales.xlsx')\n", "\n", "ledger_df['Deadline'] = pd.to_datetime(ledger_df['Deadline'])\n", "ledger_df['Deadline Quarter'] = ledger_df['Deadline'].dt.to_period(freq='Q-DEC')\n", "\n", "ledger_df" ] }, { "cell_type": "code", "execution_count": 32, "id": "marine-firewall", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Quantity
ChannelDeadline Quarter
Bullseye2019Q42212
2020Q13413
2020Q23034
2020Q3377
Shoppe.com2019Q411424
2020Q19969
2020Q211212
2020Q31757
Understock.com2019Q418518
2020Q121614
2020Q223222
2020Q33484
Walcart2019Q43816
2020Q15174
2020Q23921
2020Q3411
iBay.com2019Q49711
2020Q111525
2020Q211995
2020Q31317
\n", "
" ], "text/plain": [ " Quantity\n", "Channel Deadline Quarter \n", "Bullseye 2019Q4 2212\n", " 2020Q1 3413\n", " 2020Q2 3034\n", " 2020Q3 377\n", "Shoppe.com 2019Q4 11424\n", " 2020Q1 9969\n", " 2020Q2 11212\n", " 2020Q3 1757\n", "Understock.com 2019Q4 18518\n", " 2020Q1 21614\n", " 2020Q2 23222\n", " 2020Q3 3484\n", "Walcart 2019Q4 3816\n", " 2020Q1 5174\n", " 2020Q2 3921\n", " 2020Q3 411\n", "iBay.com 2019Q4 9711\n", " 2020Q1 11525\n", " 2020Q2 11995\n", " 2020Q3 1317" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df.groupby(['Channel', 'Deadline Quarter']).agg({'Quantity': 'sum'})" ] }, { "cell_type": "code", "execution_count": 33, "id": "fitting-leone", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Quantity
Deadline Quarter2019Q42020Q12020Q22020Q3
Channel
Bullseye221234133034377
Shoppe.com114249969112121757
Understock.com1851821614232223484
Walcart381651743921411
iBay.com971111525119951317
\n", "
" ], "text/plain": [ " Quantity \n", "Deadline Quarter 2019Q4 2020Q1 2020Q2 2020Q3\n", "Channel \n", "Bullseye 2212 3413 3034 377\n", "Shoppe.com 11424 9969 11212 1757\n", "Understock.com 18518 21614 23222 3484\n", "Walcart 3816 5174 3921 411\n", "iBay.com 9711 11525 11995 1317" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ledger_df.groupby(['Channel', 'Deadline Quarter']).agg({'Quantity': 'sum'}).unstack()" ] }, { "cell_type": "code", "execution_count": 34, "id": "dominican-seafood", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deadline Quarter2019Q42020Q12020Q22020Q3
Channel
Bullseye221234133034377
Shoppe.com114249969112121757
Understock.com1851821614232223484
Walcart381651743921411
iBay.com971111525119951317
\n", "
" ], "text/plain": [ "Deadline Quarter 2019Q4 2020Q1 2020Q2 2020Q3\n", "Channel \n", "Bullseye 2212 3413 3034 377\n", "Shoppe.com 11424 9969 11212 1757\n", "Understock.com 18518 21614 23222 3484\n", "Walcart 3816 5174 3921 411\n", "iBay.com 9711 11525 11995 1317" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.pivot_table(ledger_df, \n", " index='Channel', \n", " columns='Deadline Quarter', \n", " values='Quantity',\n", " aggfunc='sum')" ] }, { "cell_type": "code", "execution_count": 35, "id": "bacterial-powell", "metadata": { "slideshow": { "slide_type": "slide" }, "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Deadline Quarter2019Q42020Q12020Q22020Q3TOTAL
Channel
Bullseye2212341330343779036
Shoppe.com11424996911212175734362
Understock.com185182161423222348466838
Walcart38165174392141113322
iBay.com97111152511995131734548
TOTAL4568151695533847346158106
\n", "
" ], "text/plain": [ "Deadline Quarter 2019Q4 2020Q1 2020Q2 2020Q3 TOTAL\n", "Channel \n", "Bullseye 2212 3413 3034 377 9036\n", "Shoppe.com 11424 9969 11212 1757 34362\n", "Understock.com 18518 21614 23222 3484 66838\n", "Walcart 3816 5174 3921 411 13322\n", "iBay.com 9711 11525 11995 1317 34548\n", "TOTAL 45681 51695 53384 7346 158106" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.pivot_table(ledger_df, \n", " index='Channel', \n", " columns='Deadline Quarter', \n", " values='Quantity', \n", " aggfunc='sum',\n", " margins=True,\n", " margins_name='TOTAL')" ] } ], "metadata": { "celltoolbar": "Slideshow", "jupytext": { "formats": "ipynb" }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "rise": { "auto_select": "first", "autolaunch": false, "enable_chalkboard": true, "start_slideshow_at": "selected", "theme": "black" } }, "nbformat": 4, "nbformat_minor": 5 }