Feng Li
School of Statistics and Mathematics
Central University of Finance and Economics
import pandas as pd
ledger_df_info = pd.ExcelFile('data/Q1Sales.xlsx')
ledger_df_info.sheet_names
ledger_df = pd.read_excel('data/Q1Sales.xlsx') # read the first sheet
columns = ['ProductID', 'Product Name', 'Channel', 'Unit Price', 'Quantity', 'Total']
sample_df = ledger_df[columns].tail(10)
sample_df
ProductID | Product Name | Channel | Unit Price | Quantity | Total | |
---|---|---|---|---|---|---|
14044 | MI/SEN-01085 | Sennheiser EW 112P G3-B omni-directional EW sy... | Understock.com | 18.58 | 6 | 111.48 |
14045 | I&S/WIH-08645 | Wiha 26598 Nut Driver Set, Metric In Canvas Po... | Shoppe.com | 16.56 | 62 | 1026.72 |
14046 | H&K/KIK-91404 | Kikkerland Magnetic Decision Maker | iBay.com | 3.64 | 15 | 54.60 |
14047 | T&G/YU--76445 | Yu-Gi-Oh! - Light-Imprisoning Mirror (GLAS-EN0... | Understock.com | 4.50 | 4 | 18.00 |
14048 | T&G/LAU-88048 | Lauri Toddler Tote | iBay.com | 14.46 | 1 | 14.46 |
14049 | E/AC-63975 | AC Adapter/Power Supply&Cord for Lenovo 3000 G... | Bullseye | 28.72 | 8 | 229.76 |
14050 | E/CIS-74992 | Cisco Systems Gigabit VPN Router (RV320K9NA) | Bullseye | 33.39 | 1 | 33.39 |
14051 | E/PHI-08100 | Philips AJ3116M/37 Digital Tuning Clock Radio ... | Understock.com | 4.18 | 1 | 4.18 |
14052 | E/POL-61164 | NaN | iBay.com | 4.78 | 25 | 119.50 |
14053 | E/SIR-83381 | Sirius Satellite Radio XADH2 Home Access Kit f... | Understock.com | 33.16 | 2 | 66.32 |
sample_df.groupby('Channel').agg({'Total': 'sum'})
Total | |
---|---|
Channel | |
Bullseye | 263.15 |
Shoppe.com | 1026.72 |
Understock.com | 199.98 |
iBay.com | 188.56 |
groups = sample_df.groupby('Channel')
type(groups)
pandas.core.groupby.generic.DataFrameGroupBy
groups.groups
{'Bullseye': [14049, 14050], 'Shoppe.com': [14045], 'Understock.com': [14044, 14047, 14051, 14053], 'iBay.com': [14046, 14048, 14052]}
groups.get_group('Bullseye')
ProductID | Product Name | Channel | Unit Price | Quantity | Total | |
---|---|---|---|---|---|---|
14049 | E/AC-63975 | AC Adapter/Power Supply&Cord for Lenovo 3000 G... | Bullseye | 28.72 | 8 | 229.76 |
14050 | E/CIS-74992 | Cisco Systems Gigabit VPN Router (RV320K9NA) | Bullseye | 33.39 | 1 | 33.39 |
groups['Total'].get_group('Bullseye')
14049 229.76 14050 33.39 Name: Total, dtype: float64
groups[['Quantity', 'Total']].get_group('Bullseye')
Quantity | Total | |
---|---|---|
14049 | 8 | 229.76 |
14050 | 1 | 33.39 |
groups['Total'].sum()
Channel Bullseye 263.15 Shoppe.com 1026.72 Understock.com 199.98 iBay.com 188.56 Name: Total, dtype: float64
groups[['Quantity', 'Total']].sum()
Quantity | Total | |
---|---|---|
Channel | ||
Bullseye | 9 | 263.15 |
Shoppe.com | 62 | 1026.72 |
Understock.com | 13 | 199.98 |
iBay.com | 41 | 188.56 |
sample_df.groupby('Channel')['Total'].sum()
Channel Bullseye 263.15 Shoppe.com 1026.72 Understock.com 199.98 iBay.com 188.56 Name: Total, dtype: float64
# this works as well
# sample_df.groupby('Channel')['Total'].agg('sum')
sample_df.groupby('Channel').agg({'Total': 'sum'})
Total | |
---|---|
Channel | |
Bullseye | 263.15 |
Shoppe.com | 1026.72 |
Understock.com | 199.98 |
iBay.com | 188.56 |
(sample_df
.groupby('Channel').agg({
'Total': ['sum', 'mean'],
'Quantity': ['sum', 'max', 'min']
})
)
Total | Quantity | ||||
---|---|---|---|---|---|
sum | mean | sum | max | min | |
Channel | |||||
Bullseye | 263.15 | 131.575000 | 9 | 8 | 1 |
Shoppe.com | 1026.72 | 1026.720000 | 62 | 62 | 62 |
Understock.com | 199.98 | 49.995000 | 13 | 6 | 1 |
iBay.com | 188.56 | 62.853333 | 41 | 25 | 1 |
# assigns the output above
# to another variable
aggregate_df = (
sample_df
.groupby('Channel').agg({
'Total': ['sum', 'mean'],
'Quantity': ['sum', 'max', 'min']
})
)
# selects the max column under Quantity
aggregate_df.loc[:, ('Quantity', 'max')]
Channel Bullseye 8 Shoppe.com 62 Understock.com 6 iBay.com 25 Name: (Quantity, max), dtype: int64
aggregate_df.loc[:, 'Quantity']
sum | max | min | |
---|---|---|---|
Channel | |||
Bullseye | 9 | 8 | 1 |
Shoppe.com | 62 | 62 | 62 |
Understock.com | 13 | 6 | 1 |
iBay.com | 41 | 25 | 1 |
def total_diff(column):
return column.max() - column.min()
sample_df.groupby('Channel').agg({'Total': ['min', 'max', total_diff]})
Total | |||
---|---|---|---|
min | max | total_diff | |
Channel | |||
Bullseye | 33.39 | 229.76 | 196.37 |
Shoppe.com | 1026.72 | 1026.72 | 0.00 |
Understock.com | 4.18 | 111.48 | 107.30 |
iBay.com | 14.46 | 119.50 | 105.04 |
def filter_group(group_df):
return group_df if group_df['Total'].sum() > 200 else None
sample_df.groupby('Channel').apply(filter_group)
ProductID | Product Name | Channel | Unit Price | Quantity | Total | ||
---|---|---|---|---|---|---|---|
Channel | |||||||
Bullseye | 14049 | E/AC-63975 | AC Adapter/Power Supply&Cord for Lenovo 3000 G... | Bullseye | 28.72 | 8 | 229.76 |
14050 | E/CIS-74992 | Cisco Systems Gigabit VPN Router (RV320K9NA) | Bullseye | 33.39 | 1 | 33.39 | |
Shoppe.com | 14045 | I&S/WIH-08645 | Wiha 26598 Nut Driver Set, Metric In Canvas Po... | Shoppe.com | 16.56 | 62 | 1026.72 |
sample_df.groupby('Channel').apply(filter_group).reset_index(drop=True)
ProductID | Product Name | Channel | Unit Price | Quantity | Total | |
---|---|---|---|---|---|---|
0 | E/AC-63975 | AC Adapter/Power Supply&Cord for Lenovo 3000 G... | Bullseye | 28.72 | 8 | 229.76 |
1 | E/CIS-74992 | Cisco Systems Gigabit VPN Router (RV320K9NA) | Bullseye | 33.39 | 1 | 33.39 |
2 | I&S/WIH-08645 | Wiha 26598 Nut Driver Set, Metric In Canvas Po... | Shoppe.com | 16.56 | 62 | 1026.72 |
def percent_group_total(group_df):
group_df['% Group Total'] = group_df['Total'] / group_df['Total'].sum() * 100
group_df['% Group Total'] = group_df['% Group Total'].round(2)
return group_df
sample_df.groupby('Channel').apply(percent_group_total)
ProductID | Product Name | Channel | Unit Price | Quantity | Total | % Group Total | |
---|---|---|---|---|---|---|---|
14044 | MI/SEN-01085 | Sennheiser EW 112P G3-B omni-directional EW sy... | Understock.com | 18.58 | 6 | 111.48 | 55.75 |
14045 | I&S/WIH-08645 | Wiha 26598 Nut Driver Set, Metric In Canvas Po... | Shoppe.com | 16.56 | 62 | 1026.72 | 100.00 |
14046 | H&K/KIK-91404 | Kikkerland Magnetic Decision Maker | iBay.com | 3.64 | 15 | 54.60 | 28.96 |
14047 | T&G/YU--76445 | Yu-Gi-Oh! - Light-Imprisoning Mirror (GLAS-EN0... | Understock.com | 4.50 | 4 | 18.00 | 9.00 |
14048 | T&G/LAU-88048 | Lauri Toddler Tote | iBay.com | 14.46 | 1 | 14.46 | 7.67 |
14049 | E/AC-63975 | AC Adapter/Power Supply&Cord for Lenovo 3000 G... | Bullseye | 28.72 | 8 | 229.76 | 87.31 |
14050 | E/CIS-74992 | Cisco Systems Gigabit VPN Router (RV320K9NA) | Bullseye | 33.39 | 1 | 33.39 | 12.69 |
14051 | E/PHI-08100 | Philips AJ3116M/37 Digital Tuning Clock Radio ... | Understock.com | 4.18 | 1 | 4.18 | 2.09 |
14052 | E/POL-61164 | NaN | iBay.com | 4.78 | 25 | 119.50 | 63.38 |
14053 | E/SIR-83381 | Sirius Satellite Radio XADH2 Home Access Kit f... | Understock.com | 33.16 | 2 | 66.32 | 33.16 |
columns = ['ProductID', 'Channel', 'Total']
sample_df = ledger_df[columns].head()
sample_df
ProductID | Channel | Total | |
---|---|---|---|
0 | T&G/CAN-97509 | Shoppe.com | 281.54 |
1 | T&G/LEG-37777 | Walcart | 6.70 |
2 | T&G/PET-14209 | Bullseye | 58.35 |
3 | T&G/TRA-20170 | Bullseye | 80.76 |
4 | T&G/TRA-20170 | Bullseye | 80.76 |
sample_df.stack()
0 ProductID T&G/CAN-97509 Channel Shoppe.com Total 281.54 1 ProductID T&G/LEG-37777 Channel Walcart Total 6.7 2 ProductID T&G/PET-14209 Channel Bullseye Total 58.35 3 ProductID T&G/TRA-20170 Channel Bullseye Total 80.76 4 ProductID T&G/TRA-20170 Channel Bullseye Total 80.76 dtype: object
stacked_sample = sample_df.stack()
stacked_sample.unstack()
ProductID | Channel | Total | |
---|---|---|---|
0 | T&G/CAN-97509 | Shoppe.com | 281.54 |
1 | T&G/LEG-37777 | Walcart | 6.7 |
2 | T&G/PET-14209 | Bullseye | 58.35 |
3 | T&G/TRA-20170 | Bullseye | 80.76 |
4 | T&G/TRA-20170 | Bullseye | 80.76 |
ledger_df = pd.read_excel('data/Q1Sales.xlsx')
ledger_df['Deadline'] = pd.to_datetime(ledger_df['Deadline'])
ledger_df['Deadline Quarter'] = ledger_df['Deadline'].dt.to_period(freq='Q-DEC')
ledger_df
InvoiceNo | Channel | Product Name | ProductID | Account | AccountNo | Date | Deadline | Currency | Unit Price | Quantity | Total | Deadline Quarter | |
---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | 1532 | Shoppe.com | Cannon Water Bomb Balloons 100 Pack | T&G/CAN-97509 | Sales | 5004 | 2020-01-01 | 2019-11-23 | USD | 20.11 | 14 | 281.54 | 2019Q4 |
1 | 1533 | Walcart | LEGO Ninja Turtles Stealth Shell in Pursuit 79102 | T&G/LEG-37777 | Sales | 5004 | 2020-01-01 | 2020-06-15 | USD | 6.70 | 1 | 6.70 | 2020Q2 |
2 | 1534 | Bullseye | NaN | T&G/PET-14209 | Sales | 5004 | 2020-01-01 | 2020-05-07 | USD | 11.67 | 5 | 58.35 | 2020Q2 |
3 | 1535 | Bullseye | Transformers Age of Extinction Generations Del... | T&G/TRA-20170 | Sales | 5004 | 2020-01-01 | 2019-12-22 | USD | 13.46 | 6 | 80.76 | 2019Q4 |
4 | 1535 | Bullseye | Transformers Age of Extinction Generations Del... | T&G/TRA-20170 | Sales | 5004 | 2020-01-01 | 2019-12-22 | USD | 13.46 | 6 | 80.76 | 2019Q4 |
... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
14049 | 15581 | Bullseye | AC Adapter/Power Supply&Cord for Lenovo 3000 G... | E/AC-63975 | Sales | 5004 | 2020-01-31 | 2020-02-23 | USD | 28.72 | 8 | 229.76 | 2020Q1 |
14050 | 15582 | Bullseye | Cisco Systems Gigabit VPN Router (RV320K9NA) | E/CIS-74992 | Sales | 5004 | 2020-01-31 | 2020-01-21 | USD | 33.39 | 1 | 33.39 | 2020Q1 |
14051 | 15583 | Understock.com | Philips AJ3116M/37 Digital Tuning Clock Radio ... | E/PHI-08100 | Sales | 5004 | 2020-01-31 | 2020-03-22 | USD | 4.18 | 1 | 4.18 | 2020Q1 |
14052 | 15584 | iBay.com | NaN | E/POL-61164 | Sales | 5004 | 2020-01-31 | 2020-06-25 | USD | 4.78 | 25 | 119.50 | 2020Q2 |
14053 | 15585 | Understock.com | Sirius Satellite Radio XADH2 Home Access Kit f... | E/SIR-83381 | Sales | 5004 | 2020-01-31 | 2020-02-01 | USD | 33.16 | 2 | 66.32 | 2020Q1 |
14054 rows × 13 columns
ledger_df.groupby(['Channel', 'Deadline Quarter']).agg({'Quantity': 'sum'})
Quantity | ||
---|---|---|
Channel | Deadline Quarter | |
Bullseye | 2019Q4 | 2212 |
2020Q1 | 3413 | |
2020Q2 | 3034 | |
2020Q3 | 377 | |
Shoppe.com | 2019Q4 | 11424 |
2020Q1 | 9969 | |
2020Q2 | 11212 | |
2020Q3 | 1757 | |
Understock.com | 2019Q4 | 18518 |
2020Q1 | 21614 | |
2020Q2 | 23222 | |
2020Q3 | 3484 | |
Walcart | 2019Q4 | 3816 |
2020Q1 | 5174 | |
2020Q2 | 3921 | |
2020Q3 | 411 | |
iBay.com | 2019Q4 | 9711 |
2020Q1 | 11525 | |
2020Q2 | 11995 | |
2020Q3 | 1317 |
ledger_df.groupby(['Channel', 'Deadline Quarter']).agg({'Quantity': 'sum'}).unstack()
Quantity | ||||
---|---|---|---|---|
Deadline Quarter | 2019Q4 | 2020Q1 | 2020Q2 | 2020Q3 |
Channel | ||||
Bullseye | 2212 | 3413 | 3034 | 377 |
Shoppe.com | 11424 | 9969 | 11212 | 1757 |
Understock.com | 18518 | 21614 | 23222 | 3484 |
Walcart | 3816 | 5174 | 3921 | 411 |
iBay.com | 9711 | 11525 | 11995 | 1317 |
pd.pivot_table(ledger_df,
index='Channel',
columns='Deadline Quarter',
values='Quantity',
aggfunc='sum')
Deadline Quarter | 2019Q4 | 2020Q1 | 2020Q2 | 2020Q3 |
---|---|---|---|---|
Channel | ||||
Bullseye | 2212 | 3413 | 3034 | 377 |
Shoppe.com | 11424 | 9969 | 11212 | 1757 |
Understock.com | 18518 | 21614 | 23222 | 3484 |
Walcart | 3816 | 5174 | 3921 | 411 |
iBay.com | 9711 | 11525 | 11995 | 1317 |
pd.pivot_table(ledger_df,
index='Channel',
columns='Deadline Quarter',
values='Quantity',
aggfunc='sum',
margins=True,
margins_name='TOTAL')
Deadline Quarter | 2019Q4 | 2020Q1 | 2020Q2 | 2020Q3 | TOTAL |
---|---|---|---|---|---|
Channel | |||||
Bullseye | 2212 | 3413 | 3034 | 377 | 9036 |
Shoppe.com | 11424 | 9969 | 11212 | 1757 | 34362 |
Understock.com | 18518 | 21614 | 23222 | 3484 | 66838 |
Walcart | 3816 | 5174 | 3921 | 411 | 13322 |
iBay.com | 9711 | 11525 | 11995 | 1317 | 34548 |
TOTAL | 45681 | 51695 | 53384 | 7346 | 158106 |