Manipulating DataFrames with Pandas¶

Feng Li

School of Statistics and Mathematics

Central University of Finance and Economics

feng.li@cufe.edu.cn

https://feng.li/python

Importing data¶

  • A key, but often under-appreciated, step in data analysis is importing the data that we wish to analyze.

  • Pandas provides a convenient set of functions for importing tabular data in a number of formats directly into a DataFrame object. These functions include a slew of options to perform type inference, indexing, parsing, iterating and cleaning automatically as data are imported.

Read external data¶

  • There are several other data formats that can be imported into Python and converted into DataFrames, with the help of built-in or third-party libraries.

  • These include

    • CSV: read_csv()
    • Excel: read_excel()
    • JSON: read_json()
    • Parquet Format: read_parquet()
    • Stata: read_stata()
    • ...

A full treatment of these functions is beyond the scope of this tutorial, but they are covered in https://pandas.pydata.org/docs/user_guide/io.html .

Let's start with some more bacteria data, stored in csv format.

In [1]:
! head data/microbiome.csv
Taxon,Patient,Tissue,Stool
Firmicutes,1,632,305
Firmicutes,2,136,4182
Firmicutes,3,1174,703
Firmicutes,4,408,3946
Firmicutes,5,831,8605
Firmicutes,6,693,50
Firmicutes,7,718,717
Firmicutes,8,173,33
Firmicutes,9,228,80
  • This table can be read into a DataFrame using read_csv:
In [2]:
import pandas as pd
mb = pd.read_csv("data/microbiome.csv")
mb 
Out[2]:
Taxon Patient Tissue Stool
0 Firmicutes 1 632 305
1 Firmicutes 2 136 4182
2 Firmicutes 3 1174 703
3 Firmicutes 4 408 3946
4 Firmicutes 5 831 8605
... ... ... ... ...
70 Other 11 203 6
71 Other 12 392 6
72 Other 13 28 25
73 Other 14 12 22
74 Other 15 305 32

75 rows × 4 columns

  • Notice that read_csv automatically considered the first row in the file to be a header row. We can override the default behavior by customizing some of the arguments, such as header, names or index_col.
In [3]:
pd.read_csv("data/microbiome.csv", header=None)
Out[3]:
0 1 2 3
0 Taxon Patient Tissue Stool
1 Firmicutes 1 632 305
2 Firmicutes 2 136 4182
3 Firmicutes 3 1174 703
4 Firmicutes 4 408 3946
... ... ... ... ...
71 Other 11 203 6
72 Other 12 392 6
73 Other 13 28 25
74 Other 14 12 22
75 Other 15 305 32

76 rows × 4 columns
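
  • Similarly, the names argument can supply custom column labels; a minimal sketch, skipping the existing header row (the lowercase labels are illustrative):
In [ ]:
pd.read_csv("data/microbiome.csv", skiprows=1, header=None,
            names=['taxon', 'patient', 'tissue', 'stool'])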

  • For a more useful index, we can specify the first two columns, which together provide a unique index to the data.
In [4]:
mb = pd.read_csv("data/microbiome.csv", index_col=['Taxon','Patient'])
mb
Out[4]:
Tissue Stool
Taxon Patient
Firmicutes 1 632 305
2 136 4182
3 1174 703
4 408 3946
5 831 8605
... ... ... ...
Other 11 203 6
12 392 6
13 28 25
14 12 22
15 305 32

75 rows × 2 columns

  • This is called a hierarchical index, which we will revisit later in the tutorial.
  • If we have sections of data that we do not wish to import (for example, known bad data), we can pass the offending row numbers to the skiprows argument. This is useful for large datasets.
In [5]:
pd.read_csv("data/microbiome.csv", skiprows=[3,4,6])
Out[5]:
Taxon Patient Tissue Stool
0 Firmicutes 1 632 305
1 Firmicutes 2 136 4182
2 Firmicutes 5 831 8605
3 Firmicutes 7 718 717
4 Firmicutes 8 173 33
... ... ... ... ...
67 Other 11 203 6
68 Other 12 392 6
69 Other 13 28 25
70 Other 14 12 22
71 Other 15 305 32

72 rows × 4 columns

  • Conversely, if we only want to import a small number of rows from, say, a very large data file, we can use nrows to retrieve only that many rows from the top of the file.
In [6]:
pd.read_csv("data/microbiome.csv", nrows=4)
Out[6]:
Taxon Patient Tissue Stool
0 Firmicutes 1 632 305
1 Firmicutes 2 136 4182
2 Firmicutes 3 1174 703
3 Firmicutes 4 408 3946
  • Alternatively, if we want to process our data in reasonably sized chunks, the chunksize argument will return an iterable object that can be employed in a data-processing loop. For example, our microbiome data are organized by bacterial phylum, with 15 patients represented in each:
In [7]:
data_chunks = pd.read_csv("data/microbiome.csv", chunksize=15)
data_chunks
Out[7]:
<pandas.io.parsers.readers.TextFileReader at 0x7f4741161460>
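
  • The reader object is consumed by iterating over it; a minimal sketch of a chunk-wise processing loop (the per-chunk mean is just illustrative):
In [ ]:
chunk_means = []
for chunk in pd.read_csv("data/microbiome.csv", chunksize=15):
    # each chunk is an ordinary DataFrame holding up to 15 rows
    chunk_means.append(chunk['Tissue'].mean())
chunk_means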

Missing values¶

  • Most real-world data is incomplete, with values missing due to incomplete observation, data entry or transcription error, or other reasons. Pandas will automatically recognize and parse common missing data indicators, including NA and NULL.
In [8]:
!head data/microbiome_missing.csv
Taxon,Patient,Tissue,Stool
Firmicutes,1,632,305
Firmicutes,2,136,4182
Firmicutes,3,,703
Firmicutes,4,408,3946
Firmicutes,5,831,8605
Firmicutes,6,693,50
Firmicutes,7,718,717
Firmicutes,8,173,33
Firmicutes,9,228,NA
In [9]:
import pandas as pd
pd.read_csv("data/microbiome_missing.csv").head(20)
Out[9]:
Taxon Patient Tissue Stool
0 Firmicutes 1 632 305.0
1 Firmicutes 2 136 4182.0
2 Firmicutes 3 NaN 703.0
3 Firmicutes 4 408 3946.0
4 Firmicutes 5 831 8605.0
5 Firmicutes 6 693 50.0
6 Firmicutes 7 718 717.0
7 Firmicutes 8 173 33.0
8 Firmicutes 9 228 NaN
9 Firmicutes 10 162 3196.0
10 Firmicutes 11 372 -99999.0
11 Firmicutes 12 4255 4361.0
12 Firmicutes 13 107 1667.0
13 Firmicutes 14 ? 223.0
14 Firmicutes 15 281 2377.0
15 Proteobacteria 1 1638 3886.0
16 Proteobacteria 2 2469 1821.0
17 Proteobacteria 3 839 661.0
18 Proteobacteria 4 4414 18.0
19 Proteobacteria 5 12044 83.0

Above, Pandas recognized NA and an empty field as missing data.

In [10]:
pd.isnull(pd.read_csv("data/microbiome_missing.csv")).head(20)
Out[10]:
Taxon Patient Tissue Stool
0 False False False False
1 False False False False
2 False False True False
3 False False False False
4 False False False False
5 False False False False
6 False False False False
7 False False False False
8 False False False True
9 False False False False
10 False False False False
11 False False False False
12 False False False False
13 False False False False
14 False False False False
15 False False False False
16 False False False False
17 False False False False
18 False False False False
19 False False False False

Unfortunately, there will sometimes be inconsistency with the conventions for missing data. In this example, there is a question mark "?" and a large negative number where there should have been a positive integer. We can specify additional symbols with the na_values argument:

In [11]:
pd.read_csv("data/microbiome_missing.csv", na_values=['?', -99999]).head(20)
Out[11]:
Taxon Patient Tissue Stool
0 Firmicutes 1 632.0 305.0
1 Firmicutes 2 136.0 4182.0
2 Firmicutes 3 NaN 703.0
3 Firmicutes 4 408.0 3946.0
4 Firmicutes 5 831.0 8605.0
5 Firmicutes 6 693.0 50.0
6 Firmicutes 7 718.0 717.0
7 Firmicutes 8 173.0 33.0
8 Firmicutes 9 228.0 NaN
9 Firmicutes 10 162.0 3196.0
10 Firmicutes 11 372.0 NaN
11 Firmicutes 12 4255.0 4361.0
12 Firmicutes 13 107.0 1667.0
13 Firmicutes 14 NaN 223.0
14 Firmicutes 15 281.0 2377.0
15 Proteobacteria 1 1638.0 3886.0
16 Proteobacteria 2 2469.0 1821.0
17 Proteobacteria 3 839.0 661.0
18 Proteobacteria 4 4414.0 18.0
19 Proteobacteria 5 12044.0 83.0

These can be specified on a column-wise basis using an appropriate dict as the argument for na_values.
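
For example, a minimal sketch that treats '?' as missing only in Tissue and -99999 only in Stool:
In [ ]:
pd.read_csv("data/microbiome_missing.csv",
            na_values={'Tissue': ['?'], 'Stool': [-99999]}).head(20)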

Manipulating indices¶

Reindexing allows users to manipulate the data labels in a DataFrame. It forces a DataFrame to conform to the new index, optionally filling in missing data if requested.

For some variety, we will leave our digestive tract bacteria behind and employ some baseball data.

  • Specify a unique index
In [12]:
baseball = pd.read_csv("data/baseball.csv", index_col='id')
baseball
Out[12]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
id
88641 womacto01 2006 2 CHN NL 19 50 6 14 1 ... 2.0 1.0 1.0 4 4.0 0.0 0.0 3.0 0.0 0.0
88643 schilcu01 2006 1 BOS AL 31 2 0 1 0 ... 0.0 0.0 0.0 0 1.0 0.0 0.0 0.0 0.0 0.0
88645 myersmi01 2006 1 NYA AL 62 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
88649 helliri01 2006 1 MIL NL 20 3 0 0 0 ... 0.0 0.0 0.0 0 2.0 0.0 0.0 0.0 0.0 0.0
88650 johnsra05 2006 1 NYA AL 33 6 0 1 0 ... 0.0 0.0 0.0 0 4.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
89525 benitar01 2007 2 FLO NL 34 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
89526 benitar01 2007 1 SFN NL 19 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
89530 ausmubr01 2007 1 HOU NL 117 349 38 82 16 ... 25.0 6.0 1.0 37 74.0 3.0 6.0 4.0 1.0 11.0
89533 aloumo01 2007 1 NYN NL 87 328 51 112 19 ... 49.0 3.0 0.0 27 30.0 5.0 2.0 0.0 3.0 13.0
89534 alomasa02 2007 1 NYN NL 8 22 1 3 1 ... 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0 0.0 0.0

100 rows × 22 columns

  • Notice that we specified the id column as the index, since it appears to be a unique identifier. We could try to create a unique index ourselves by combining player and year:
In [13]:
player_id = baseball.player + baseball.year.astype(str)
baseball_newind = baseball.copy()
baseball_newind.index = player_id
baseball_newind
Out[13]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
womacto012006 womacto01 2006 2 CHN NL 19 50 6 14 1 ... 2.0 1.0 1.0 4 4.0 0.0 0.0 3.0 0.0 0.0
schilcu012006 schilcu01 2006 1 BOS AL 31 2 0 1 0 ... 0.0 0.0 0.0 0 1.0 0.0 0.0 0.0 0.0 0.0
myersmi012006 myersmi01 2006 1 NYA AL 62 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
helliri012006 helliri01 2006 1 MIL NL 20 3 0 0 0 ... 0.0 0.0 0.0 0 2.0 0.0 0.0 0.0 0.0 0.0
johnsra052006 johnsra05 2006 1 NYA AL 33 6 0 1 0 ... 0.0 0.0 0.0 0 4.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
benitar012007 benitar01 2007 2 FLO NL 34 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
benitar012007 benitar01 2007 1 SFN NL 19 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
ausmubr012007 ausmubr01 2007 1 HOU NL 117 349 38 82 16 ... 25.0 6.0 1.0 37 74.0 3.0 6.0 4.0 1.0 11.0
aloumo012007 aloumo01 2007 1 NYN NL 87 328 51 112 19 ... 49.0 3.0 0.0 27 30.0 5.0 2.0 0.0 3.0 13.0
alomasa022007 alomasa02 2007 1 NYN NL 8 22 1 3 1 ... 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0 0.0 0.0

100 rows × 22 columns

In [14]:
baseball_newind.index.is_unique
Out[14]:
False
  • So, indices need not be unique. Our choice is not unique because some players change teams within years. The most important consequence of a non-unique index is that indexing by label will return multiple values for some labels:
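  • For example, selecting the duplicated label below returns both of that player's 2007 stints (a minimal sketch):
In [ ]:
# 'benitar012007' occurs twice because the player changed teams during 2007
baseball_newind.loc['benitar012007']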
In [15]:
reverse_index = baseball.index[::-1]
baseball.reindex(reverse_index)
Out[15]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
id
89534 alomasa02 2007 1 NYN NL 8 22 1 3 1 ... 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0 0.0 0.0
89533 aloumo01 2007 1 NYN NL 87 328 51 112 19 ... 49.0 3.0 0.0 27 30.0 5.0 2.0 0.0 3.0 13.0
89530 ausmubr01 2007 1 HOU NL 117 349 38 82 16 ... 25.0 6.0 1.0 37 74.0 3.0 6.0 4.0 1.0 11.0
89526 benitar01 2007 1 SFN NL 19 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
89525 benitar01 2007 2 FLO NL 34 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
88650 johnsra05 2006 1 NYA AL 33 6 0 1 0 ... 0.0 0.0 0.0 0 4.0 0.0 0.0 0.0 0.0 0.0
88649 helliri01 2006 1 MIL NL 20 3 0 0 0 ... 0.0 0.0 0.0 0 2.0 0.0 0.0 0.0 0.0 0.0
88645 myersmi01 2006 1 NYA AL 62 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
88643 schilcu01 2006 1 BOS AL 31 2 0 1 0 ... 0.0 0.0 0.0 0 1.0 0.0 0.0 0.0 0.0 0.0
88641 womacto01 2006 2 CHN NL 19 50 6 14 1 ... 2.0 1.0 1.0 4 4.0 0.0 0.0 3.0 0.0 0.0

100 rows × 22 columns

  • Notice that the id index is not sequential. Say we wanted to populate the table with every id value. We could specify an index that is a sequence from the first to the last id numbers in the database, and Pandas would fill in the missing data with NaN values:
In [16]:
id_range = range(baseball.index.values.min(), baseball.index.values.max())
baseball.reindex(id_range)
Out[16]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
id
88641 womacto01 2006.0 2.0 CHN NL 19.0 50.0 6.0 14.0 1.0 ... 2.0 1.0 1.0 4.0 4.0 0.0 0.0 3.0 0.0 0.0
88642 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
88643 schilcu01 2006.0 1.0 BOS AL 31.0 2.0 0.0 1.0 0.0 ... 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0
88644 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
88645 myersmi01 2006.0 1.0 NYA AL 62.0 0.0 0.0 0.0 0.0 ... 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
89529 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
89530 ausmubr01 2007.0 1.0 HOU NL 117.0 349.0 38.0 82.0 16.0 ... 25.0 6.0 1.0 37.0 74.0 3.0 6.0 4.0 1.0 11.0
89531 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
89532 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN ... NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN
89533 aloumo01 2007.0 1.0 NYN NL 87.0 328.0 51.0 112.0 19.0 ... 49.0 3.0 0.0 27.0 30.0 5.0 2.0 0.0 3.0 13.0

893 rows × 22 columns

In [17]:
baseball.reindex(id_range, fill_value='mr.nobody', columns=['player'])
Out[17]:
player
id
88641 womacto01
88642 mr.nobody
88643 schilcu01
88644 mr.nobody
88645 myersmi01
... ...
89529 mr.nobody
89530 ausmubr01
89531 mr.nobody
89532 mr.nobody
89533 aloumo01

893 rows × 1 columns

  • The index can also be sorted:
In [18]:
baseball_newind.sort_index()
Out[18]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
alomasa022007 alomasa02 2007 1 NYN NL 8 22 1 3 1 ... 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0 0.0 0.0
aloumo012007 aloumo01 2007 1 NYN NL 87 328 51 112 19 ... 49.0 3.0 0.0 27 30.0 5.0 2.0 0.0 3.0 13.0
ausmubr012007 ausmubr01 2007 1 HOU NL 117 349 38 82 16 ... 25.0 6.0 1.0 37 74.0 3.0 6.0 4.0 1.0 11.0
benitar012007 benitar01 2007 2 FLO NL 34 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
benitar012007 benitar01 2007 1 SFN NL 19 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
wickmbo012007 wickmbo01 2007 1 ATL NL 47 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
williwo022007 williwo02 2007 1 HOU NL 33 59 3 6 0 ... 2.0 0.0 0.0 0 25.0 0.0 0.0 5.0 0.0 1.0
witasja012007 witasja01 2007 1 TBA AL 3 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
womacto012006 womacto01 2006 2 CHN NL 19 50 6 14 1 ... 2.0 1.0 1.0 4 4.0 0.0 0.0 3.0 0.0 0.0
zaungr012007 zaungr01 2007 1 TOR AL 110 331 43 80 24 ... 52.0 0.0 0.0 51 55.0 8.0 2.0 1.0 6.0 9.0

100 rows × 22 columns

In [19]:
baseball_newind.sort_index(ascending=False)
Out[19]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
zaungr012007 zaungr01 2007 1 TOR AL 110 331 43 80 24 ... 52.0 0.0 0.0 51 55.0 8.0 2.0 1.0 6.0 9.0
womacto012006 womacto01 2006 2 CHN NL 19 50 6 14 1 ... 2.0 1.0 1.0 4 4.0 0.0 0.0 3.0 0.0 0.0
witasja012007 witasja01 2007 1 TBA AL 3 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
williwo022007 williwo02 2007 1 HOU NL 33 59 3 6 0 ... 2.0 0.0 0.0 0 25.0 0.0 0.0 5.0 0.0 1.0
wickmbo012007 wickmbo01 2007 1 ATL NL 47 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
benitar012007 benitar01 2007 2 FLO NL 34 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
benitar012007 benitar01 2007 1 SFN NL 19 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
ausmubr012007 ausmubr01 2007 1 HOU NL 117 349 38 82 16 ... 25.0 6.0 1.0 37 74.0 3.0 6.0 4.0 1.0 11.0
aloumo012007 aloumo01 2007 1 NYN NL 87 328 51 112 19 ... 49.0 3.0 0.0 27 30.0 5.0 2.0 0.0 3.0 13.0
alomasa022007 alomasa02 2007 1 NYN NL 8 22 1 3 1 ... 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0 0.0 0.0

100 rows × 22 columns

In [20]:
baseball_newind.sort_index(axis=1)
Out[20]:
X2b X3b ab bb cs g gidp h hbp hr ... player r rbi sb sf sh so stint team year
womacto012006 1 0 50 4 1.0 19 0.0 14 0.0 1 ... womacto01 6 2.0 1.0 0.0 3.0 4.0 2 CHN 2006
schilcu012006 0 0 2 0 0.0 31 0.0 1 0.0 0 ... schilcu01 0 0.0 0.0 0.0 0.0 1.0 1 BOS 2006
myersmi012006 0 0 0 0 0.0 62 0.0 0 0.0 0 ... myersmi01 0 0.0 0.0 0.0 0.0 0.0 1 NYA 2006
helliri012006 0 0 3 0 0.0 20 0.0 0 0.0 0 ... helliri01 0 0.0 0.0 0.0 0.0 2.0 1 MIL 2006
johnsra052006 0 0 6 0 0.0 33 0.0 1 0.0 0 ... johnsra05 0 0.0 0.0 0.0 0.0 4.0 1 NYA 2006
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
benitar012007 0 0 0 0 0.0 34 0.0 0 0.0 0 ... benitar01 0 0.0 0.0 0.0 0.0 0.0 2 FLO 2007
benitar012007 0 0 0 0 0.0 19 0.0 0 0.0 0 ... benitar01 0 0.0 0.0 0.0 0.0 0.0 1 SFN 2007
ausmubr012007 16 3 349 37 1.0 117 11.0 82 6.0 3 ... ausmubr01 38 25.0 6.0 1.0 4.0 74.0 1 HOU 2007
aloumo012007 19 1 328 27 0.0 87 13.0 112 2.0 13 ... aloumo01 51 49.0 3.0 3.0 0.0 30.0 1 NYN 2007
alomasa022007 1 0 22 0 0.0 8 0.0 3 0.0 0 ... alomasa02 1 0.0 0.0 0.0 0.0 3.0 1 NYN 2007

100 rows × 22 columns

  • Ranking does not re-arrange the data, but instead returns a Series of ranks, giving each value's rank relative to the others in the Series.
In [21]:
baseball.hr.rank()
Out[21]:
id
88641    62.5
88643    29.0
88645    29.0
88649    29.0
88650    29.0
         ... 
89525    29.0
89526    29.0
89530    71.5
89533    88.0
89534    29.0
Name: hr, Length: 100, dtype: float64
  • Alternatively, you can break ties via one of several methods, such as by the order in which they occur in the dataset:
In [22]:
baseball.hr.rank(method='first')
Out[22]:
id
88641    58.0
88643     1.0
88645     2.0
88649     3.0
88650     4.0
         ... 
89525    55.0
89526    56.0
89530    72.0
89533    88.0
89534    57.0
Name: hr, Length: 100, dtype: float64
  • Calling the DataFrame's rank method results in the ranks of all columns:
In [23]:
baseball.rank(ascending=False)
Out[23]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
id
88641 2.0 96.5 7.0 82.0 31.5 70.0 47.5 40.5 39.0 50.5 ... 51.0 24.5 17.5 44.5 59.0 66.0 65.5 16.0 70.0 76.5
88643 37.5 96.5 57.0 88.0 81.5 55.5 73.0 81.0 63.5 78.0 ... 78.5 63.5 62.5 79.0 73.0 66.0 65.5 67.5 70.0 76.5
88645 47.5 96.5 57.0 40.5 81.5 36.0 91.0 81.0 84.5 78.0 ... 78.5 63.5 62.5 79.0 89.0 66.0 65.5 67.5 70.0 76.5
88649 66.0 96.5 57.0 47.0 31.5 67.5 69.0 81.0 84.5 78.0 ... 78.5 63.5 62.5 79.0 67.0 66.0 65.5 67.5 70.0 76.5
88650 61.5 96.5 57.0 40.5 81.5 51.0 64.5 81.0 63.5 78.0 ... 78.5 63.5 62.5 79.0 59.0 66.0 65.5 67.5 70.0 76.5
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
89525 96.5 46.5 7.0 64.0 31.5 47.0 91.0 81.0 84.5 78.0 ... 78.5 63.5 62.5 79.0 89.0 66.0 65.5 67.5 70.0 76.5
89526 96.5 46.5 57.0 13.5 31.5 70.0 91.0 81.0 84.5 78.0 ... 78.5 63.5 62.5 79.0 89.0 66.0 65.5 67.5 70.0 76.5
89530 98.0 46.5 57.0 61.5 31.5 17.5 19.0 24.0 23.0 21.5 ... 27.0 7.0 17.5 18.5 10.0 18.0 6.5 12.0 33.5 14.0
89533 99.0 46.5 57.0 31.5 31.5 23.0 22.0 18.5 14.0 17.5 ... 18.0 14.0 62.5 22.0 27.0 11.0 21.0 67.5 15.5 10.5
89534 100.0 46.5 57.0 31.5 31.5 77.0 57.0 58.0 58.0 50.5 ... 78.5 63.5 62.5 79.0 63.5 66.0 65.5 67.5 70.0 76.5

100 rows × 22 columns

In [24]:
baseball[['r','h','hr']].rank(ascending=False)
Out[24]:
r h hr
id
88641 40.5 39.0 38.5
88643 81.0 63.5 72.0
88645 81.0 84.5 72.0
88649 81.0 84.5 72.0
88650 81.0 63.5 72.0
... ... ... ...
89525 81.0 84.5 72.0
89526 81.0 84.5 72.0
89530 24.0 23.0 29.5
89533 18.5 14.0 13.0
89534 58.0 58.0 72.0

100 rows × 3 columns

Remove rows or columns via the drop method:¶

In [25]:
baseball.shape
Out[25]:
(100, 22)
In [26]:
baseball.drop([89525, 89526]) # does not modify the original DataFrame
Out[26]:
player year stint team lg g ab r h X2b ... rbi sb cs bb so ibb hbp sh sf gidp
id
88641 womacto01 2006 2 CHN NL 19 50 6 14 1 ... 2.0 1.0 1.0 4 4.0 0.0 0.0 3.0 0.0 0.0
88643 schilcu01 2006 1 BOS AL 31 2 0 1 0 ... 0.0 0.0 0.0 0 1.0 0.0 0.0 0.0 0.0 0.0
88645 myersmi01 2006 1 NYA AL 62 0 0 0 0 ... 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0 0.0 0.0
88649 helliri01 2006 1 MIL NL 20 3 0 0 0 ... 0.0 0.0 0.0 0 2.0 0.0 0.0 0.0 0.0 0.0
88650 johnsra05 2006 1 NYA AL 33 6 0 1 0 ... 0.0 0.0 0.0 0 4.0 0.0 0.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
89521 bondsba01 2007 1 SFN NL 126 340 75 94 14 ... 66.0 5.0 0.0 132 54.0 43.0 3.0 0.0 2.0 13.0
89523 biggicr01 2007 1 HOU NL 141 517 68 130 31 ... 50.0 4.0 3.0 23 112.0 0.0 3.0 7.0 5.0 5.0
89530 ausmubr01 2007 1 HOU NL 117 349 38 82 16 ... 25.0 6.0 1.0 37 74.0 3.0 6.0 4.0 1.0 11.0
89533 aloumo01 2007 1 NYN NL 87 328 51 112 19 ... 49.0 3.0 0.0 27 30.0 5.0 2.0 0.0 3.0 13.0
89534 alomasa02 2007 1 NYN NL 8 22 1 3 1 ... 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0 0.0 0.0

98 rows × 22 columns

In [27]:
baseball.shape
Out[27]:
(100, 22)
In [28]:
baseball.drop(['ibb','hbp'], axis=1) # in Pandas, axis=0 refers to rows and axis=1 to columns
Out[28]:
player year stint team lg g ab r h X2b X3b hr rbi sb cs bb so sh sf gidp
id
88641 womacto01 2006 2 CHN NL 19 50 6 14 1 0 1 2.0 1.0 1.0 4 4.0 3.0 0.0 0.0
88643 schilcu01 2006 1 BOS AL 31 2 0 1 0 0 0 0.0 0.0 0.0 0 1.0 0.0 0.0 0.0
88645 myersmi01 2006 1 NYA AL 62 0 0 0 0 0 0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0
88649 helliri01 2006 1 MIL NL 20 3 0 0 0 0 0 0.0 0.0 0.0 0 2.0 0.0 0.0 0.0
88650 johnsra05 2006 1 NYA AL 33 6 0 1 0 0 0 0.0 0.0 0.0 0 4.0 0.0 0.0 0.0
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
89525 benitar01 2007 2 FLO NL 34 0 0 0 0 0 0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0
89526 benitar01 2007 1 SFN NL 19 0 0 0 0 0 0 0.0 0.0 0.0 0 0.0 0.0 0.0 0.0
89530 ausmubr01 2007 1 HOU NL 117 349 38 82 16 3 3 25.0 6.0 1.0 37 74.0 4.0 1.0 11.0
89533 aloumo01 2007 1 NYN NL 87 328 51 112 19 1 13 49.0 3.0 0.0 27 30.0 0.0 3.0 13.0
89534 alomasa02 2007 1 NYN NL 8 22 1 3 1 0 0 0.0 0.0 0.0 0 3.0 0.0 0.0 0.0

100 rows × 20 columns

Join two Pandas DataFrames¶

DataFrame and Series objects allow for several operations to take place either on a single object, or between two or more objects.

For example, we can perform arithmetic on the elements of two objects, such as combining baseball statistics across years:

In [29]:
hr2006 = baseball[baseball.year==2006].xs('hr', axis=1)
hr2006.index = baseball.player[baseball.year==2006]

hr2007 = baseball[baseball.year==2007].xs('hr', axis=1)
hr2007.index = baseball.player[baseball.year==2007]
In [30]:
hr2006 = pd.Series(baseball.hr[baseball.year==2006].values, index=baseball.player[baseball.year==2006])
hr2006
Out[30]:
player
womacto01     1
schilcu01     0
myersmi01     0
helliri01     0
johnsra05     0
finlest01     6
gonzalu01    15
seleaa01      0
dtype: int64
In [31]:
hr2007 = pd.Series(baseball.hr[baseball.year==2007].values, index=baseball.player[baseball.year==2007])
hr2007
Out[31]:
player
francju01     0
francju01     1
zaungr01     10
witasja01     0
williwo02     1
             ..
benitar01     0
benitar01     0
ausmubr01     3
aloumo01     13
alomasa02     0
Length: 92, dtype: int64
In [32]:
hr_total = hr2006 + hr2007 
hr_total
Out[32]:
player
alomasa02   NaN
aloumo01    NaN
ausmubr01   NaN
benitar01   NaN
benitar01   NaN
             ..
wickmbo01   NaN
williwo02   NaN
witasja01   NaN
womacto01   NaN
zaungr01    NaN
Length: 94, dtype: float64
  • Pandas' data alignment places NaN values for labels that do not overlap in the two Series. In fact, there are only 6 players that occur in both years.
In [33]:
hr_total[hr_total.notnull()]
Out[33]:
player
finlest01     7.0
gonzalu01    30.0
johnsra05     0.0
myersmi01     0.0
schilcu01     0.0
seleaa01      0.0
dtype: float64

Merging and joining DataFrame objects¶

In this section, we will manipulate data collected from ocean-going vessels on the eastern seaboard. Vessel operations are monitored using the Automatic Identification System (AIS), a safety at sea navigation technology which vessels are required to maintain and that uses transponders to transmit very high frequency (VHF) radio signals containing static information including ship name, call sign, and country of origin, as well as dynamic information unique to a particular voyage such as vessel location, heading, and speed.

The International Maritime Organization’s (IMO) International Convention for the Safety of Life at Sea requires functioning AIS capabilities on all vessels 300 gross tons or greater and the US Coast Guard requires AIS on nearly all vessels sailing in U.S. waters. The Coast Guard has established a national network of AIS receivers that provides coverage of nearly all U.S. waters. AIS signals are transmitted several times each minute and the network is capable of handling thousands of reports per minute and updates as often as every two seconds. Therefore, a typical voyage in our study might include the transmission of hundreds or thousands of AIS encoded signals. This provides a rich source of spatial data that includes both spatial and temporal information.

For our purposes, we will use summarized data that describes the transit of a given vessel through a particular administrative area. The data includes the start and end time of the transit segment, as well as information about the speed of the vessel, how far it travelled, etc.

In [34]:
segments = pd.read_csv("data/AIS/transit_segments.csv")
segments
Out[34]:
mmsi name transit segment seg_length avg_sog min_sog max_sog pdgt10 st_time end_time
0 1 Us Govt Ves 1 1 5.1 13.2 9.2 14.5 96.5 2/10/09 16:03 2/10/09 16:27
1 1 Dredge Capt Frank 1 1 13.5 18.6 10.4 20.6 100.0 4/6/09 14:31 4/6/09 15:20
2 1 Us Gov Vessel 1 1 4.3 16.2 10.3 20.5 100.0 4/6/09 14:36 4/6/09 14:55
3 1 Us Gov Vessel 2 1 9.2 15.4 14.5 16.1 100.0 4/10/09 17:58 4/10/09 18:34
4 1 Dredge Capt Frank 2 1 9.2 15.4 14.6 16.2 100.0 4/10/09 17:59 4/10/09 18:35
... ... ... ... ... ... ... ... ... ... ... ...
262521 999999999 Triple Attraction 3 1 5.3 20.0 19.6 20.4 100.0 6/15/10 12:49 6/15/10 13:05
262522 999999999 Triple Attraction 4 1 18.7 19.2 18.4 19.9 100.0 6/15/10 21:32 6/15/10 22:29
262523 999999999 Triple Attraction 6 1 17.4 17.0 14.7 18.4 100.0 6/17/10 19:16 6/17/10 20:17
262524 999999999 Triple Attraction 7 1 31.5 14.2 13.4 15.1 100.0 6/18/10 2:52 6/18/10 5:03
262525 999999999 Triple Attraction 8 1 19.8 18.6 16.1 19.5 100.0 6/18/10 10:19 6/18/10 11:22

262526 rows × 11 columns

  • In addition to the behavior of each vessel, we may want a little more information regarding the vessels themselves. In the data/AIS folder there is a second table that contains information about each of the ships that traveled the segments in the segments table.
In [35]:
vessels = pd.read_csv("data/AIS/vessel_information.csv", index_col='mmsi')
vessels
Out[35]:
num_names names sov flag flag_type num_loas loa max_loa num_types type
mmsi
1 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing
9 3 000000009/Raven/Shearwater N Unknown Unknown 2 50.0/62.0 62.0 2 Pleasure/Tug
21 1 Us Gov Vessel Y Unknown Unknown 1 208.0 208.0 1 Unknown
74 2 Mcfaul/Sarah Bell N Unknown Unknown 1 155.0 155.0 1 Unknown
103 3 Ron G/Us Navy Warship 103/Us Warship 103 Y Unknown Unknown 2 26.0/155.0 155.0 2 Tanker/Unknown
... ... ... ... ... ... ... ... ... ... ...
919191919 1 Oi N Unknown Unknown 1 20.0 20.0 1 Pleasure
967191190 1 Pathfinder N Unknown Unknown 1 31.0 31.0 2 BigTow/Towing
975318642 1 Island Express N Unknown Unknown 1 20.0 20.0 1 Towing
987654321 2 Island Lookout/Island Tide N Unknown Unknown 2 22.0/23.0 23.0 2 Fishing/Towing
999999999 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure

10771 rows × 10 columns

In [36]:
vessels.type.value_counts()
Out[36]:
Cargo                      5622
Tanker                     2440
Pleasure                    601
Tug                         221
Sailing                     205
                           ... 
AntiPol/Other                 1
Fishing/Law                   1
Cargo/Other/Towing            1
Cargo/Fishing                 1
Fishing/Reserved/Towing       1
Name: type, Length: 206, dtype: int64
  • The challenge, however, is that several ships have travelled multiple segments, so there is not a one-to-one relationship between the rows of the two tables. The table of vessel information has a one-to-many relationship with the segments.

  • In Pandas, we can combine tables according to the value of one or more keys that are used to identify rows, much like an index. Using a trivial example:

In [40]:
import numpy as np
df1 = pd.DataFrame(dict(id=range(4), age=np.random.randint(18, 31, size=4)))
df2 = pd.DataFrame(dict(id=range(6), score=np.random.random(size=6)))
In [41]:
pd.merge(df1, df2)
Out[41]:
id age score
0 0 24 0.666117
1 1 23 0.249913
2 2 26 0.261396
3 3 30 0.651306
  • Notice that without any information about which column to use as a key, Pandas did the right thing and used the id column in both tables. Unless specified otherwise, merge will use any common column names as keys for merging the tables.

  • By default, merge performs an inner join on the tables, meaning that the merged table represents an intersection of the two tables.

In [42]:
pd.merge(df1, df2, how='outer')
Out[42]:
id age score
0 0 24.0 0.666117
1 1 23.0 0.249913
2 2 26.0 0.261396
3 3 30.0 0.651306
4 4 NaN 0.468143
5 5 NaN 0.461524
  • The outer join above yields the union of the two tables, so all rows are represented, with missing values inserted as appropriate. One can also perform left and right joins to include all rows of the left or right table (i.e. the first or second argument to merge, respectively), but not necessarily the other.
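
  • For instance, a left join keeps every row of the first (left) table and fills in NaN where the second has no match; a minimal sketch using the toy tables above:
In [ ]:
# keep every row of df2; age is NaN for ids that df1 does not contain
pd.merge(df2, df1, how='left')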

  • Looking at the two datasets that we wish to merge:

In [43]:
segments.head(1)
Out[43]:
mmsi name transit segment seg_length avg_sog min_sog max_sog pdgt10 st_time end_time
0 1 Us Govt Ves 1 1 5.1 13.2 9.2 14.5 96.5 2/10/09 16:03 2/10/09 16:27
In [44]:
vessels.head(1)
Out[44]:
num_names names sov flag flag_type num_loas loa max_loa num_types type
mmsi
1 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing
  • We see that there is an mmsi value (a vessel identifier) in each table, but it is used as the index of the vessels table. In this case, we have to specify joining on the index for the vessels table and on the mmsi column for the segments table.
In [45]:
segments_merged = pd.merge(vessels, segments, left_index=True, right_on='mmsi')
In [46]:
segments_merged
Out[46]:
num_names names sov flag flag_type num_loas loa max_loa num_types type ... name transit segment seg_length avg_sog min_sog max_sog pdgt10 st_time end_time
0 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Us Govt Ves 1 1 5.1 13.2 9.2 14.5 96.5 2/10/09 16:03 2/10/09 16:27
1 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Dredge Capt Frank 1 1 13.5 18.6 10.4 20.6 100.0 4/6/09 14:31 4/6/09 15:20
2 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Us Gov Vessel 1 1 4.3 16.2 10.3 20.5 100.0 4/6/09 14:36 4/6/09 14:55
3 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Us Gov Vessel 2 1 9.2 15.4 14.5 16.1 100.0 4/10/09 17:58 4/10/09 18:34
4 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Dredge Capt Frank 2 1 9.2 15.4 14.6 16.2 100.0 4/10/09 17:59 4/10/09 18:35
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
262521 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 3 1 5.3 20.0 19.6 20.4 100.0 6/15/10 12:49 6/15/10 13:05
262522 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 4 1 18.7 19.2 18.4 19.9 100.0 6/15/10 21:32 6/15/10 22:29
262523 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 6 1 17.4 17.0 14.7 18.4 100.0 6/17/10 19:16 6/17/10 20:17
262524 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 7 1 31.5 14.2 13.4 15.1 100.0 6/18/10 2:52 6/18/10 5:03
262525 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 8 1 19.8 18.6 16.1 19.5 100.0 6/18/10 10:19 6/18/10 11:22

262353 rows × 21 columns

  • In this case, the default inner join is suitable; we are not interested in observations from either table that do not have corresponding entries in the other.

  • Notice that the mmsi field, which was the index of the vessels table, is no longer an index on the merged table.

  • Here, we used the merge function to perform the merge; we could also have used the merge method for either of the tables:

In [47]:
vessels.merge(segments, left_index=True, right_on='mmsi')
Out[47]:
num_names names sov flag flag_type num_loas loa max_loa num_types type ... name transit segment seg_length avg_sog min_sog max_sog pdgt10 st_time end_time
0 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Us Govt Ves 1 1 5.1 13.2 9.2 14.5 96.5 2/10/09 16:03 2/10/09 16:27
1 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Dredge Capt Frank 1 1 13.5 18.6 10.4 20.6 100.0 4/6/09 14:31 4/6/09 15:20
2 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Us Gov Vessel 1 1 4.3 16.2 10.3 20.5 100.0 4/6/09 14:36 4/6/09 14:55
3 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Us Gov Vessel 2 1 9.2 15.4 14.5 16.1 100.0 4/10/09 17:58 4/10/09 18:34
4 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... Dredge Capt Frank 2 1 9.2 15.4 14.6 16.2 100.0 4/10/09 17:59 4/10/09 18:35
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
262521 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 3 1 5.3 20.0 19.6 20.4 100.0 6/15/10 12:49 6/15/10 13:05
262522 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 4 1 18.7 19.2 18.4 19.9 100.0 6/15/10 21:32 6/15/10 22:29
262523 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 6 1 17.4 17.0 14.7 18.4 100.0 6/17/10 19:16 6/17/10 20:17
262524 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 7 1 31.5 14.2 13.4 15.1 100.0 6/18/10 2:52 6/18/10 5:03
262525 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... Triple Attraction 8 1 19.8 18.6 16.1 19.5 100.0 6/18/10 10:19 6/18/10 11:22

262353 rows × 21 columns

  • Occasionally, there will be fields with the same name in both tables that we do not wish to use to join the tables; they may contain different information, despite having the same name. In this case, Pandas will by default append suffixes _x and _y to the columns to uniquely identify them.
In [48]:
segments['type'] = 'foo'
pd.merge(vessels, segments, left_index=True, right_on='mmsi')
Out[48]:
num_names names sov flag flag_type num_loas loa max_loa num_types type_x ... transit segment seg_length avg_sog min_sog max_sog pdgt10 st_time end_time type_y
0 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... 1 1 5.1 13.2 9.2 14.5 96.5 2/10/09 16:03 2/10/09 16:27 foo
1 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... 1 1 13.5 18.6 10.4 20.6 100.0 4/6/09 14:31 4/6/09 15:20 foo
2 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... 1 1 4.3 16.2 10.3 20.5 100.0 4/6/09 14:36 4/6/09 14:55 foo
3 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... 2 1 9.2 15.4 14.5 16.1 100.0 4/10/09 17:58 4/10/09 18:34 foo
4 8 Bil Holman Dredge/Dredge Capt Frank/Emo/Offsho... Y Unknown Unknown 7 42.0/48.0/57.0/90.0/138.0/154.0/156.0 156.0 4 Dredging/MilOps/Reserved/Towing ... 2 1 9.2 15.4 14.6 16.2 100.0 4/10/09 17:59 4/10/09 18:35 foo
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
262521 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... 3 1 5.3 20.0 19.6 20.4 100.0 6/15/10 12:49 6/15/10 13:05 foo
262522 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... 4 1 18.7 19.2 18.4 19.9 100.0 6/15/10 21:32 6/15/10 22:29 foo
262523 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... 6 1 17.4 17.0 14.7 18.4 100.0 6/17/10 19:16 6/17/10 20:17 foo
262524 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... 7 1 31.5 14.2 13.4 15.1 100.0 6/18/10 2:52 6/18/10 5:03 foo
262525 1 Triple Attraction N Unknown Unknown 1 30.0 30.0 1 Pleasure ... 8 1 19.8 18.6 16.1 19.5 100.0 6/18/10 10:19 6/18/10 11:22 foo

262353 rows × 22 columns

  • This behavior can be overridden by specifying a suffixes argument, containing a list of the suffixes to be used for the columns of the left and right tables, respectively.
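
  • For example, a minimal sketch reusing the merge above:
In [ ]:
pd.merge(vessels, segments, left_index=True, right_on='mmsi',
         suffixes=('_vessel', '_segment'))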

Concatenation¶

A common data manipulation is appending rows or columns to a dataset such that they conform to the dimensions of the existing rows or columns, respectively. In NumPy, this is done either with concatenate or the convenience functions c_ and r_:

In [49]:
np.concatenate([np.random.random(5), np.random.random(5)])
Out[49]:
array([0.42336207, 0.74807952, 0.61839076, 0.54794432, 0.06227732,
       0.71618874, 0.31763132, 0.26021656, 0.22395665, 0.08499033])
In [50]:
np.r_[np.random.random(5), np.random.random(5)]
Out[50]:
array([0.48601962, 0.11484701, 0.93892836, 0.16884999, 0.71700162,
       0.92519913, 0.26827622, 0.41866975, 0.59348726, 0.06054373])
In [51]:
np.c_[np.random.random(5), np.random.random(5)]
Out[51]:
array([[0.48861909, 0.23961022],
       [0.68685816, 0.7662155 ],
       [0.76304197, 0.63356894],
       [0.45533848, 0.36265383],
       [0.85205653, 0.84605096]])
  • This operation is also called binding or stacking. With Pandas' indexed data structures, there are additional considerations, as the overlap in index values between two data structures affects how they are concatenated.

  • Let's import two microbiome datasets, each consisting of counts of microorganisms from a particular patient. We will use the first column of each dataset as the index.

In [52]:
# Pandas requires external modules to read Excel files
! pip3 install xlrd openpyxl --user
Looking in indexes: https://mirrors.163.com/pypi/simple/
Requirement already satisfied: xlrd in /home/fli/.local/lib/python3.9/site-packages (2.0.1)
Requirement already satisfied: openpyxl in /home/fli/.local/lib/python3.9/site-packages (3.0.9)
Requirement already satisfied: et-xmlfile in /home/fli/.local/lib/python3.9/site-packages (from openpyxl) (1.0.1)
In [53]:
mb1 = pd.read_excel('data/microbiome/MID1.xls', 'Sheet 1', index_col=0, header=None)
mb2 = pd.read_excel('data/microbiome/MID2.xls', 'Sheet 1', index_col=0, header=None)
mb1.shape, mb2.shape
Out[53]:
((272, 1), (288, 1))
In [54]:
mb1
Out[54]:
1
0
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera 7
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Pyrodictiaceae Pyrolobus 2
Archaea "Crenarchaeota" Thermoprotei Sulfolobales Sulfolobaceae Stygiolobus 3
Archaea "Crenarchaeota" Thermoprotei Thermoproteales Thermofilaceae Thermofilum 3
Archaea "Euryarchaeota" "Methanomicrobia" Methanocellales Methanocellaceae Methanocella 7
... ...
Bacteria "Thermotogae" Thermotogae Thermotogales Thermotogaceae Kosmotoga 9
Bacteria "Verrucomicrobia" Opitutae Opitutales Opitutaceae Alterococcus 1
Bacteria Cyanobacteria Cyanobacteria Chloroplast Bangiophyceae 2
Bacteria Cyanobacteria Cyanobacteria Chloroplast Chlorarachniophyceae 85
Bacteria Cyanobacteria Cyanobacteria Chloroplast Streptophyta 1388

272 rows × 1 columns

  • Let's give the index and columns meaningful labels:
In [55]:
mb1.columns = mb2.columns = ['Count']
In [56]:
mb1.index.name = mb2.index.name = 'Taxon'
In [57]:
mb1
Out[57]:
Count
Taxon
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera 7
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Pyrodictiaceae Pyrolobus 2
Archaea "Crenarchaeota" Thermoprotei Sulfolobales Sulfolobaceae Stygiolobus 3
Archaea "Crenarchaeota" Thermoprotei Thermoproteales Thermofilaceae Thermofilum 3
Archaea "Euryarchaeota" "Methanomicrobia" Methanocellales Methanocellaceae Methanocella 7
... ...
Bacteria "Thermotogae" Thermotogae Thermotogales Thermotogaceae Kosmotoga 9
Bacteria "Verrucomicrobia" Opitutae Opitutales Opitutaceae Alterococcus 1
Bacteria Cyanobacteria Cyanobacteria Chloroplast Bangiophyceae 2
Bacteria Cyanobacteria Cyanobacteria Chloroplast Chlorarachniophyceae 85
Bacteria Cyanobacteria Cyanobacteria Chloroplast Streptophyta 1388

272 rows × 1 columns

In [58]:
mb1.index[:3]
Out[58]:
Index(['Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera',
       'Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Pyrodictiaceae Pyrolobus',
       'Archaea "Crenarchaeota" Thermoprotei Sulfolobales Sulfolobaceae Stygiolobus'],
      dtype='object', name='Taxon')
In [59]:
mb1.index.is_unique
Out[59]:
True
  • If we concatenate along axis=0 (the default), we will obtain another data frame with the rows concatenated:
In [60]:
pd.concat([mb1, mb2], axis=0).shape
Out[60]:
(560, 1)

However, the index is no longer unique, due to overlap between the two DataFrames.

In [61]:
pd.concat([mb1, mb2], axis=0).index.is_unique
Out[61]:
False
  • Concatenating along axis=1 will concatenate column-wise, but respecting the indices of the two DataFrames.
In [62]:
pd.concat([mb1, mb2], axis=1).shape
Out[62]:
(438, 2)
In [63]:
pd.concat([mb1, mb2], axis=1)
Out[63]:
Count Count
Taxon
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera 7.0 23.0
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Pyrodictiaceae Pyrolobus 2.0 2.0
Archaea "Crenarchaeota" Thermoprotei Sulfolobales Sulfolobaceae Stygiolobus 3.0 10.0
Archaea "Crenarchaeota" Thermoprotei Thermoproteales Thermofilaceae Thermofilum 3.0 9.0
Archaea "Euryarchaeota" "Methanomicrobia" Methanocellales Methanocellaceae Methanocella 7.0 9.0
... ... ...
Bacteria "Proteobacteria" Gammaproteobacteria Oceanospirillales Oceanospirillales_incertae_sedis Spongiispira NaN 1.0
Bacteria "Proteobacteria" Gammaproteobacteria Thiotrichales Piscirickettsiaceae Hydrogenovibrio NaN 9.0
Bacteria "Proteobacteria" Gammaproteobacteria Thiotrichales Piscirickettsiaceae Sulfurivirga NaN 1.0
Bacteria "Thermodesulfobacteria" Thermodesulfobacteria Thermodesulfobacteriales Thermodesulfobacteriaceae Thermodesulfatator NaN 3.0
Bacteria TM7 TM7_genera_incertae_sedis NaN 2.0

438 rows × 2 columns

In [64]:
pd.concat([mb1, mb2], axis=1).values[:5]
Out[64]:
array([[ 7., 23.],
       [ 2.,  2.],
       [ 3., 10.],
       [ 3.,  9.],
       [ 7.,  9.]])
  • If we are only interested in taxa that are included in both DataFrames, we can specify a join='inner' argument.
In [65]:
pd.concat([mb1, mb2], axis=1, join='inner')
Out[65]:
Count Count
Taxon
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera 7 23
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Pyrodictiaceae Pyrolobus 2 2
Archaea "Crenarchaeota" Thermoprotei Sulfolobales Sulfolobaceae Stygiolobus 3 10
Archaea "Crenarchaeota" Thermoprotei Thermoproteales Thermofilaceae Thermofilum 3 9
Archaea "Euryarchaeota" "Methanomicrobia" Methanocellales Methanocellaceae Methanocella 7 9
... ... ...
Bacteria "Thermodesulfobacteria" Thermodesulfobacteria Thermodesulfobacteriales Thermodesulfobacteriaceae Caldimicrobium 1 1
Bacteria "Thermotogae" Thermotogae Thermotogales Thermotogaceae Geotoga 7 15
Bacteria "Thermotogae" Thermotogae Thermotogales Thermotogaceae Kosmotoga 9 22
Bacteria Cyanobacteria Cyanobacteria Chloroplast Chlorarachniophyceae 85 1
Bacteria Cyanobacteria Cyanobacteria Chloroplast Streptophyta 1388 2

122 rows × 2 columns

  • If we wanted to use the second table to fill values absent from the first table, we could use combine_first.
In [66]:
mb1.combine_first(mb2)
Out[66]:
Count
Taxon
Archaea "Crenarchaeota" Thermoprotei Acidilobales Acidilobaceae Acidilobus 2
Archaea "Crenarchaeota" Thermoprotei Acidilobales Caldisphaeraceae Caldisphaera 14
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera 7
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Sulfophobococcus 1
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Thermosphaera 2
... ...
Bacteria "Verrucomicrobia" Opitutae Opitutales Opitutaceae Alterococcus 1
Bacteria Cyanobacteria Cyanobacteria Chloroplast Bangiophyceae 2
Bacteria Cyanobacteria Cyanobacteria Chloroplast Chlorarachniophyceae 85
Bacteria Cyanobacteria Cyanobacteria Chloroplast Streptophyta 1388
Bacteria TM7 TM7_genera_incertae_sedis 2

438 rows × 1 columns

  • Alternatively, you can pass keys to the concatenation by supplying the DataFrames (or Series) as a dict.
In [67]:
pd.concat(dict(patient1=mb1, patient2=mb2), axis=1)
Out[67]:
patient1 patient2
Count Count
Taxon
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Desulfurococcaceae Ignisphaera 7.0 23.0
Archaea "Crenarchaeota" Thermoprotei Desulfurococcales Pyrodictiaceae Pyrolobus 2.0 2.0
Archaea "Crenarchaeota" Thermoprotei Sulfolobales Sulfolobaceae Stygiolobus 3.0 10.0
Archaea "Crenarchaeota" Thermoprotei Thermoproteales Thermofilaceae Thermofilum 3.0 9.0
Archaea "Euryarchaeota" "Methanomicrobia" Methanocellales Methanocellaceae Methanocella 7.0 9.0
... ... ...
Bacteria "Proteobacteria" Gammaproteobacteria Oceanospirillales Oceanospirillales_incertae_sedis Spongiispira NaN 1.0
Bacteria "Proteobacteria" Gammaproteobacteria Thiotrichales Piscirickettsiaceae Hydrogenovibrio NaN 9.0
Bacteria "Proteobacteria" Gammaproteobacteria Thiotrichales Piscirickettsiaceae Sulfurivirga NaN 1.0
Bacteria "Thermodesulfobacteria" Thermodesulfobacteria Thermodesulfobacteriales Thermodesulfobacteriaceae Thermodesulfatator NaN 3.0
Bacteria TM7 TM7_genera_incertae_sedis NaN 2.0

438 rows × 2 columns

  • If you want concat to work like numpy.concatenate, you may provide the ignore_index=True argument.
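
  • For example, a minimal sketch: the concatenated result gets a fresh integer index instead of the overlapping taxon labels, so it is unique again:
In [ ]:
pd.concat([mb1, mb2], ignore_index=True).index.is_unique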

Missing data¶

The occurrence of missing data is so prevalent that it pays to use tools like Pandas, which seamlessly integrates missing-data handling so that it can be dealt with easily, and in the manner required by the analysis at hand.

Missing data are represented in Series and DataFrame objects by the NaN floating point value. However, None is also treated as missing, since it is commonly used to indicate a missing value in Python code.

In [68]:
import numpy as np
foo = pd.Series([np.nan, -3, None, 'foobar'])
foo
Out[68]:
0       NaN
1        -3
2      None
3    foobar
dtype: object
In [69]:
foo.isnull()
Out[69]:
0     True
1    False
2     True
3    False
dtype: bool
  • Missing values may be dropped or indexed out:
In [74]:
bacteria_dict = {'Firmicutes': 632, 'Proteobacteria': 1638, 'Actinobacteria': 569, 'Bacteroidetes': 115}
bacteria2 = pd.Series(bacteria_dict, index=['Cyanobacteria','Firmicutes','Proteobacteria','Actinobacteria'])
bacteria2
Out[74]:
Cyanobacteria        NaN
Firmicutes         632.0
Proteobacteria    1638.0
Actinobacteria     569.0
dtype: float64
In [75]:
bacteria2.dropna()
Out[75]:
Firmicutes         632.0
Proteobacteria    1638.0
Actinobacteria     569.0
dtype: float64
In [76]:
bacteria2[bacteria2.notnull()]
Out[76]:
Firmicutes         632.0
Proteobacteria    1638.0
Actinobacteria     569.0
dtype: float64
  • By default, dropna drops a row if any of its values are missing. This can be overridden by passing the how='all' argument, which only drops a row when every field is a missing value.
In [ ]:
data.dropna(how='all')
  • This can be customized further by specifying how many values need to be present before a row is dropped via the thresh argument.
In [ ]:
data.dropna(thresh=4)
  • This is typically used in time series applications, where there are repeated measurements that are incomplete for some subjects.

  • If we want to drop missing values column-wise instead of row-wise, we use axis=1.

In [ ]:
data.dropna(axis=1)
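  • Since data is not defined until the fillna example below, here is a self-contained sketch of these dropna variants on a small illustrative DataFrame:
In [ ]:
import numpy as np
df = pd.DataFrame({'a': [1.0, np.nan, np.nan],
                   'b': [2.0, 3.0, np.nan],
                   'c': [np.nan, np.nan, np.nan]})
df.dropna()                   # drops every row containing a missing value
df.dropna(how='all')          # drops only the last row, where every value is missing
df.dropna(thresh=2)           # keeps rows with at least 2 non-missing values (only the first)
df.dropna(axis=1, how='all')  # drops column 'c', which is entirely missing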
  • Rather than omitting missing data from an analysis, in some cases it may be suitable to fill the missing value in, either with a default value (such as zero) or a value that is either imputed or carried forward/backward from similar data points. We can do this programmatically in Pandas with the fillna method.
In [79]:
bacteria2.fillna(0)
Out[79]:
Cyanobacteria        0.0
Firmicutes         632.0
Proteobacteria    1638.0
Actinobacteria     569.0
dtype: float64
In [89]:
data = pd.DataFrame({'value':[632, 1638, 569, 115, 433, 1130, 754, 555],
                     'treatment':[1, 1, 1, None, 2, 2, 2, 2],
                     'year':[1994,1997,1999, None,2015,2017,2019,2021]})

data.fillna({'year': 2013, 'treatment':2})
Out[89]:
value treatment year
0 632 1.0 1994.0
1 1638 1.0 1997.0
2 569 1.0 1999.0
3 115 2.0 2013.0
4 433 2.0 2015.0
5 1130 2.0 2017.0
6 754 2.0 2019.0
7 555 2.0 2021.0
  • Notice that fillna by default returns a new object with the desired filling behavior, rather than changing the Series or DataFrame in place.

  • We can alter values in-place using inplace=True.
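
  • For example, a minimal sketch:
In [ ]:
bacteria2_filled = bacteria2.copy()
bacteria2_filled.fillna(0, inplace=True)  # modifies the copy itself and returns None
bacteria2_filled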

Data aggregation and GroupBy operations¶

One of the most powerful features of Pandas is its GroupBy functionality. On occasion we may want to perform operations on groups of observations within a dataset. For example:

  • aggregation, such as computing the sum or mean of each group, which involves applying a function to each group and returning the aggregated results
  • slicing the DataFrame into groups and then doing something with the resulting slices (e.g. plotting)
  • group-wise transformation, such as standardization/normalization
In [91]:
cdystonia = pd.read_csv("data/cdystonia.csv")
cdystonia
Out[91]:
patient obs week site id treat age sex twstrs
0 1 1 0 1 1 5000U 65 F 32
1 1 2 2 1 1 5000U 65 F 30
2 1 3 4 1 1 5000U 65 F 24
3 1 4 8 1 1 5000U 65 F 37
4 1 5 12 1 1 5000U 65 F 39
... ... ... ... ... ... ... ... ... ...
626 109 1 0 9 11 5000U 57 M 53
627 109 2 2 9 11 5000U 57 M 38
628 109 4 8 9 11 5000U 57 M 33
629 109 5 12 9 11 5000U 57 M 36
630 109 6 16 9 11 5000U 57 M 51

631 rows × 9 columns

In [92]:
cdystonia_grouped = cdystonia.groupby(cdystonia.patient)
cdystonia_grouped
Out[92]:
<pandas.core.groupby.generic.DataFrameGroupBy object at 0x7f473e94b5e0>
  • However, the grouping is only an intermediate step; for example, we may want to iterate over each of the patient groups:
In [93]:
# for patient, group in cdystonia_grouped:
#    print(patient)
#    print(group)
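  • A single patient's records can also be pulled out directly, without looping, using get_group (a minimal sketch):
In [ ]:
cdystonia_grouped.get_group(4)  # all observations for patient 4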
  • A common data analysis procedure is the split-apply-combine operation, which groups subsets of data together, applies a function to each of the groups, then recombines them into a new data table. For example, we may want to aggregate our data with some function.

  • We can aggregate in Pandas using the aggregate (or agg, for short) method:

In [94]:
import numpy as np
cdystonia_grouped.agg(np.mean)
Out[94]:
obs week site id age twstrs
patient
1 3.5 7.0 1.0 1.0 65.0 33.000000
2 3.5 7.0 1.0 2.0 70.0 47.666667
3 3.5 7.0 1.0 3.0 64.0 30.500000
4 2.5 3.5 1.0 4.0 59.0 60.000000
5 3.5 7.0 1.0 5.0 76.0 46.166667
... ... ... ... ... ... ...
105 3.5 7.0 9.0 7.0 79.0 43.666667
106 3.5 7.0 9.0 8.0 43.0 67.666667
107 3.5 7.0 9.0 9.0 50.0 42.000000
108 3.5 7.0 9.0 10.0 39.0 52.333333
109 3.6 7.6 9.0 11.0 57.0 42.200000

109 rows × 6 columns

  • Notice that the treat and sex variables are not included in the aggregation. Since it does not make sense to average string variables, these columns are simply ignored by the method.

  • Some aggregation functions are so common that Pandas has a convenience method for them, such as mean:

In [95]:
cdystonia_grouped.mean()
Out[95]:
obs week site id age twstrs
patient
1 3.5 7.0 1.0 1.0 65.0 33.000000
2 3.5 7.0 1.0 2.0 70.0 47.666667
3 3.5 7.0 1.0 3.0 64.0 30.500000
4 2.5 3.5 1.0 4.0 59.0 60.000000
5 3.5 7.0 1.0 5.0 76.0 46.166667
... ... ... ... ... ... ...
105 3.5 7.0 9.0 7.0 79.0 43.666667
106 3.5 7.0 9.0 8.0 43.0 67.666667
107 3.5 7.0 9.0 9.0 50.0 42.000000
108 3.5 7.0 9.0 10.0 39.0 52.333333
109 3.6 7.6 9.0 11.0 57.0 42.200000

109 rows × 6 columns

  • The add_prefix and add_suffix methods can be used to give the columns of the resulting table labels that reflect the transformation:
In [96]:
cdystonia_grouped.mean().add_suffix('_mean')
Out[96]:
obs_mean week_mean site_mean id_mean age_mean twstrs_mean
patient
1 3.5 7.0 1.0 1.0 65.0 33.000000
2 3.5 7.0 1.0 2.0 70.0 47.666667
3 3.5 7.0 1.0 3.0 64.0 30.500000
4 2.5 3.5 1.0 4.0 59.0 60.000000
5 3.5 7.0 1.0 5.0 76.0 46.166667
... ... ... ... ... ... ...
105 3.5 7.0 9.0 7.0 79.0 43.666667
106 3.5 7.0 9.0 8.0 43.0 67.666667
107 3.5 7.0 9.0 9.0 50.0 42.000000
108 3.5 7.0 9.0 10.0 39.0 52.333333
109 3.6 7.6 9.0 11.0 57.0 42.200000

109 rows × 6 columns

In [97]:
# The median of the `twstrs` variable
cdystonia_grouped['twstrs'].quantile(0.5)
Out[97]:
patient
1      34.0
2      50.5
3      30.5
4      61.5
5      48.5
       ... 
105    45.5
106    67.5
107    44.0
108    50.5
109    38.0
Name: twstrs, Length: 109, dtype: float64
  • If we wish, we can easily aggregate according to multiple keys:
In [110]:
cdystonia.groupby(['week','site']).mean()
Out[110]:
patient obs id age twstrs
week site
0 1 6.5 1.0 6.5 59.000000 43.083333
2 19.5 1.0 7.5 53.928571 51.857143
3 32.5 1.0 6.5 51.500000 38.750000
4 42.5 1.0 4.5 59.250000 48.125000
5 49.5 1.0 3.5 51.833333 49.333333
6 60.0 1.0 8.0 51.866667 49.400000
7 73.5 1.0 6.5 59.250000 44.333333
8 89.0 1.0 10.0 57.263158 38.631579
9 104.0 1.0 6.0 55.454545 52.727273
2 1 6.5 2.0 6.5 59.000000 31.083333
2 19.0 2.0 7.0 52.923077 48.769231
3 32.5 2.0 6.5 51.500000 32.416667
4 42.5 2.0 4.5 59.250000 39.125000
5 49.0 2.0 3.0 50.000000 44.200000
6 60.0 2.0 8.0 51.866667 44.066667
7 73.5 2.0 6.5 59.250000 32.916667
8 88.5 2.0 9.5 58.562500 29.500000
9 103.7 2.0 5.7 56.000000 41.600000
4 1 6.5 3.0 6.5 59.000000 33.333333
2 19.5 3.0 7.5 53.928571 48.785714
  • Alternatively, we can transform the data, using a function of our choice with the transform method:
In [107]:
cdystonia2 = cdystonia_grouped[["obs", "week", "twstrs"]]
normalize = lambda x: (x - x.mean())/x.std()

cdystonia2.transform(normalize)
Out[107]:
obs week twstrs
0 -1.336306 -1.135550 -0.181369
1 -0.801784 -0.811107 -0.544107
2 -0.267261 -0.486664 -1.632322
3 0.267261 0.162221 0.725476
4 0.801784 0.811107 1.088214
... ... ... ...
626 -1.253831 -1.135467 1.180487
627 -0.771589 -0.836660 -0.459078
628 0.192897 0.059761 -1.005600
629 0.675140 0.657376 -0.677687
630 1.157383 1.254990 0.961878

631 rows × 3 columns

  • It is easy to do column selection within groupby operations, if we are only interested in split-apply-combine operations on a subset of columns:
In [100]:
cdystonia_grouped['twstrs'].mean()
Out[100]:
patient
1      33.000000
2      47.666667
3      30.500000
4      60.000000
5      46.166667
         ...    
105    43.666667
106    67.666667
107    42.000000
108    52.333333
109    42.200000
Name: twstrs, Length: 109, dtype: float64
  • If you simply want to divide your DataFrame into chunks for later use, it's easy to convert them into a dict so that they can be easily indexed out as needed:
In [101]:
chunks = dict(list(cdystonia_grouped))
chunks[4]
Out[101]:
patient obs week site id treat age sex twstrs
18 4 1 0 1 4 Placebo 59 F 53
19 4 2 2 1 4 Placebo 59 F 61
20 4 3 4 1 4 Placebo 59 F 64
21 4 4 8 1 4 Placebo 59 F 62
  • By default, groupby groups by row, but we can specify the axis argument to change this. For example, we can group our columns by data type (dtype) this way:
In [102]:
dict(list(cdystonia.groupby(cdystonia.dtypes, axis=1)))
Out[102]:
{dtype('int64'):      patient  obs  week  site  id  age  twstrs
 0          1    1     0     1   1   65      32
 1          1    2     2     1   1   65      30
 2          1    3     4     1   1   65      24
 3          1    4     8     1   1   65      37
 4          1    5    12     1   1   65      39
 ..       ...  ...   ...   ...  ..  ...     ...
 626      109    1     0     9  11   57      53
 627      109    2     2     9  11   57      38
 628      109    4     8     9  11   57      33
 629      109    5    12     9  11   57      36
 630      109    6    16     9  11   57      51
 
 [631 rows x 7 columns],
 dtype('O'):      treat sex
 0    5000U   F
 1    5000U   F
 2    5000U   F
 3    5000U   F
 4    5000U   F
 ..     ...  ..
 626  5000U   M
 627  5000U   M
 628  5000U   M
 629  5000U   M
 630  5000U   M
 
 [631 rows x 2 columns]}