Time Series Decomposition Methods¶

Feng Li

School of Statistics and Mathematics

Central University of Finance and Economics

feng.li@cufe.edu.cn

https://feng.li/python

Why Decompositions?¶

  • Consider the following statements:
    • "In February 1989, 7,477,400 people were employed, but in February 1993, 7,448,500 people were employed. The labour market was roughly as strong in 1993 as it was in 1989."
    • "In December 2019, employment in Australia rose by 125,300 people, meaning the economy was strong. But in January 2020, employment fell by 226,400, meaning that the economy weakened."
    • "Between October 2021 and November 2021, the number of employed people went up by 398,000, meaning that at this time many jobs were being created."
  • What is wrong with these statements?

Data¶

  • Employed persons aged 15-64 in Australia at a monthly frequency.
  • The unemployment rate is also included (we will use it later).
In [1]:
import pandas as pd
pd.options.mode.chained_assignment = None # disable warnings

emp=pd.read_csv('data/employment.csv')
print(emp.head())
emp['Time'] = pd.to_datetime(emp['Time'],format = "%b-%Y")
# See https://docs.python.org/3/library/datetime.html#datetime.date for time format. 
       Time  Employed  UnemploymentRate
0  Feb-1978    5892.7               7.5
1  Mar-1978    5945.8               6.6
2  Apr-1978    5958.8               6.3
3  May-1978    5949.3               6.3
4  Jun-1978    5941.3               6.2

Employment data¶

In [2]:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Employed, color='blue')
Out[2]:
[<matplotlib.lines.Line2D at 0x7f5129500820>]

Employment data (last 6 years)¶

In [3]:
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time[-72:], emp.Employed[-72:], color='black')
Out[3]:
[<matplotlib.lines.Line2D at 0x7f51291f3be0>]

Decomposition¶

  • We can think of four components that may affect a time series:
    • Trend (T): a tendency for a time series to rise or fall (e.g. life expectancy)
    • Cycles (C): a tendency for a time series pattern to repeat itself, but at irregular intervals (e.g. business cycles)
    • Seasonality (S): a tendency for a time series pattern to repeat over regular intervals (e.g. temperature)
    • Remainder (R): idiosyncratic components (e.g. COVID-19)
  • Cycle is often included in trend.
In [35]:
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.seasonal import STL
register_matplotlib_converters()
res = STL(emp.Employed,period=12).fit()
res.plot()
plt.show()

Additive and Multiplicative Decompositions¶

Additive decomposition:

$$Y_t=T_t+S_t+R_t$$

Multiplicative decomposition:

$$Y_t=T_t\times S_t\times R_t$$

Simulation¶

  • Trend will be a linear trend
  • Seasonal component will be a sine function
  • Remainder will just be some random uniform noise
In [4]:
import numpy as np
time = np.arange(1,241)
trend = 0.2*time
seasonal = np.sin(np.pi*time/6)+2
remainder = np.random.uniform(1,2,240)

Components¶

In [5]:
fig, (ax1,ax2,ax3) = plt.subplots(3,1,figsize=(25, 6))
ax1.plot(time, trend, color = 'black')
ax2.plot(time, seasonal, color = 'black')
ax3.plot(time, remainder, color = 'black')
Out[5]:
[<matplotlib.lines.Line2D at 0x7f5128d84ca0>]

Additive¶

In [6]:
y_add = trend+seasonal+remainder
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,y_add)
Out[6]:
[<matplotlib.lines.Line2D at 0x7f5128d16fa0>]

Multiplicative¶

In [7]:
y_mult = trend*seasonal*remainder
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,y_mult)
Out[7]:
[<matplotlib.lines.Line2D at 0x7f5128c85280>]

Comments¶

  • For an additive decomposition with an upwards trend:
    • The seasonality does not become exaggerated over time
    • The variance stays mostly constant
  • For a multiplicative decomposition with an upwards trend
    • The seasonal pattern becomes more exaggerated over time
    • The variance also increases (the line plot "flares out")

Log Transformation¶

The additive and multiplicative decompositions are linked by the log transformation. Consider:

$$Y_t=T_t\times S_t\times R_t$$

Taking the logarithm of both sides gives

$$\log(Y_t)=\log(T_t)+\log(S_t)+\log(R_t)$$

For this to work, all components must be strictly positive.
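This identity is easy to verify numerically. A minimal sketch with NumPy, mirroring the earlier simulation (all component names are illustrative):

```python
import numpy as np

rng = np.random.default_rng(0)
t = np.arange(1, 241)
trend = 0.2 * t                        # positive linear trend
seasonal = np.sin(np.pi * t / 6) + 2   # positive seasonal component
remainder = rng.uniform(1, 2, t.size)  # positive noise

y_mult = trend * seasonal * remainder
# the log of the product equals the sum of the component logs
print(np.allclose(np.log(y_mult),
                  np.log(trend) + np.log(seasonal) + np.log(remainder)))  # True
```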

Multiplicative data¶

In [9]:
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,y_mult)
Out[9]:
[<matplotlib.lines.Line2D at 0x7f5104130ca0>]

Multiplicative data (log scale)¶

In [10]:
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,np.log(y_mult))
Out[10]:
[<matplotlib.lines.Line2D at 0x7f5104090a60>]

Log transformation¶

  • The log transformation is generally useful:
    • To reduce fluctuation in the seasonal pattern of the data.
    • To stabilise the variance of a time series.
    • To reduce the impact of outliers.
  • Once we get to forecasting, any additive model can be turned into a multiplicative model simply by fitting it to the log-transformed data.

Your Turn¶

  • Discuss whether to use an additive or multiplicative decomposition below.
In [11]:
air=pd.read_csv('data/AirPassengers.csv')
air['Month'] = pd.to_datetime(air['Month'],format = "%Y-%m")
fig, (ax1,ax2) = plt.subplots(2,figsize=(15, 5))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(air.Month, air.Passengers, color='black')
Out[11]:
[<matplotlib.lines.Line2D at 0x7f510400ceb0>]

Classical Decomposition¶

  • The classical decomposition was developed in the 1920s.
  • It has since been superseded by more sophisticated methods.
  • More modern methods build upon the classical decomposition, so it is worth covering in detail.
  • We will implement more modern methods but not cover all the algorithmic details.

Moving average¶

  • The basic idea: trend-cycle components should be smooth.
  • One way to smooth a series is to take an order $m$ moving average
$$T_t = \frac{1}{m}\sum_{j=-k}^{k}Y_{t+j}\quad\textrm{where }m=2k+1$$
  • This is slightly different to the moving average model you have seen in ARIMA models.
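The definition above maps directly onto a centred rolling mean; a quick sketch comparing the explicit sum with pandas' `rolling` (the series values are made up):

```python
import numpy as np
import pandas as pd

y = pd.Series([3.0, 5.0, 4.0, 6.0, 8.0, 7.0, 9.0])
m, k = 5, 2  # m = 2k + 1

# direct implementation of T_t = (1/m) * sum_{j=-k}^{k} Y_{t+j}
direct = pd.Series([y.iloc[t - k:t + k + 1].mean()
                    if k <= t < len(y) - k else np.nan
                    for t in range(len(y))])

# the same smoother via pandas' centred rolling window
rolled = y.rolling(window=m, center=True).mean()
print(rolled.equals(direct))  # True
```

Note that the first and last $k$ values are NaN: the centred window cannot be filled there.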

Moving average in Python¶

In [12]:
emp['Unemp5MA'] = emp['UnemploymentRate'].rolling(window=5, center = True).mean()
fig, (ax1,ax2) = plt.subplots(2,figsize=(25, 6))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(emp.Time, emp.Unemp5MA, color='black')
Out[12]:
[<matplotlib.lines.Line2D at 0x7f5103f60af0>]

Double moving average¶

We can smooth more by taking a moving average again

In [13]:
emp['Unemp5x5MA'] = emp['Unemp5MA'].rolling(window=5, center = True).mean()
fig, (ax1,ax2,ax3) = plt.subplots(3,figsize=(25, 5))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(emp.Time, emp.Unemp5MA, color='black')
ax3.plot(emp.Time, emp.Unemp5x5MA, color='black')
Out[13]:
[<matplotlib.lines.Line2D at 0x7f5103e57f10>]

Backshift notation¶

  • To better understand what a double MA achieves, it is best to use the lag or backshift operator $L$ (sometimes people use $B$).
    • $LY_t=Y_{t-1}$
    • $L^2Y_t=Y_{t-2}$
    • $L^kY_t=Y_{t-k}$
    • $L^{-1}Y_t=Y_{t+1}$
    • $L^{-k}Y_t=Y_{t+k}$
  • Let's write the moving averages in terms of the backshift operator
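In pandas, the backshift operator corresponds to `Series.shift`: a positive shift lags the series ($L$), and a negative shift leads it ($L^{-1}$). A small sketch with made-up values:

```python
import pandas as pd

y = pd.Series([10.0, 20.0, 30.0, 40.0])

print(y.shift(1).tolist())   # L Y_t = Y_{t-1}:     [nan, 10.0, 20.0, 30.0]
print(y.shift(2).tolist())   # L^2 Y_t = Y_{t-2}:   [nan, nan, 10.0, 20.0]
print(y.shift(-1).tolist())  # L^{-1} Y_t = Y_{t+1}: [20.0, 30.0, 40.0, nan]
```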

An order 3 MA¶

An order 3 MA is given by

$$\begin{aligned}Z_t = &\frac{1}{3}\left(Y_{t-1}+Y_t+Y_{t+1}\right)\\=&\frac{1}{3}\left(LY_t+Y_t+L^{-1}Y_t\right)\\=&\frac{1}{3}(L+1+L^{-1})Y_t\end{aligned}$$

An 3x3 MA¶

Applying an order 3 MA twice gives

$$\begin{aligned}W_t = &\left(\frac{1}{3}\right)^2(L+1+L^{-1})(L+1+L^{-1})Y_t\\=&\frac{1}{9}(L^2+L+1+L+1+L^{-1}+1+L^{-1}+L^{-2})Y_t\\=&\frac{1}{9}(L^2+2L+3+2L^{-1}+L^{-2})Y_t\\=&\frac{1}{9}Y_{t-2}+\frac{2}{9}Y_{t-1}+\frac{3}{9}Y_{t}+\frac{2}{9}Y_{t+1}+\frac{1}{9}Y_{t+2}&\end{aligned}$$

The terms towards the edges have a slightly lower weight.
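We can check this expansion numerically: applying a centred 3-MA twice should match a single weighted average with weights $(1,2,3,2,1)/9$. A sketch on simulated data:

```python
import numpy as np
import pandas as pd

rng = np.random.default_rng(1)
y = pd.Series(rng.normal(size=50))

# order-3 centred MA applied twice
twice = y.rolling(3, center=True).mean().rolling(3, center=True).mean()

# single pass with the expanded weights (1, 2, 3, 2, 1)/9
w = np.array([1, 2, 3, 2, 1]) / 9
single = pd.Series(np.convolve(y, w, mode="same"))

# interior points agree; the edges differ because the MA leaves NaNs there
print(np.allclose(twice.iloc[2:-2], single.iloc[2:-2]))  # True
```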

Even MA order¶

  • An even-order MA can be defined in two ways. An MA of order 4 can be either:
$$Z_t=\frac{1}{4}(L^2+L^1+1+L^{-1})Y_t\quad\textrm{or}\quad Z_t=\frac{1}{4} (L^1+1+L^{-1}+L^{-2})Y_t$$
  • For a double moving average (with both orders even) it makes no difference, as long as one MA uses more observations from the past and the other uses more from the future. For a 2x4 MA
$$\begin{aligned}Z_t=\frac{1}{8}&(1+L^{-1})(L^2+L^1+1+L^{-1})Y_t\\=\frac{1}{8}&(1+L)(L^1+1+L^{-1}+L^{-2})Y_t\end{aligned}$$
  • Discussion: are larger orders always better?
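The same numerical check works here: a centred 4-MA (which pandas anchors with one more past observation than future) followed by a forward 2-MA should equal a single pass with weights $(1,2,2,2,1)/8$. A sketch on simulated data:

```python
import numpy as np
import pandas as pd
from pandas.api.indexers import FixedForwardWindowIndexer

rng = np.random.default_rng(2)
y = pd.Series(rng.normal(size=40))

# centred 4-MA (window covers t-2..t+1), then a forward 2-MA to re-centre it
ma4 = y.rolling(window=4, center=True).mean()
ma2x4 = ma4.rolling(FixedForwardWindowIndexer(window_size=2)).mean()

# equivalent single pass with weights (1, 2, 2, 2, 1)/8
w = np.array([1, 2, 2, 2, 1]) / 8
single = pd.Series(np.convolve(y, w, mode="same"))

# interior points agree (the edges are NaN for the double MA)
print(np.allclose(ma2x4.iloc[2:-2], single.iloc[2:-2]))  # True
```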

A 2x12 MA in Python¶

In [14]:
from pandas.api.indexers import FixedForwardWindowIndexer
indexer = FixedForwardWindowIndexer(window_size=2)
# centred 12-MA: the even window uses one more past than future observation
emp['Unemp12MA'] = emp['UnemploymentRate'].rolling(window=12, center = True).mean()
# forward 2-MA of the 12-MA completes the centred 2x12 MA
emp['Unemp2x12MA'] = emp['Unemp12MA'].rolling(indexer).mean()
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Unemp2x12MA, color='black')
Out[14]:
[<matplotlib.lines.Line2D at 0x7f5103d85cd0>]

Classical decomposition¶

  • The first step of the classical decomposition (for monthly data) is to find a trend-cycle estimate as a $2\times12$ MA.
$$T_t=\frac{1}{24}(1+L^{-1})(L^6+L^5+\dots+L+1+L^{-1}+\dots+L^{-4}+L^{-5})Y_t$$
  • For an additive decomposition, this is subtracted from the series to get an initial estimate of the detrended data; for a multiplicative decomposition, divide instead.
$$S_t+R_t=Y_t-T_t\quad\textrm{or}\quad S_t\times R_t=Y_t/T_t$$
  • Let's see what this looks like.

De-trended Series¶

In [15]:
emp['Detrended']=emp['UnemploymentRate']-emp['Unemp2x12MA'] 
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Detrended, color='black')
Out[15]:
[<matplotlib.lines.Line2D at 0x7f5103d76ca0>]

Deseasonalise data¶

  • To deseasonalise, we take monthly averages of the detrended series:
    • Take average using observations in January only
    • Take average using observations in February only
    • ...
    • Take average using observations in December only
  • The averages are then repeated over and over
$$\dots,S_{\textrm{Jan}},S_{\textrm{Feb}},\dots,S_{\textrm{Dec}},\dots,S_{\textrm{Jan}},S_{\textrm{Feb}},\dots,S_{\textrm{Dec}},\dots,S_{\textrm{Jan}},S_{\textrm{Feb}},\dots,S_{\textrm{Dec}},\dots$$

In Python¶

In [16]:
emp['Month'] = pd.DatetimeIndex(emp['Time']).month
seas = emp.groupby('Month')[['Detrended']].mean()  # monthly averages of the detrended series
emp = emp.merge(seas, how = 'left', left_on='Month', right_index=True)
emp = emp.rename(columns = {"Detrended_x":"Detrended", "Detrended_y":"Seasonal"})
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Seasonal, color='black')
Out[16]:
[<matplotlib.lines.Line2D at 0x7f5103e43550>]

Remainder¶

  • The seasonal terms can be subtracted to obtain the remainder
$$R_t=Y_t-T_t-S_t$$
  • For a multiplicative model divide to obtain the remainder
$$R_t=Y_t/(T_t \times S_t)$$

Additive Decomposition¶

In [17]:
emp['Remainder'] = emp['UnemploymentRate'] - emp['Unemp2x12MA'] - emp['Seasonal'] 
fig, (ax1,ax2,ax3,ax4) = plt.subplots(4,figsize=(25, 6))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(emp.Time, emp.Unemp2x12MA, color='black')
ax3.plot(emp.Time, emp.Seasonal, color='black')
ax4.plot(emp.Time, emp.Remainder, color='black')
Out[17]:
[<matplotlib.lines.Line2D at 0x7f5103c37c40>]

Your turn¶

Carry out a multiplicative decomposition of the number of employed people.

In [18]:
emp_md = emp[['Time','Employed']]
emp_md['Emp12MA'] = emp_md['Employed'].rolling(window=12, center = True).mean()
emp_md['Trend'] = emp_md['Emp12MA'].rolling(indexer).mean()
emp_md['Detrended'] = emp_md['Employed']/emp_md['Trend']
emp_md['Month'] = pd.DatetimeIndex(emp_md['Time']).month
seas_md = emp_md.groupby('Month')[['Detrended']].mean()
emp_md = emp_md.merge(seas_md, how = 'left', left_on='Month', right_index=True)
emp_md = emp_md.rename(columns = {"Detrended_x":"Detrended", "Detrended_y":"Seasonal"})
emp_md['Remainder'] = emp_md['Employed'] / (emp_md['Trend'] * emp_md['Seasonal'])  # R_t = Y_t / (T_t * S_t)
In [19]:
fig, (ax1,ax2,ax3,ax4) = plt.subplots(4,figsize=(25, 6))
ax1.plot(emp_md.Time, emp_md.Employed, color='black')
ax2.plot(emp_md.Time, emp_md.Trend, color='black')
ax3.plot(emp_md.Time, emp_md.Seasonal, color='black')
ax4.plot(emp_md.Time, emp_md.Remainder, color='black')
Out[19]:
[<matplotlib.lines.Line2D at 0x7f5103aad3a0>]

Shortcomings of Classical Decomposition¶

  • The estimate of the trend-cycle is unavailable for the first few and last few observations.
    • This makes it particularly poorly suited to forecasting
  • Classical decomposition methods assume that the seasonal component repeats from year to year.
    • Not robust to outliers/unusual values.
  • More sophisticated methods address these issues.

The X-11 Decomposition¶

  • Developed by the U.S. Census Bureau and Statistics Canada in the 1960s.
  • Full details are available in Chapter 5, "The Development of Seasonal Adjustment Programs", of Bleikh, H. Y., & Young, W. L. (2016). Time Series Analysis and Adjustment: Measuring, Modelling and Forecasting for Business and Economics. Routledge.
  • Both moving averages and seasonal averages are used as part of this method.

The X-13ARIMA-SEATS Decomposition¶

  • Originally developed by the Bank of Spain.

  • Further refined by the U.S. Census Bureau.

  • Full details can be found in the U.S. Census Bureau's X-13ARIMA-SEATS Reference Manual (note this is a roughly 300-page document).

  • This algorithm uses seasonal ARIMA modelling which we will cover in a few weeks.

  • Both X-11 and X-13ARIMA-SEATS are implemented in Python's statsmodels.

      from statsmodels.tsa import x13
    

The STL Decomposition¶

  • The Seasonal-Trend decomposition based on Loess (STL) has some advantages.
    • Unlike X-11 and X-13ARIMA-SEATS, STL works for data with any seasonal period, not just monthly or quarterly data.
    • It can also handle missing values.
  • It is based on Loess, a non-parametric smoothing technique.
  • The full details can be found in the original paper by Cleveland, R. B., Cleveland, W. S., McRae, J. E., & Terpenning, I. (1990). STL: A Seasonal-Trend Decomposition. Journal of Official Statistics, 6(1), 3-73.

Wrap-up¶

  • Be aware of:
    • The importance of seasonally adjusting data in certain applications.
    • The difference between multiplicative and additive decompositions and their relationship to the log transform.
    • The use of moving averages to smooth time series data.
    • The importance of seasonal effects that can gradually change over time.
  • In your tutorial you will play with the more advanced decomposition methods.