Feng Li
School of Statistics and Mathematics
Central University of Finance and Economics
import pandas as pd
pd.options.mode.chained_assignment = None # disable warnings
emp=pd.read_csv('data/employment.csv')
print(emp.head())
emp['Time'] = pd.to_datetime(emp['Time'],format = "%b-%Y")
# See https://docs.python.org/3/library/datetime.html#datetime.date for time format.
Time Employed UnemploymentRate 0 Feb-1978 5892.7 7.5 1 Mar-1978 5945.8 6.6 2 Apr-1978 5958.8 6.3 3 May-1978 5949.3 6.3 4 Jun-1978 5941.3 6.2
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Employed, color='blue')
[<matplotlib.lines.Line2D at 0x7f5129500820>]
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time[-72:], emp.Employed[-72:], color='black')
[<matplotlib.lines.Line2D at 0x7f51291f3be0>]
from pandas.plotting import register_matplotlib_converters
from statsmodels.tsa.seasonal import STL
register_matplotlib_converters()
res = STL(emp.Employed,period=12).fit()
res.plot()
plt.show()
Additive decomposition:
$$Y_t=T_t+S_t+R_t$$Multiplicative decomposition:
$$Y_t=T_t\times S_t\times R_t$$import numpy as np
time = np.arange(1,241)
trend = 0.2*time
seasonal = np.sin(np.pi*time/6)+2
remainder = np.random.uniform(1,2,240)
fig, (ax1,ax2,ax3) = plt.subplots(3,1,figsize=(25, 6))
ax1.plot(time, trend, color = 'black')
ax2.plot(time, seasonal, color = 'black')
ax3.plot(time, remainder, color = 'black')
[<matplotlib.lines.Line2D at 0x7f5128d84ca0>]
y_add = trend+seasonal+remainder
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,y_add)
[<matplotlib.lines.Line2D at 0x7f5128d16fa0>]
y_mult = trend*seasonal*remainder
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,y_mult)
[<matplotlib.lines.Line2D at 0x7f5128c85280>]
The additive and multplicative decompositions are linked by taking a log transformation. Consider:
$$Y_t=T_t\times S_t\times R_t$$Taking the logarithm of both sides
$$log(Y_t)=log(T_t)+ log(S_t)+ log(R_t)$$For this to work, all components must be non-negative
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,y_mult)
[<matplotlib.lines.Line2D at 0x7f5104130ca0>]
fig, ax = plt.subplots(figsize=(25,6))
ax.plot(time,np.log(y_mult))
[<matplotlib.lines.Line2D at 0x7f5104090a60>]
air=pd.read_csv('data/AirPassengers.csv')
air['Month'] = pd.to_datetime(air['Month'],format = "%Y-%m")
fig, (ax1,ax2) = plt.subplots(2,figsize=(15, 5))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(air.Month, air.Passengers, color='black')
[<matplotlib.lines.Line2D at 0x7f510400ceb0>]
emp['Unemp5MA'] = emp['UnemploymentRate'].rolling(window=5, center = True).mean()
fig, (ax1,ax2) = plt.subplots(2,figsize=(25, 6))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(emp.Time, emp.Unemp5MA, color='black')
[<matplotlib.lines.Line2D at 0x7f5103f60af0>]
We can smooth more by taking a moving average again
emp['Unemp5x5MA'] = emp['Unemp5MA'].rolling(window=5, center = True).mean()
fig, (ax1,ax2,ax3) = plt.subplots(3,figsize=(25, 5))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(emp.Time, emp.Unemp5MA, color='black')
ax3.plot(emp.Time, emp.Unemp5x5MA, color='black')
[<matplotlib.lines.Line2D at 0x7f5103e57f10>]
An order 3 MA is given by
$$\begin{aligned}Z_t = &\frac{1}{3}\left(Y_{t-1}+Y_t+Y_{t+1}\right)\\=&\frac{1}{3}\left(LY_t+Y_t+L^{-1}Y_t\right)\\=&\frac{1}{3}(L+1+L^{-1})Y_t\end{aligned}$$Applying an order 3 MA twice gives
$$\begin{aligned}W_t = &\left(\frac{1}{3}\right)^2(L+1+L^{-1})(L+1+L^{-1})Y_t\\=&\frac{1}{9}(L^2+L+1+L+1+L^{-1}+1+L^{-1}+L^{-2})Y_t\\=&\frac{1}{9}(L^2+2L+3+2L^{-1}+L^{-2})Y_t\\=&\frac{1}{9}Y_{t-2}+\frac{2}{9}Y_{t-1}+\frac{3}{9}Y_{t}+\frac{2}{9}Y_{t+1}+\frac{1}{9}Y_{t+2}&\end{aligned}$$The terms towards the edges have a slightly lower weight.
from pandas.api.indexers import FixedForwardWindowIndexer
indexer = FixedForwardWindowIndexer(window_size=2)
emp['Unemp12MA'] = emp['UnemploymentRate'].rolling(window=12, center = True).mean()
emp['Unemp2x12MA'] = emp['Unemp12MA'].rolling(indexer).mean()
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Unemp2x12MA, color='black')
[<matplotlib.lines.Line2D at 0x7f5103d85cd0>]
emp['Detrended']=emp['UnemploymentRate']-emp['Unemp2x12MA']
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Detrended, color='black')
[<matplotlib.lines.Line2D at 0x7f5103d76ca0>]
emp['Month'] = pd.DatetimeIndex(emp['Time']).month
seas = emp.groupby(by = [emp.Month]).mean()
emp = emp.merge(seas['Detrended'], how = 'left',left_on='Month', right_on='Month')
emp = emp.rename(columns = {"Detrended_x":"Detrended", "Detrended_y":"Seasonal"})
fig, ax = plt.subplots(figsize=(25, 6))
ax.plot(emp.Time, emp.Seasonal, color='black')
[<matplotlib.lines.Line2D at 0x7f5103e43550>]
emp['Remainder'] = emp['UnemploymentRate'] - emp['Unemp2x12MA'] - emp['Seasonal']
fig, (ax1,ax2,ax3,ax4) = plt.subplots(4,figsize=(25, 6))
ax1.plot(emp.Time, emp.UnemploymentRate, color='black')
ax2.plot(emp.Time, emp.Unemp2x12MA, color='black')
ax3.plot(emp.Time, emp.Seasonal, color='black')
ax4.plot(emp.Time, emp.Remainder, color='black')
[<matplotlib.lines.Line2D at 0x7f5103c37c40>]
Carry out a multiplicative decomposition on the number of employed people data
emp_md = emp[['Time','Employed']]
emp_md['Emp12MA'] = emp_md['Employed'].rolling(window=12, center = True).mean()
emp_md['Trend'] = emp_md['Emp12MA'].rolling(indexer).mean()
emp_md['Detrended'] = emp_md['Employed']/emp_md['Trend']
emp_md['Month'] = pd.DatetimeIndex(emp_md['Time']).month
seas_md = emp_md.groupby(by = [emp_md.Month]).mean()
emp_md = emp_md.merge(seas_md['Detrended'], how = 'left',left_on='Month', right_on='Month')
emp_md = emp_md.rename(columns = {"Detrended_x":"Detrended", "Detrended_y":"Seasonal"})
emp_md['Remainder'] = emp_md['Employed'] / (emp_md['Trend'] - emp_md['Seasonal'])
fig, (ax1,ax2,ax3,ax4) = plt.subplots(4,figsize=(25, 6))
ax1.plot(emp_md.Time, emp_md.Employed, color='black')
ax2.plot(emp_md.Time, emp_md.Trend, color='black')
ax3.plot(emp_md.Time, emp_md.Seasonal, color='black')
ax4.plot(emp_md.Time, emp_md.Remainder, color='black')
[<matplotlib.lines.Line2D at 0x7f5103aad3a0>]
Originally developed by the Bank of Spain.
Further refined by the U.S. Census Bureau.
Full details can be found here (note this is a 300-page document).
This algorithm uses seasonal ARIMA modelling which we will cover in a few weeks.
Both X11 and X-13ARIMA-SEATS are implemented in Python statsmodels
.
from statsmodels.tsa import x13