This repository has been archived by the owner on Apr 28, 2020. It is now read-only.
-
Notifications
You must be signed in to change notification settings - Fork 0
/
time_series_cons_daily_SARIMA_dataprep+modelling.py
158 lines (117 loc) · 4.85 KB
/
time_series_cons_daily_SARIMA_dataprep+modelling.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
import pandas as pd
opsd = pd.read_csv('time_series_DE_daily.csv', index_col='date', parse_dates=True)
opsd.columns = opsd.columns.str.lower()
opsd.head(3)
# Subset electric consumption time series and resample by month
cons = opsd.loc[:, 'consumption'].dropna().resample('M').sum()
cons.describe()
cons.shape
cons.index[0]
cons.index[-1]
# Decompose time series
from statsmodels.tsa.seasonal import seasonal_decompose
cons_multi_dec = seasonal_decompose(cons, model='multiplicative', freq=12)
cons_addit_dec = seasonal_decompose(cons, model='additive', freq=12)
import matplotlib.pyplot as plt
# plt.rcParams.keys()
plt.rcParams.update({'figure.figsize':(15, 4)})
cons_multi_plot = cons_multi_dec.plot()
cons_addit_plot = cons_addit_dec.plot()
# residuals have same shape but higher range than in multiplicative
# Rolling windows - statistical stationarity
# cons.rolling(window=3, center=True).mean().head()
ws = 3
lw_th = 2
a = .3
lw = 2.2
ax = plt.plot(cons.index, cons, label='Raw data', color='r', linewidth=lw_th, alpha=a)
plt.plot(cons.rolling(window=ws, center=True).mean(), label='Rolling mean', linewidth=lw)
plt.plot(cons.rolling(window=ws, center=True).std(), label='Rolling std', linewidth=lw)
plt.title(f'Electricity consumption on a {ws} months rolling window', fontsize=16)
plt.xlabel('Year')
plt.ylabel('Electric consumption in Germany (GWh)')
plt.legend(fontsize=16, loc='best')
# Augmented Dickey-Fuller Test for stationarity
from statsmodels.tsa.stattools import adfuller
def ADF_Stationarity_Test(timeseries):
ADF_test = adfuller(timeseries, autolag='AIC')
pValue = ADF_test[1]
if (pValue < .05):
print('The time-series is stationary!\n')
else:
print('The time-series is NOT stationary!\n')
# Coefficients
dfResults = pd.Series(ADF_test[0:4], index=['ADF Test Statistic', 'P-Value', 'N-Lags Used', 'N-Observations Used'])
#Critical values
for key, value in ADF_test[4].items():
dfResults['Critical value (%s)'%key] = value
print('Augmented Dickey-Fuller Test results:')
print(dfResults)
ADF_Stationarity_Test(timeseries=cons) # d=0
# Transform time series to achieve stationarity
# cons_diff = cons.diff().dropna() # d = 1
# ADF_Stationarity_Test(timeseries=cons_diff)
# Autocorrelation
pd.plotting.lag_plot(cons)
### Seasonal ARIMA # SARIMA(p, d, q)(P, D, Q)s
# Autocorrelation and partial autocorrelation function
k_cons = int(len(cons)/13)
k_cons < len(cons)/k_cons
# Correlograms
from statsmodels.graphics.tsaplots import plot_acf, plot_pacf
cons_acf_plot = plot_acf(cons, lags=k_cons) # MA[q] = 1
cons_pacf_plot = plot_pacf(cons, lags=k_cons) # AR[p] = 1
# Seasonal differencing
ax = plt.plot(cons, label='Original series', color='r')
plt.plot(cons.diff(12), label='Seasonal differencing', color='g')
plt.xlabel('Year')
plt.ylabel('Electric consumption Germany(GWh)')
plt.legend(loc='center left')
plt.tick_params(labelrotation=90)
k = 12 # our S
k_acf_plot = plot_acf(cons, lags=k) # AR[P] = 1; MA[Q] = 1
# Define training and test datasets
training_perc = .8
cons_split = int(len(cons) * training_perc)
cons_train, cons_test = cons[:cons_split], cons[cons_split:]
from statsmodels.tsa.statespace.sarimax import SARIMAX
# Build and fit the model
p = 1
d = 0
q = 1
P = 1
D = 1
Q = 1
S = 12
#help(SARIMAX)
cons_model = SARIMAX(cons_train, order=(p, d, q),
seasonal_order=(P, D, Q, S),
enforce_stationarity=False,
enforce_inveritbility=False)
cons_fit = cons_model.fit(disp=False)
from pandas.plotting import register_matplotlib_converters
register_matplotlib_converters()
cons_diagnostic = cons_fit.plot_diagnostics(figsize=(14, 9))
# Validation with static method
cons_predict = cons_fit.get_prediction(start=cons.index[cons_split], end=cons.index[-1], dynamic=False)
cons_pred_confint = cons_predict.conf_int()
y_predict = cons_predict.predicted_mean
y_observed = cons[cons_split:]
import numpy as np
rmse = np.sqrt(((y_predict - y_observed)**2).mean()); print('The Mean Squared Error is {}'.format(round(rmse, 2)))
ax = cons.plot(color='b', label='Original series', figsize=(20, 6))
cons_predict.predicted_mean.plot(ax=ax, color = 'r', label='Prediction with static method', alpha=.5)
ax.set_ylim(30000, 60000)
ax.fill_between(cons_pred_confint.index,
cons_pred_confint.iloc[:, 0],
cons_pred_confint.iloc[:, 1], color = 'k', alpha=.2)
ax.legend(loc='upper left', fontsize=16)
# Forecast
cons_forecast = cons_fit.get_forecast(steps='2021-12-31')
cons_fore_confint = cons_forecast.conf_int()
ax = cons.plot(color='b', label='Original series', figsize=(20, 6))
cons_forecast.predicted_mean.plot(ax=ax, color = 'r', label='Forecasted series')
ax.fill_between(cons_fore_confint.index,
cons_fore_confint.iloc[:, 0],
cons_fore_confint.iloc[:, 1], color = 'k', alpha=.2)
ax.legend(loc='upper left', fontsize=16)