Import packages

In [1]:
import pandas as pd
import numpy as np

import plotly.plotly as ply
import cufflinks as cf

US Energy Production Data

In [2]:
ts = pd.read_csv('../../input/energy_production/IPG2211A2N.csv', index_col='DATE',
                 header=0,
                 names = ['DATE', 'value'],
                 parse_dates=['DATE'], )
In [3]:
start = pd.Timestamp('1985-01-01')
end = pd.Timestamp('2005-12-01')
ts = ts.loc[start:end, :]
In [4]:
ts.head()
Out[4]:
value
DATE
1985-01-01 72.6803
1985-02-01 70.8479
1985-03-01 62.6166
1985-04-01 57.6106
1985-05-01 55.4467

Plotting the raw time series

In [5]:
ts.iplot(title="Industrial Production 1985-2015", 
         xTitle='Year', yTitle='Production')
Out[5]:

Log Transformation of Time Series

In [6]:
np.log(ts).iplot(title="Log Transformation of Time Series", 
                 xTitle='Year', yTitle='Production')
Out[6]:

Log Transformation + Detrend of Time Series

In [7]:
from scipy.signal import detrend
pd.Series(detrend(np.log(ts.value)), index=ts.index).iplot(title="Log Transformation + Detrend", 
                                                           xTitle='Year', yTitle='Production', )
Out[7]:

Box-Cox Transformation

In [8]:
# from scipy.stats import boxcox
# pd.Series(detrend(boxcox((ts.value), lmbda=0.5, alpha=None)), index=ts.index).iplot(title="Industrial Production 1985-2018", 
#                                                                     xTitle='Year', yTitle='Production')

ACF Plot

In [9]:
from statsmodels.graphics.tsaplots import plot_acf
stat_ts = pd.Series(detrend(np.log(ts.value)), index=ts.index)
plot_acf(stat_ts)
Out[9]:

PACF Plot

In [10]:
from statsmodels.graphics.tsaplots import plot_pacf
plot_pacf(stat_ts)
c:\python36\lib\site-packages\statsmodels\regression\linear_model.py:1283: RuntimeWarning:

invalid value encountered in sqrt

Out[10]: