import japanize_matplotlib as jm
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from statsmodels.tsa.holtwinters import Holt
from statsmodels.tsa.holtwinters import SimpleExpSmoothing
jm.japanize()
Datos de Muestra #
Se utiliza el conjunto de datos de Consumo de Gas Natural (NATURALGAS). La unidad de medida de los valores es BCF (mil millones de pies cúbicos).
data = {
"value": {
"2013-01-01": 2878.8,
"2013-02-01": 2567.2,
"2013-03-01": 2521.1,
"2013-04-01": 1967.5,
"2013-05-01": 1752.5,
"2013-06-01": 1742.9,
"2013-07-01": 1926.3,
"2013-08-01": 1927.4,
"2013-09-01": 1767.0,
"2013-10-01": 1866.8,
"2013-11-01": 2316.9,
"2013-12-01": 2920.8,
"2014-01-01": 3204.1,
"2014-02-01": 2741.2,
"2014-03-01": 2557.9,
"2014-04-01": 1961.7,
"2014-05-01": 1810.2,
"2014-06-01": 1745.4,
"2014-07-01": 1881.0,
"2014-08-01": 1933.1,
"2014-09-01": 1809.3,
"2014-10-01": 1912.8,
"2014-11-01": 2357.5,
"2014-12-01": 2679.2,
"2015-01-01": 3115.0,
"2015-02-01": 2925.2,
"2015-03-01": 2591.3,
"2015-04-01": 2007.9,
"2015-05-01": 1858.2,
"2015-06-01": 1899.9,
"2015-07-01": 2067.7,
"2015-08-01": 2052.7,
"2015-09-01": 1901.3,
"2015-10-01": 1987.3,
"2015-11-01": 2249.1,
"2015-12-01": 2588.2,
"2016-01-01": 3091.7,
"2016-02-01": 2652.3,
"2016-03-01": 2356.3,
"2016-04-01": 2083.9,
"2016-05-01": 1965.8,
"2016-06-01": 2000.7,
"2016-07-01": 2186.6,
"2016-08-01": 2208.4,
"2016-09-01": 1947.8,
"2016-10-01": 1925.2,
"2016-11-01": 2159.5,
"2016-12-01": 2866.3,
"2017-01-01": 2913.8,
"2017-02-01": 2340.2,
"2017-03-01": 2523.3,
"2017-04-01": 1932.0,
"2017-05-01": 1892.0,
"2017-06-01": 1910.4,
"2017-07-01": 2141.6,
"2017-08-01": 2093.8,
"2017-09-01": 1920.5,
"2017-10-01": 2031.5,
"2017-11-01": 2357.3,
"2017-12-01": 3086.0,
"2018-01-01": 3340.9,
"2018-02-01": 2710.7,
"2018-03-01": 2796.7,
"2018-04-01": 2350.5,
"2018-05-01": 2055.0,
"2018-06-01": 2063.1,
"2018-07-01": 2350.7,
"2018-08-01": 2313.8,
"2018-09-01": 2156.1,
"2018-10-01": 2285.9,
"2018-11-01": 2715.9,
"2018-12-01": 2999.5,
"2019-01-01": 3424.3,
"2019-02-01": 3019.1,
"2019-03-01": 2927.8,
"2019-04-01": 2212.4,
"2019-05-01": 2134.0,
"2019-06-01": 2119.3,
"2019-07-01": 2393.9,
"2019-08-01": 2433.9,
"2019-09-01": 2206.3,
"2019-10-01": 2306.5,
"2019-11-01": 2783.8,
"2019-12-01": 3170.7,
"2020-01-01": 3320.6,
"2020-02-01": 3058.5,
"2020-03-01": 2722.0,
"2020-04-01": 2256.9,
"2020-05-01": 2072.2,
"2020-06-01": 2127.9,
"2020-07-01": 2464.1,
"2020-08-01": 2399.5,
"2020-09-01": 2151.2,
"2020-10-01": 2315.9,
"2020-11-01": 2442.0,
"2020-12-01": 3182.8,
"2021-01-01": 3343.9,
"2021-02-01": 3099.2,
"2021-03-01": 2649.4,
"2021-04-01": 2265.1,
"2021-05-01": 2117.4,
"2021-06-01": 2238.4,
"2021-07-01": 2412.2,
"2021-08-01": 2433.8,
"2021-09-01": 2142.3,
"2021-10-01": 2262.6,
"2021-11-01": 2693.3,
"2021-12-01": 3007.3,
"2022-01-01": 3612.1,
"2022-02-01": 3064.2,
"2022-03-01": 2785.4,
"2022-04-01": 2379.3,
"2022-05-01": 2247.8,
"2022-06-01": 2326.9,
"2022-07-01": 2597.9,
"2022-08-01": 2566.1,
"2022-09-01": 2263.3,
}
}
data = pd.DataFrame(data)
data.rename(columns={"value": "Natural Gas Consumption(BCF)"}, inplace=True)
data.index = pd.to_datetime(data.index)
data = data.asfreq("MS")
data.head()
Natural Gas Consumption(BCF) | |
---|---|
2013-01-01 | 2878.8 |
2013-02-01 | 2567.2 |
2013-03-01 | 2521.1 |
2013-04-01 | 1967.5 |
2013-05-01 | 1752.5 |
plt.figure(figsize=(12, 6))
sns.lineplot(data=data)
plt.grid()
plt.show()
División de Datos #
Se dividirán los datos para utilizar los valores más recientes (a partir de 2020) en la validación del rendimiento predictivo.
data_train = data[data.index < "2020-1-1"]
data_test = data[data.index >= "2020-1-1"]
plt.figure(figsize=(12, 6))
plt.plot(data_train.index, data_train.values, label="train", linewidth=2)
plt.plot(data_test.index, data_test.values, "-.", label="test")
plt.legend()
plt.grid()
Entrenamiento y Predicción del Modelo #
ses = SimpleExpSmoothing(data_train)
ses = ses.fit(smoothing_level=0.1)
ses_pred = ses.forecast(33)
holt = Holt(data_train)
holt = holt.fit(smoothing_level=0.1, smoothing_trend=0.2)
holt_pred = holt.forecast(33)
plt.figure(figsize=(12, 6))
plt.plot(data_train.index, data_train.values, label="train", linewidth=2)
plt.plot(data_test.index, data_test.values, "-.", label="test")
plt.plot(ses_pred.index, ses_pred.values, "-x", label="prediction(ses)")
plt.plot(holt_pred.index, holt_pred.values, "-x", label="prediction(Holt)")
plt.legend()
plt.grid()