1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
| import warnings
from statsmodels.tsa.holtwinters import ExponentialSmoothing, SimpleExpSmoothing
from statsmodels.tsa.statespace.sarimax import SARIMAX
from statsmodels.tsa.arima.model import ARIMA
from scipy.stats import boxcox
from scipy.special import inv_boxcox
warnings.filterwarnings("ignore")
def compute_mase(actual, predicted, train, m=7):
e = np.abs(actual - predicted)
naive_errors = np.abs(train.values[m:] - train.values[:-m])
scale = naive_errors.mean()
return e.mean() / scale if scale > 0 else np.nan
def preprocess_ts(series, method):
if method == "なし":
return series.copy(), lambda x: x
if method == "対数":
return np.log1p(series), lambda x: np.expm1(x)
if method == "差分":
first = series.iloc[0]
return series.diff().dropna(), lambda x: np.cumsum(np.concatenate([[first], x]))[1:]
if method == "週次":
weekly = series.resample("W").mean()
return weekly, lambda x: x # スケール変換なし
raise ValueError(method)
def forecast_ts(train, h, name, sp=7):
if name == "季節ナイーブ":
last = train.values[-sp:]
return np.tile(last, int(np.ceil(h / sp)))[:h]
if name == "SES":
return SimpleExpSmoothing(train, initialization_method="estimated").fit().forecast(h).values
if name == "HW加法":
return ExponentialSmoothing(
train, trend="add", seasonal="add", seasonal_periods=sp,
initialization_method="estimated",
).fit().forecast(h).values
if name == "ARIMA":
return ARIMA(train, order=(1, 1, 1)).fit().forecast(steps=h).values
if name == "SARIMAX":
return SARIMAX(
train, order=(1, 1, 1), seasonal_order=(1, 1, 1, sp),
enforce_stationarity=False, enforce_invertibility=False,
).fit(disp=False).forecast(steps=h).values
raise ValueError(name)
preps = ["なし", "対数", "差分"]
models = ["季節ナイーブ", "SES", "HW加法", "ARIMA", "SARIMAX"]
results = []
for prep in preps:
try:
tr_s, inv_fn = preprocess_ts(series, prep)
except Exception:
for m in models:
results.append((prep, m, np.nan))
continue
if prep == "差分":
train_tr = tr_s.iloc[:len(train_raw) - 1]
else:
train_tr = tr_s.iloc[:len(train_raw)]
for model_name in models:
try:
pred = forecast_ts(train_tr, h, model_name, sp=7)
pred_raw = np.asarray(inv_fn(pred), dtype=float)
mase = compute_mase(test_raw.values, pred_raw, train_raw, m=7)
except Exception:
mase = np.nan
results.append((prep, model_name, mase))
# 週次リサンプリング(SARIMAXは除く: 週次にするとsp不要)
prep = "週次"
weekly_series = series.resample("W").mean()
weekly_train = weekly_series.iloc[:-4]
weekly_test = weekly_series.iloc[-4:]
for model_name in ["季節ナイーブ", "SES", "HW加法", "ARIMA"]:
try:
pred = forecast_ts(weekly_train, 4, model_name, sp=4)
mase_w = compute_mase(weekly_test.values, pred, weekly_train, m=4)
except Exception:
mase_w = np.nan
results.append((prep, model_name, mase_w))
df = pd.DataFrame(results, columns=["前処理", "モデル", "MASE"])
df["パイプライン"] = df["前処理"] + " + " + df["モデル"]
import seaborn as sns
pivot = df.pivot_table(index="前処理", columns="モデル", values="MASE")
prep_order = ["なし", "対数", "差分", "週次"]
model_order = ["季節ナイーブ", "SES", "HW加法", "ARIMA", "SARIMAX"]
pivot = pivot.reindex(index=[p for p in prep_order if p in pivot.index],
columns=[m for m in model_order if m in pivot.columns])
fig, ax = plt.subplots(figsize=(10, 4))
sns.heatmap(pivot, annot=True, fmt=".2f", cmap="RdYlGn_r",
linewidths=0.5, ax=ax, vmin=0, vmax=3,
cbar_kws={"label": "MASE(低いほど良い)"})
ax.set_title("曜日パターン: 前処理 × モデル MASE ヒートマップ")
ax.set_xlabel("")
ax.set_ylabel("")
fig.tight_layout()
plt.show()
|