Softmax Regression

อัปเดต 2020-02-12 อ่าน 2 นาที

สรุป

Softmax regression ขยายลอจิสติกรีเกรสชันไปยังหลายคลาสและให้ความน่าจะเป็นของทุกคลาสพร้อมกัน
เอาต์พุตอยู่ในช่วง $[0,1]$ และรวมกันเป็น 1 จึงนำไปตั้ง threshold หรือคำนวณต้นทุนได้โดยตรง
ฝึกโดยทำให้ค่า cross-entropy ต่ำที่สุด ซึ่งเป็นการปรับความน่าจะเป็นที่พยากรณ์ให้ใกล้เคียงกับป้ายกำกับจริง
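ใน scikit-learn ระบุ LogisticRegression(multi_class="multinomial") ก็จะได้ Softmax regression และยังใช้ regularization แบบ L1/L2 ได้ (L1 ต้องใช้ solver="saga"; ตั้งแต่ scikit-learn 1.5 อาร์กิวเมนต์ multi_class ถูกประกาศเลิกใช้ เพราะ multinomial เป็นค่าปริยายอยู่แล้วเมื่อมีตั้งแต่ 3 คลาสขึ้นไป)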

แนวคิดเชิงสัญชาตญาณ #

เพื่อเข้าใจวิธีนี้ ควรพิจารณาสมมติฐานของโมเดล ลักษณะของข้อมูล และผลของการตั้งค่าพารามิเตอร์ที่มีต่อความสามารถในการวางนัยทั่วไป (generalization) ของโมเดล

คำอธิบายโดยละเอียด #

สูตรสำคัญ #

สำหรับคลาส $K$ คลาส ความน่าจะเป็นของคลาส $k$ คือ

$$ P(y = k \mid \mathbf{x}) = \frac{\exp\left(\mathbf{w}_k^\top \mathbf{x} + b_k\right)} {\sum_{j=1}^{K} \exp\left(\mathbf{w}_j^\top \mathbf{x} + b_j\right)}. $$

ฟังก์ชันสูญเสียที่ต้องทำให้มีค่าต่ำที่สุดคือ cross-entropy

$$ L = - \sum_{i=1}^{n} \sum_{k=1}^{K} \mathbb{1}(y_i = k) \log P(y = k \mid \mathbf{x}_i). $$

การเพิ่มพจน์ลงโทษ (regularization) แบบ L1/L2 ช่วยควบคุมการเรียนรู้เกิน (overfitting) ได้เช่นเดียวกับลอจิสติกรีเกรสชันแบบสองคลาส

ทดลองด้วย Python #

ตัวอย่างต่อไปนี้ฝึก softmax regression กับข้อมูล 3 คลาส แล้ววาดบริเวณการจำแนกพร้อมเส้นแบ่งระหว่างคลาส

from __future__ import annotations

import japanize_matplotlib
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.colors import ListedColormap
from sklearn.datasets import make_classification
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

def run_softmax_regression_demo(
    n_samples: int = 300,
    n_classes: int = 3,
    random_state: int = 42,
    label_title: str = "บริเวณการจำแนกของ Softmax Regression",
    xlabel: str = "คุณลักษณะที่ 1",
    ylabel: str = "คุณลักษณะที่ 2",
) -> dict[str, float]:
    """Train a softmax regression model and visualise its decision regions.

    A synthetic two-feature data set is generated with
    ``make_classification``, a ``LogisticRegression`` configured as
    multinomial is fitted on all of it, and the predicted class of every
    point on a 400 x 400 grid is drawn as a filled contour underneath a
    scatter plot of the samples. The figure is displayed with ``plt.show()``.

    Args:
        n_samples: Number of synthetic samples to generate.
        n_classes: Number of classes in the synthetic data.
        random_state: Seed forwarded to ``make_classification``.
        label_title: Title of the plot.
        xlabel: Label of the horizontal axis (first feature).
        ylabel: Label of the vertical axis (second feature).

    Returns:
        A dict with the single key ``"accuracy"``, holding the accuracy
        measured on the same data the model was trained on.
    """
    # NOTE(review): this helper configures a Japanese font; confirm that the
    # selected font can also render the Thai default labels.
    japanize_matplotlib.japanize()
    X, y = make_classification(
        n_samples=n_samples,
        n_features=2,
        n_informative=2,
        n_redundant=0,
        n_clusters_per_class=1,
        n_classes=n_classes,
        random_state=random_state,
    )

    # NOTE(review): scikit-learn 1.5 deprecates ``multi_class``; the argument
    # is kept so older releases still fit a multinomial model. Confirm against
    # the installed version before removing it.
    clf = LogisticRegression(multi_class="multinomial", solver="lbfgs")
    clf.fit(X, y)

    # Training accuracy only: the model is scored on the data it was fitted on.
    accuracy = float(accuracy_score(y, clf.predict(X)))

    # Pad the plotting window by one unit beyond the data on every side.
    x1_min, x1_max = X[:, 0].min() - 1.0, X[:, 0].max() + 1.0
    x2_min, x2_max = X[:, 1].min() - 1.0, X[:, 1].max() + 1.0
    grid_x1, grid_x2 = np.meshgrid(
        np.linspace(x1_min, x1_max, 400),
        np.linspace(x2_min, x2_max, 400),
    )
    grid_points = np.c_[grid_x1.ravel(), grid_x2.ravel()]
    preds = clf.predict(grid_points).reshape(grid_x1.shape)

    cmap = ListedColormap(["#ff9896", "#98df8a", "#aec7e8", "#f7b6d2", "#c5b0d5"])
    # Half-integer edges put each integer class label in its own colour band.
    class_edges = np.arange(-0.5, n_classes + 0.5, 1)
    fig, ax = plt.subplots(figsize=(7, 6))
    ax.contourf(
        grid_x1,
        grid_x2,
        preds,
        alpha=0.3,
        cmap=cmap,
        levels=class_edges,
    )
    # Use the same colour range as the contour levels. Without vmin/vmax the
    # scatter normalises over the observed labels (0 .. n_classes - 1), which
    # picks different palette entries than the regions when n_classes == 2.
    scatter = ax.scatter(
        X[:, 0],
        X[:, 1],
        c=y,
        edgecolor="k",
        cmap=cmap,
        vmin=-0.5,
        vmax=n_classes - 0.5,
    )
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    ax.set_title(label_title)
    legend = ax.legend(*scatter.legend_elements(), title="class", loc="best")
    ax.add_artist(legend)
    fig.tight_layout()
    plt.show()

    return {"accuracy": accuracy}

# Run the demo with the Thai plot labels and report the accuracy it returns.
_plot_labels = {
    "label_title": "บริเวณการจำแนกของ Softmax Regression",
    "xlabel": "คุณลักษณะที่ 1",
    "ylabel": "คุณลักษณะที่ 2",
}
metrics = run_softmax_regression_demo(**_plot_labels)
print(f"ความแม่นยำขณะฝึก: {metrics['accuracy']:.3f}")

บริเวณการจำแนกของ Softmax Regression สำหรับ 3 คลาส

เอกสารอ้างอิง #

Bishop, C. M. (2006). Pattern Recognition and Machine Learning. Springer.
Murphy, K. P. (2012). Machine Learning: A Probabilistic Perspective. MIT Press.