Motivation

线性回归用于分类问题存在两个问题

  1. 预测结果没有限制,可能小于0或大于1,无法解释为概率
  2. 平方损失函数不适合分类问题(非凸)

Model

符号含义
- $x_i$：样本 $i$ 的特征向量（维度为 $d$，特征均为连续数值变量）
- $y_i$：样本 $i$ 的真实标签（取值 0/1/2/…）
- $\hat{y}_{j}$：某样本经 model 计算为第 $j$ 类的概率
- $L_i$：样本 $i$ 的交叉熵损失

注意区分i,j

梯度下降求解

实现

import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
 
from sklearn.base import BaseEstimator, ClassifierMixin
 
## 生成多分类数据
X, y = make_classification(
    n_samples=200, n_features=20, n_redundant=0, n_classes=3,
    n_informative=4, n_clusters_per_class=1, random_state=42
)
 
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
 
## 实现多分类逻辑回归
class SoftmaxRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, lr=0.01, n_iter=1000):
        self.lr = lr
        self.n_iter = n_iter
 
    def _softmax(self, Z):
        exp_Z = np.exp(Z)
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)
 
    def fit(self, X, y):
        n, d = X.shape
        self.classes_ = np.unique(y)
        K = len(self.classes_)
 
        # 初始化参数
        self.W = np.zeros((d, K))
        self.b = np.zeros(K)
 
        # one-hot 编码标签
        Y = np.eye(K)[y]
 
        for _ in range(self.n_iter):
            Z = X @ self.W + self.b
            Y_hat = self._softmax(Z)
 
            grad_W = (X.T @ (Y_hat - Y)) / n
            grad_b = np.mean(Y_hat - Y, axis=0)
 
            self.W -= self.lr * grad_W
            self.b -= self.lr * grad_b
 
        return self
 
    def predict_proba(self, X):
        Z = X @ self.W + self.b
        return self._softmax(Z)
 
    def predict(self, X):
        return np.argmax(self.predict_proba(X), axis=1)
 
model = SoftmaxRegression(lr=0.001, n_iter=10) # SoftmaxRegression 
model.fit(X_train, y_train)
y_pred = model.predict(X_train)
 
print("Accuracy:", accuracy_score(y_train, y_pred))