Motivation
Using linear regression for classification has two problems:
- Predictions are unbounded: they can fall below 0 or exceed 1, so they cannot be interpreted as probabilities.
- The squared loss is ill-suited to classification: the resulting objective is non-convex.
Model
| Symbol | Meaning |
|---|---|
| $x_i$ | Feature vector of sample $i$ (dimension $d$; all features are continuous numeric variables) |
| $y_i$ | True label of sample $i$ ($0/1/2/\dots$) |
| $\hat{p}_{i,j}$ | Probability, as computed by the model, that sample $i$ belongs to class $j$ |
| $\ell_i$ | Cross-entropy loss of sample $i$ |

Note the distinction between the indices: $i$ ranges over samples, $j$ over classes.
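With this notation, the model takes the standard softmax form (reconstructed here to match the `W` and `b` parameters of the implementation below, with $W = [w_1, \dots, w_K] \in \mathbb{R}^{d \times K}$ and bias $b \in \mathbb{R}^K$):

$$
\hat{p}_{i,j} = \frac{\exp(w_j^\top x_i + b_j)}{\sum_{k=1}^{K} \exp(w_k^\top x_i + b_k)},
\qquad
\ell_i = -\sum_{j=1}^{K} \mathbb{1}[y_i = j]\,\log \hat{p}_{i,j}.
$$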
Solving with gradient descent
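Let $\mathcal{L} = \frac{1}{n}\sum_{i=1}^{n} \ell_i$ be the average loss over the $n$ samples. Differentiating gives a compact closed form: with $Y \in \{0,1\}^{n \times K}$ the one-hot label matrix and $\hat{Y}$ the matrix of predicted probabilities, the gradients (computed directly in the code below) are

$$
\nabla_W \mathcal{L} = \frac{1}{n} X^\top (\hat{Y} - Y),
\qquad
\nabla_b \mathcal{L} = \frac{1}{n} \mathbf{1}^\top (\hat{Y} - Y),
$$

and each step updates $W \leftarrow W - \eta\,\nabla_W \mathcal{L}$ and $b \leftarrow b - \eta\,\nabla_b \mathcal{L}$ with learning rate $\eta$ (`lr`).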
Implementation
```python
import numpy as np
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.base import BaseEstimator, ClassifierMixin

# Generate a synthetic 3-class dataset
X, y = make_classification(
    n_samples=200, n_features=20, n_redundant=0, n_classes=3,
    n_informative=4, n_clusters_per_class=1, random_state=42
)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
# Multiclass logistic regression (softmax regression), fit by batch gradient descent
class SoftmaxRegression(BaseEstimator, ClassifierMixin):
    def __init__(self, lr=0.01, n_iter=1000):
        self.lr = lr
        self.n_iter = n_iter

    def _softmax(self, Z):
        # Subtract the row-wise max before exponentiating to avoid overflow
        exp_Z = np.exp(Z - Z.max(axis=1, keepdims=True))
        return exp_Z / np.sum(exp_Z, axis=1, keepdims=True)

    def fit(self, X, y):
        n, d = X.shape
        self.classes_ = np.unique(y)
        K = len(self.classes_)
        # Initialize parameters
        self.W = np.zeros((d, K))
        self.b = np.zeros(K)
        # One-hot encode labels (searchsorted maps each label to its column in classes_)
        Y = np.eye(K)[np.searchsorted(self.classes_, y)]
        for _ in range(self.n_iter):
            Z = X @ self.W + self.b            # (n, K) logits
            Y_hat = self._softmax(Z)           # (n, K) predicted probabilities
            grad_W = (X.T @ (Y_hat - Y)) / n   # closed-form gradients from above
            grad_b = np.mean(Y_hat - Y, axis=0)
            self.W -= self.lr * grad_W
            self.b -= self.lr * grad_b
        return self

    def predict_proba(self, X):
        Z = X @ self.W + self.b
        return self._softmax(Z)

    def predict(self, X):
        # Map argmax column indices back to the original class labels
        return self.classes_[np.argmax(self.predict_proba(X), axis=1)]
```
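Because the class inherits from `BaseEstimator` and `ClassifierMixin`, it plugs into scikit-learn tooling unchanged; for instance, as an illustrative addition (not in the original notes), it can be cross-validated directly:

```python
# Illustrative addition: the estimator API makes the class usable with
# scikit-learn utilities such as cross-validation (ClassifierMixin
# supplies an accuracy-based score method).
from sklearn.model_selection import cross_val_score

scores = cross_val_score(SoftmaxRegression(lr=0.1, n_iter=500), X, y, cv=5)
print("CV accuracy: %.3f +/- %.3f" % (scores.mean(), scores.std()))
```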
Finally, train the model and evaluate it on the held-out test set:

```python
model = SoftmaxRegression(lr=0.1, n_iter=1000)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
```