本文用 Python 实现了集成学习中的 Bagging 和 AdaBoost 算法,并将代码封装成类,方便读者调用。为了让集成模型与未使用集成算法的单一模型在准确率上拉开差距,示例特意选用了特征较少的数据,因此准确率的绝对值不算特别高,但与未使用集成算法的模型相比已经高出不少。 by CyrusMay 2020 06 12 这世界全部的漂亮 —— 机器学习 集成学习篇——Python 实现 Bagging 和 AdaBoost 算法
摘要
Bagging算法
import copy

import numpy as np
import pandas as pd


class Cyrus_bagging(object):
    """Bagging (bootstrap aggregating) classifier built on any base estimator.

    Each ensemble member is an independent copy of ``estimator`` trained on a
    bootstrap sample (drawn with replacement, same size as the training set).
    Predictions are combined by unweighted majority vote.

    Args:
        estimator: Object exposing ``fit(x, y)`` and ``predict(x)``. It is used
            only as a template and is never fitted itself.
        n_estimators: Number of ensemble members to train.

    Attributes set by ``fit``:
        models: List of the fitted ensemble members (``None`` before fitting).
    """

    def __init__(self, estimator, n_estimators=20):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.models = None

    def fit(self, x, y):
        """Train ``n_estimators`` copies of the base estimator.

        Args:
            x: Array-like of shape (n_samples, n_features).
            y: Array-like of labels; flattened to shape (n_samples,).

        Returns:
            self, so calls can be chained.
        """
        x = np.array(x)
        y = np.array(y).reshape((-1,))
        n_samples = x.shape[0]
        self.models = []
        for _ in range(self.n_estimators):
            # Bootstrap sample: n_samples row indices drawn with replacement.
            index = np.random.choice(n_samples, n_samples)
            # Fit a fresh copy each round. Estimators whose fit() returns
            # ``self`` (e.g. scikit-learn's) would otherwise leave every slot
            # of ``models`` pointing at one object holding only the last fit.
            model = copy.deepcopy(self.estimator)
            model.fit(x[index], y[index])
            self.models.append(model)
        return self

    def predict(self, x):
        """Predict labels by majority vote over the fitted members.

        Votes are collected in a float matrix and the winner is cast with
        ``int``, so labels are expected to be integer-valued.

        Args:
            x: Array-like of shape (n_samples, n_features).

        Returns:
            1-D ``numpy.ndarray`` with one integer label per row of ``x``.
        """
        x = np.asarray(x)
        res = np.zeros([x.shape[0], len(self.models)])
        for i, model in enumerate(self.models):
            res[:, i] = model.predict(x)
        result = []
        for votes in res:
            # idxmax() yields the most frequent *label*. argmax() yields a
            # position, which is always 0 because value_counts() sorts by
            # count in descending order.
            result.append(int(pd.Series(votes).value_counts().idxmax()))
        return np.array(result)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import classification_report

# Demo: bag k-nearest-neighbour classifiers and report per-class metrics.
# x_train, y_train, x_test and y_test are not created in this snippet; they
# must already exist when it runs.
knn = KNeighborsClassifier()
model = Cyrus_bagging(estimator=knn)
model.fit(x_train, y_train)
y_pre = model.predict(x_test)
report = classification_report(y_test, y_pre)
print(report)
             precision    recall  f1-score   support

          0       1.00      1.00      1.00        11
          1       0.67      0.67      0.67         9
          2       0.70      0.70      0.70        10

avg / total       0.80      0.80      0.80        30
AdaBoost算法
import copy

import numpy as np
import pandas as pd
from sklearn.metrics import accuracy_score  # kept: the evaluation snippet after this class calls it


class CyrusAdaBoost(object):
    """Resampling-based boosting classifier in the style of AdaBoost.

    Each round draws a training sample according to the current sample
    weights, fits a fresh copy of the base estimator on it, and then raises
    the weights of the training rows that model misclassifies. Prediction is
    a weighted vote in which each model's say is ``1 - error_rate``.

    Labels must be non-negative integers, because votes are tallied by using
    the label as an array index.

    Args:
        estimator: Object exposing ``fit(x, y)`` and ``predict(x)``. It is used
            only as a template and is never fitted itself.
        n_estimators: Number of boosting rounds.

    Attributes set by ``fit``:
        model: List of fitted models, one per round (``None`` before fitting).
        error_rate: Weighted training error of each model, in round order.
    """

    def __init__(self, estimator, n_estimators=20):
        self.estimator = estimator
        self.n_estimators = n_estimators
        self.error_rate = None
        self.model = None

    def update_w(self, y, pre_y, w):
        """Return re-normalised sample weights after one boosting round.

        ``y``, ``pre_y`` and ``w`` must be aligned: element ``i`` of each
        refers to the same sample. With error rate ``e`` (the weighted share
        of misclassified samples; equal to the plain misclassification rate
        when ``w`` is uniform), a misclassified sample's weight is multiplied
        by ``exp(e)`` and a correct one by ``exp(-e)``.

        Args:
            y: True labels.
            pre_y: Predicted labels.
            w: Current sample weights. Not modified.

        Returns:
            New weight vector that sums to 1.
        """
        y = np.asarray(y).reshape((-1,))
        pre_y = np.asarray(pre_y).reshape((-1,))
        w = np.asarray(w, dtype=float)
        wrong = y != pre_y
        error_rate = w[wrong].sum() / w.sum()
        new_w = w * np.exp(np.where(wrong, error_rate, -error_rate))
        return new_w / new_w.sum()

    def cal_label(self, result, alpha):
        """Combine per-model predictions by weighted vote.

        Args:
            result: Array of shape (n_samples, n_models) holding each model's
                predicted label (non-negative integers).
            alpha: Vote weight of each model, length n_models.

        Returns:
            1-D ``numpy.ndarray`` with the winning label for each sample.
        """
        result = np.asarray(result)
        alpha = np.asarray(alpha, dtype=float)
        # bincount adds alpha[j] to the bucket of the label chosen by model j.
        label = [
            np.bincount(row.astype(int), weights=alpha).argmax()
            for row in result
        ]
        return np.array(label)

    def fit(self, x, y):
        """Run ``n_estimators`` boosting rounds.

        Args:
            x: Array-like of shape (n_samples, n_features).
            y: Array-like of labels; flattened to shape (n_samples,).

        Returns:
            self, so calls can be chained.
        """
        x = np.array(x)
        y = np.array(y).reshape((-1,))
        n_samples = x.shape[0]
        self.error_rate = []
        self.model = []
        w0 = np.ones(n_samples) / n_samples
        for _ in range(self.n_estimators):
            index = np.random.choice(n_samples, size=n_samples, p=w0)
            # Fresh copy per round; reusing one estimator would make every
            # entry of ``self.model`` the same (last-fitted) object.
            model0 = copy.deepcopy(self.estimator)
            model0.fit(x[index], y[index])
            # Score on the ORIGINAL rows so that w0[i], y[i] and pre_y[i] all
            # describe the same sample. Scoring the resampled rows would
            # re-weight unrelated samples.
            pre_y = np.asarray(model0.predict(x)).reshape((-1,))
            wrong = pre_y != y
            # w0 sums to 1, so this is the weighted error under the
            # distribution the round was trained on.
            self.error_rate.append(float(w0[wrong].sum()))
            self.model.append(model0)
            w0 = self.update_w(y, pre_y, w0)
        return self

    def predict(self, x):
        """Predict labels by weighted vote over the fitted models.

        Args:
            x: Array-like of shape (n_samples, n_features).

        Returns:
            1-D ``numpy.ndarray`` with one label per row of ``x``.
        """
        x = np.asarray(x)
        res = np.zeros([x.shape[0], len(self.model)])
        for i, model in enumerate(self.model):
            res[:, i] = model.predict(x)
        # A model's vote counts for more the lower its training error was.
        alpha = 1 - np.array(self.error_rate)
        return self.cal_label(res, alpha)
from sklearn.tree import DecisionTreeClassifier

# Demo: boost 50 decision trees and print the accuracy on the test split.
# x_train, y_train, x_test and y_test are not created in this snippet; they
# must already exist when it runs. accuracy_score comes from the import that
# accompanies the CyrusAdaBoost class.
base_tree = DecisionTreeClassifier()
model = CyrusAdaBoost(estimator=base_tree, n_estimators=50)
model.fit(x_train, y_train)
y_pre = model.predict(x_test)
test_accuracy = accuracy_score(y_pre, y_test)
print(test_accuracy)
0.932
不过你的可爱模样
——————五月天(爱情的模样)——————
本网页所有视频内容由 imoviebox边看边下-网页视频下载, iurlBox网页地址收藏管理器 下载并得到。
ImovieBox网页视频下载器 下载地址: ImovieBox网页视频下载器-最新版本下载
本文章由: imapbox邮箱云存储,邮箱网盘,ImageBox 图片批量下载器,网页图片批量下载专家,网页图片批量下载器,获取到文章图片,imoviebox网页视频批量下载器,下载视频内容,为您提供.
阅读和此文章类似的: 全球云计算