做网站一条龙,提供网站建设教学视频,申请免费网站空间,城阳网站开发公司 注意:数据由实习单位老师提供(需要自行搜索下载),页面美化为下载模板。
项目介绍:前端页面输入影响成绩的属性,预测出成绩,并作可视化展示——属性对成绩的影响。使用python pyspark 进行数据预…注意:数据由实习单位老师提供(需要自行搜索下载),页面美化为下载模板。
项目介绍:前端页面输入影响成绩的属性,预测出成绩,并作可视化展示——属性对成绩的影响。使用 python、pyspark 进行数据预处理、探索性数据分析、可视化、调用模型、对比模型、调优、评估等。
成果展示
1.页面功能展示 2.输入影响成绩因素值——预测成绩 3.可视化部分 4.pyspark代码
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from Cython import inline
from matplotlib.font_manager import FontProperties
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import ElasticNet
from sklearn.ensemble import RandomForestRegressor
from sklearn.ensemble import ExtraTreesRegressor
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.svm import SVR
from sklearn import preprocessing, metrics, svm
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler
from sklearn import preprocessing, metrics, svm
from sklearn.metrics import mean_squared_error, mean_absolute_error, median_absolute_error
import scipy
import pickle
import seaborn as sns
from sympy.physics.quantum.circuitplot import matplotlib
# ---------------------------------------------------------------------------
# Plot configuration and raw data loading
# ---------------------------------------------------------------------------
import warnings

# Silence library warnings so the printed results below stay readable.
warnings.filterwarnings("ignore")

# Apply the matplotlib style sheet FIRST. Style sheets and sns.set() both
# overwrite rcParams (including the font list), so the explicit font settings
# further down must come after them or the Chinese font is lost again.
# matplotlib 3.6 renamed the bundled seaborn styles to "seaborn-v0_8-*";
# fall back to the old name for older matplotlib releases.
try:
    plt.style.use("seaborn-v0_8-darkgrid")
except OSError:
    plt.style.use("seaborn-darkgrid")

# A single sns.set() call: every call resets the theme to its defaults, so
# passing font_scale and font in two separate calls drops the first setting.
sns.set(font="SimHei", font_scale=1.5)

plt.rcParams["font.sans-serif"] = ["SimHei"]  # SimHei (bold) font for Chinese labels
plt.rcParams["axes.unicode_minus"] = False  # render "-" properly instead of a box
plt.rcParams["figure.dpi"] = 100
plt.rcParams["figure.figsize"] = (5, 3)

# Parse the CSV once and hand out independent copies; the three frames are
# modified separately later on (e.g. `student` is one-hot encoded).
DATA_PATH = "../data/student-mat.csv"
student = pd.read_csv(DATA_PATH)
data = student.copy()
df = student.copy()
# ---------------------------------------------------------------------------
# Correlation of the attributes with the target column G3
# ---------------------------------------------------------------------------
# Exploratory checks kept for reference:
# print(df.columns)
# student["G3"].describe()
# print(student.isna().sum())  # number of missing values per column
# student.info()  # inspect the dtype of every column

# Absolute correlation of the numeric columns with G3, strongest first.
# Non-numeric columns are excluded explicitly: pandas >= 2.0 raises on
# DataFrame.corr() when string columns are present, while older versions
# silently dropped them, so this keeps the old result on every version.
# The top 15 entries are kept (G3 itself is part of that list).
most_correlated1 = (
    student.select_dtypes(include="number")
    .corr()
    .abs()["G3"]
    .sort_values(ascending=False)
    .head(15)
)
print(most_correlated1)

# One-hot encode the categorical columns so they take part in the correlation.
student = pd.get_dummies(student)
# print(student.columns)

# Same ranking on the encoded frame: the 15 strongest absolute correlations.
most_correlated = (
    student.corr()
    .abs()["G3"]
    .sort_values(ascending=False)
    .head(15)
)
print(most_correlated)

# Regression target, taken from the untouched copy of the raw data.
y = data["G3"]
# ---------------------------------------------------------------------------
# Feature selection and encoding
# ---------------------------------------------------------------------------
# Keep a reference to the G3 column.
labels = data["G3"]
print(most_correlated.index)

# Keep only the target plus the hand-picked predictor columns; every other
# column of the raw data is discarded here.
data = data[
    [
        "G3",
        "failures",
        "Medu",
        "age",
        "Fedu",
        "goout",
        "traveltime",
        "romantic",
        "higher",
    ]
]
# Captured before G3 is dropped, so this index still contains "G3".
feature = data.columns

# Remove the target from the feature matrix.
data = data.drop(labels=["G3"], axis="columns")
print(data)

# One-hot encode the remaining categorical variables.
data = pd.get_dummies(data)
print(data.columns)
# ---------------------------------------------------------------------------
# Train / test split, linear regression fit and evaluation
# ---------------------------------------------------------------------------
# y = pd.get_dummies(y)
X_train, X_test, y_train, y_test = train_test_split(
    data, y, test_size=0.15, random_state=42
)

# `normalize` is no longer passed: the parameter was removed from
# LinearRegression in scikit-learn 1.2 and raises TypeError there. The old
# value (False) was the default, so the fitted model is unchanged.
model5 = LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1)
model5 = model5.fit(X_train, y_train)
y_pred5 = model5.predict(X_test)

print("线性回归可解释方差值{}".format(round(metrics.explained_variance_score(y_test, y_pred5), 2)))
print("线性回归平均绝对误差{}".format(round(metrics.mean_absolute_error(y_test, y_pred5), 2)))
# The value is the square ROOT of the mean squared error, so it is labelled
# as RMSE and rounded to two decimals like the other metrics.
print("线性回归均方根误差{}".format(round(np.sqrt(np.mean((y_pred5 - y_test) ** 2)), 2)))
print("线性回归 R方值{}".format(round(metrics.r2_score(y_test, y_pred5), 2)))

# Fresh, unfitted estimator with the same settings; it is fitted and
# persisted in the next step.
LR_model = LinearRegression(copy_X=True, fit_intercept=True, n_jobs=1)
# ---------------------------------------------------------------------------
# Fit, persist and reload the linear regression model
# ---------------------------------------------------------------------------
# Fit on the training split.
model = LR_model.fit(X_train, y_train)

# Target file for the pickled model (linear regression model, "LR_Model").
filename = "../modelR/LR_Model"

# Serialize the fitted model in binary mode. The context manager closes
# (and therefore flushes) the file before it is opened again for reading.
with open(filename, "wb") as model_file:
    pickle.dump(model, model_file)

# Load the model back. A separate handle name is used so that `model` keeps
# referring to the fitted estimator instead of a closed file object.
# NOTE: pickle.load executes arbitrary code from the file; only load model
# files produced by this script or another trusted source.
with open(filename, "rb") as model_file:
    loaded_model = pickle.load(model_file)

# print(X_test.head(1))
# Predict with the reloaded model on the first five test rows.
predictions = loaded_model.predict(X_test.head(5))
print(predictions)