# Amazing-Python-Scripts
# 57 lines · 1.6 KB
"""Train a random-forest salary regressor and persist it to disk.

The script reads the cleaned salary dataset, dummy-encodes the selected
feature columns, tunes a ``RandomForestRegressor`` with a 5-fold grid
search, reports the mean absolute error on a held-out test split, and
pickles both the best estimator and the list of feature columns it was
fitted on.
"""

# import libraries
import pickle

import numpy as np
import pandas as pd
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import train_test_split

# Load dataset
df = pd.read_csv('./Salary Predictor/dataset/cleaned_dataset.csv')

# Keep only the target (avg_salary) and the columns used as features.
df_model = df[['avg_salary', 'Sector', 'python_yn',
               'job_sim', 'R_yn', 'tableau', 'power bi', 'ml', 'dl']]

# Categorical encoding
df_dum = pd.get_dummies(df_model)

# division into training and test set
X = df_dum.drop('avg_salary', axis=1)
y = df_dum.avg_salary.values
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42)

# Model Generation
regressor = RandomForestRegressor()

# Baseline: mean 5-fold cross-validated score of the untuned model.
# The scorer is the *negated* MAE, so values closer to zero are better.
baseline_score = np.mean(cross_val_score(
    regressor, X_train, y_train,
    scoring='neg_mean_absolute_error', cv=5))
print('Baseline CV score (neg MAE):', baseline_score)

# Hyperparameter tuning
# NOTE(review): 'mse'/'mae' and max_features='auto' are legacy spellings;
# recent scikit-learn releases renamed the criteria to 'squared_error' /
# 'absolute_error' and dropped 'auto'. Confirm against the pinned
# scikit-learn version before running.
parameters = {
    "n_estimators": range(10, 400, 10),
    "criterion": ['mse', 'mae'],
    "max_features": ['auto', 'sqrt', 'log2']
}

# Exhaustive search over 39 * 2 * 3 = 234 combinations, 5 folds each.
gs = GridSearchCV(regressor, param_grid=parameters,
                  scoring='neg_mean_absolute_error', cv=5)
gs.fit(X_train, y_train)

best_cv_score = gs.best_score_
print('Best CV score (neg MAE):', best_cv_score)
y_pred = gs.best_estimator_.predict(X_test)

# Accuracy measurement
test_mae = mean_absolute_error(y_test, y_pred)
print('Test MAE:', test_mae)

# Save the model
filename = './Salary Predictor/models/random_forest2_model.sav'
# Use a context manager so the file handle is flushed and closed
# deterministically instead of being left to the garbage collector.
with open(filename, 'wb') as model_file:
    pickle.dump(gs.best_estimator_, model_file)

# saving the columns
# The dummy-encoded column names are stored alongside the model so the
# same feature layout can be rebuilt when the model is loaded.
model_columns = list(X.columns)
with open('./Salary Predictor/models/model_columns1.pkl', 'wb') as file:
    pickle.dump(model_columns, file)