codekarim is just a bunch of reminders for fenuapps.com
# -*- coding: utf-8 -*-
"""
Created on Tue Jan 15 11:37:53 2019

https://www.ritchieng.com/machine-learning-evaluate-linear-regression-mo...

Linear-regression reminders on the Advertising dataset:

1. statsmodels OLS of ``sales`` on ``TV`` and a single prediction;
2. scikit-learn ``LinearRegression`` on ``TV``, ``radio`` and ``newspaper``
   and a single prediction;
3. a train/test split, a refit on the training part, and the RMSE of the
   predictions on the test part.

@author: K
"""

# imports
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.formula.api as smf
from IPython import get_ipython
from sklearn import metrics
from sklearn.linear_model import LinearRegression
# ``sklearn.cross_validation`` was removed in scikit-learn 0.20;
# ``train_test_split`` is provided by ``sklearn.model_selection``.
from sklearn.model_selection import train_test_split

# allow plots to appear directly in the notebook
#%matplotlib inline
# get_ipython() returns None when the file is run as a plain script, so only
# invoke the magic when an interactive IPython shell is actually present.
ipython_shell = get_ipython()
if ipython_shell is not None:
    ipython_shell.run_line_magic('matplotlib', 'inline')

data = pd.read_csv('http://www-bcf.usc.edu/~gareth/ISL/Advertising.csv', index_col=0)
data.head()

# visualize the relationship between the features and the response using scatterplots
#sns.pairplot(data, x_vars=['TV','radio','newspaper'], y_vars='sales', height=7, aspect=0.7)

### STATSMODELS ###

# create a fitted model
lm1 = smf.ols(formula='sales ~ TV', data=data).fit()

# print the coefficients
#print("STATSMODELS params: ",lm1.params)

# Statsmodels Prediction
# you have to create a DataFrame since the Statsmodels formula interface expects it
X_new = pd.DataFrame({'TV': [50]})

# predict for a new observation
print("Statsmodels Prediction for TV 50: ", lm1.predict(X_new))

### SCIKIT-LEARN ###

# create X and y
feature_cols = ['TV', 'radio', 'newspaper']
X = data[feature_cols]
y = data.sales

# instantiate and fit
lm2 = LinearRegression()
lm2.fit(X, y)

# print the coefficients
#print("SCIKIT-LEARN params: ",lm2.intercept_,lm2.coef_)

# Scikit-learn Prediction
# predict for a new observation; the model was fitted on a DataFrame, so the
# new observation is passed with the same column names (same values, same
# prediction, no "feature names" warning on recent scikit-learn versions)
new_observation = pd.DataFrame([[44, 40, 45]], columns=feature_cols)
print("SCIKIT-LEARN Prediction for TV 44, 40, 45: : ", lm2.predict(new_observation))

# pairwise visualization of the data
#sns.pairplot(data, x_vars=['TV','radio','newspaper'], y_vars='sales', height=7, aspect=0.7, kind='reg')

### SCIKIT-LEARN Split data ###

# Split data (fixed random_state so the split is the same on every run)
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=1)

# Instantiate model
lm2 = LinearRegression()

# Fit Model on the training part only
lm2.fit(X_train, y_train)

# Predict on the held-out part
y_pred = lm2.predict(X_test)

# RMSE: square root of the mean squared error between truth and prediction
print("RMSE = ", np.sqrt(metrics.mean_squared_error(y_test, y_pred)))