Régression linéaire avec des listes en entrée

Category:

# -*- coding: utf-8 -*-
"""
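Linear regression with plain Python lists as input.

Fits crop yield (in tons) against farm size (in hectares) by ordinary least
squares with statsmodels, plots the observations and the fitted line, and
predicts the yield for a few new farm sizes.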
"""

import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm


# Observations used for the regression; X and Y are passed together to the
# fit and to the scatter plot, so they are expected to have the same length.
# Farm size, in hectares.
X = [1, 1, 2, 2, 2.3, 3, 3, 3.5, 4, 4.3]
# Crop yield, in tons.
Y = [6.9, 6.7, 13.8, 14.7, 16.5, 18.7, 17.4, 22, 29.4, 34.5]

# Disabled alternative: the bare string literal below is evaluated and
# discarded, so none of the code inside it runs. It holds the array-based
# version of the regression (sm.OLS on sm.add_constant(X)), kept for
# reference next to the formula-based version further down.
"""
#    By default, OLS implementation of statsmodels does not include an intercept 
#     in the model unless we are using formulas.
#    We need to explicitly specify the use of intercept in OLS method by 
#     adding a constant term.
X_1 = sm.add_constant(X)
#print(X_1)
model = sm.OLS(Y,X_1)
results = model.fit()
#print(results.params) => [-1.32137039  7.42581241] y = b.1 + a.x  avec a=7.42581241 and b=-1.32137039

Y_predicted = results.predict(X_1)

fig = plt.figure(figsize=(10,5))
ax = fig.add_subplot(111)
ax.set_title('En rouge les données prédites')


plt.scatter(X,Y, c='blue')
plt.xlabel("Farm size in hectares")
plt.ylabel("Crop yield in tons")
plt.plot(X,Y_predicted, "r")



new_X=[5,5.5,6,7]
new_X_1 = sm.add_constant(new_X)
Ypred = results.predict(new_X_1)
plt.scatter(new_X,Ypred, c='red')

plt.show()
#print(results.predict(new_X_1))
"""
######################################

import statsmodels.formula.api as sm1
import pandas as pd
# Fit the model through the statsmodels formula interface; the fitted
# parameters expose an 'Intercept' entry alongside the 'X' slope.
df1 = pd.DataFrame({'X': X, 'Y': Y})
results_formula = sm1.ols(formula='Y ~ X', data=df1).fit()

# Coefficients of the fitted line y = a*x + b.
params = results_formula.params
a = params['X']
b = params['Intercept']
print(" a, b ", a, b)

# Predict the yield for a few new farm sizes.
df_new = pd.DataFrame({'X': [5, 5.5, 6, 7]})
Ypred = results_formula.predict(df_new)

# Single figure with one axes; everything below is drawn on `ax`.
fig, ax = plt.subplots(figsize=(10, 5))
ax.set_title('En rouge les données prédites')
ax.set_xlabel("Farm size in hectares")
ax.set_ylabel("Crop yield in tons")

# Observations in blue, fitted line dashed, predicted points in red.
ax.scatter(X, Y, c='blue')
x = np.linspace(0, 7.2, 1000)
ax.plot(x, a * x + b, linestyle='dashed')
ax.scatter(df_new['X'], Ypred, c='red')
plt.show()