# codekarim is just a collection of reminders for fenuapps.com
"""Reminder script: simple linear regression of Salary on YearsExperience.

Reads ``./SalaryData1.csv`` (which must provide the ``YearsExperience`` and
``Salary`` columns), splits it 80/20 into train and test sets, fits a
scikit-learn ``LinearRegression`` on the training part, prints the predicted
salary for 10 years of experience, and prints the model's score on the
test part.
"""
import warnings

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

# Notebook reminder: enable inline figures with `%matplotlib inline`, or
# programmatically via
# `get_ipython().run_line_magic('matplotlib', 'inline')`.
sns.set()

df = pd.read_csv("./SalaryData1.csv")
print(df.shape)
# True if at least one cell in the data set is missing.
print(df.isnull().values.any())

# Fixed random_state so the same rows land in each split on every run.
train_set, test_set = train_test_split(df, test_size=0.2, random_state=42)

# Reminder: `train_set.describe()` and `train_set.corr()` give a quick
# numeric overview of the training data.
train_set.plot.scatter(x="YearsExperience", y="Salary")

# `.to_frame()` keeps features and targets two-dimensional (one column each),
# which is the layout the estimator is fitted and scored with below.
Xtrain = train_set["YearsExperience"].to_frame()
yTrain = train_set["Salary"].to_frame()
Xtest = test_set["YearsExperience"].to_frame()
ytest = test_set["Salary"].to_frame()

# Plain NumPy copies of the training data; not used by the steps below.
X = np.array(Xtrain)
y = np.array(yTrain)

lin_reg = LinearRegression()
lin_reg.fit(Xtrain, yTrain)
# Reminder: the fitted line is available as `lin_reg.coef_` (slope) and
# `lin_reg.intercept_`.

# predict() needs a 2-D (n_samples, n_features) input; a bare scalar such as
# `predict(10)` raises ValueError. A one-row frame with the same column name
# used during fitting also keeps the feature names consistent.
salary_pred = lin_reg.predict(pd.DataFrame({"YearsExperience": [10]}))
print("salary_pred: ", salary_pred)

score = lin_reg.score(Xtest, ytest)
print("score: ", score)