Skip to content

Linear Regression

import pandas as pd
## Create DataSet
# Column-oriented construction: one list per column, one position per student.
# Row i of the frame is made of the i-th entry of every list.
dataset = pd.DataFrame(
    {
        "name": ["Suresh", "Ramesh", "Akshay", "Mahesh", "Shekhar", "SomeOne"],
        "duration": [2, 8, 4, 9, 7, 3],
        "marks": [20, 80, 60, 80, 70, 30],
        "pass": [False, True, True, True, True, False],
    }
)
# Bare expression: displays the frame when run as a notebook cell.
dataset
name duration marks pass
0 Suresh 2 20 False
1 Ramesh 8 80 True
2 Akshay 4 60 True
3 Mahesh 9 80 True
4 Shekhar 7 70 True
5 SomeOne 3 30 False

Define the dependent (y) and independent (x) variables

# x: independent variable (study duration); y: dependent variable (marks).
x, y = dataset['duration'], dataset['marks']
# Import numpy
import numpy as np
# Bare expression: displays the Series when run as a notebook cell.
x
0    2
1    8
2    4
3    9
4    7
5    3
Name: duration, dtype: int64
## For .fit() function, independent variable should be in 2D
# Convert to 2D: a single column, with -1 letting NumPy infer the row count
# from the length of x.
new_x = x.values.reshape(-1, 1)
# Now we have 2D value
new_x
array([[2],
       [8],
       [4],
       [9],
       [7],
       [3]])

Create and train the model, then predict

# Import Module
from sklearn.linear_model import LinearRegression
# Create Model: a linear regression estimator with default settings
model = LinearRegression()
# Train Model: new_x is the 2D feature column, y the marks to predict
model.fit(X=new_x, y=y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None, normalize=False)
# Print Coefficient
# One entry per feature; with the single "duration" feature this is the slope,
# i.e. the predicted change in marks per extra unit of duration.
model.coef_
array([8.43373494])
# Print Intercept
# The constant term of the fitted line: the predicted marks at duration 0.
model.intercept_
10.281124497991982
# Predict marks for 5 Hrs
# The input is 2D like the training data: one sample (outer list) with one
# feature value (inner list).
model.predict([[5]])
array([52.4497992])

Save the model for future use

# Import module
# sklearn.externals.joblib is deprecated in scikit-learn 0.21 and removed in
# 0.23, so import the standalone joblib package directly. Fall back to the
# vendored copy only on old installs where standalone joblib is unavailable.
try:
    import joblib
except ImportError:
    from sklearn.externals import joblib
/home/suresh/learn/mlops/env/lib64/python3.7/site-packages/sklearn/externals/joblib/__init__.py:15: FutureWarning: sklearn.externals.joblib is deprecated in 0.21 and will be removed in 0.23. Please import this functionality directly from joblib, which can be installed with: pip install joblib. If this warning is raised when loading pickled models, you may need to re-serialize those models with scikit-learn 0.21+.
  warnings.warn(msg, category=FutureWarning)

#  This saves model in file - MarksPrediction.pk1
# The path is relative, so the file is written to the current working directory.
# NOTE(review): the extension is ".pk1" (digit one); ".pkl" may have been
# intended. The load step uses the same name, so rename both together if at all.
joblib.dump(model, 'MarksPrediction.pk1')
['MarksPrediction.pk1']

Use a saved model

# Create a new model from the PK1 file
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
new_model = joblib.load('MarksPrediction.pk1')
# Predict using this model
# Same single-sample 2D input (5 hours) as used with the original model.
new_model.predict([[5]])
array([52.4497992])
Back to top