EX-1
EX-1
RegNo: 953622104040
EX-1
1a. Build Linear Regression Model using the Least Squares method.
# Least-squares regression
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
def least_squares_fit(x_values, y_values):
    """Fit the line ``y = slope * x + intercept`` by ordinary least squares.

    Args:
        x_values: Iterable of numeric x observations.
        y_values: Iterable of numeric y observations, same length as
            ``x_values``.

    Returns:
        A ``(slope, intercept)`` tuple.

    Raises:
        ValueError: If the inputs are empty, differ in length, or all x
            values are identical (the slope is then undefined).
    """
    xs = list(x_values)
    ys = list(y_values)
    if len(xs) != len(ys):
        raise ValueError("x_values and y_values must have the same length")
    n = len(xs)
    if n == 0:
        raise ValueError("at least one data point is required")

    sum_x = sum(xs)
    sum_y = sum(ys)
    sum_xy = sum(a * b for a, b in zip(xs, ys))
    sum_sq_x = sum(a * a for a in xs)

    # The denominator is n^2 times the variance of x; zero means a vertical
    # line, for which no finite slope exists.
    denominator = n * sum_sq_x - sum_x ** 2
    if denominator == 0:
        raise ValueError("all x values are identical; slope is undefined")

    slope = (n * sum_xy - sum_x * sum_y) / denominator
    intercept = (sum_y - slope * sum_x) / n
    return slope, intercept


if __name__ == "__main__":
    # The first CSV column is read as x and the second as y.
    data = pd.read_csv('data.csv')
    X = data.iloc[:, 0]
    Y = data.iloc[:, 1]

    # Raw data.
    plt.scatter(X, Y)
    plt.show()

    # m is the slope (coefficient of x) and c is the intercept.
    m, c = least_squares_fit(X, Y)
    print("The intercept is:", c)
    print("The coefficient is:", m)

    y_pred = [m * x + c for x in X]

    # Draw the fitted line through its values at the smallest and largest x.
    # Pairing min(X)/max(X) with min(y_pred)/max(y_pred) would flip the line
    # whenever the slope is negative.
    x_ends = [min(X), max(X)]
    plt.scatter(X, Y)
    plt.plot(x_ends, [m * x + c for x in x_ends], color='red')
    plt.show()
OUTPUT:
Name: Jeyasuriya K
RegNo: 953622104040
1c. Build Multiple Linear Regression Model using scikit-learn and statsmodels.
#using statsmodels and Sklearn
import pandas as pd
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
# Multiple linear regression on the advertising data with statsmodels OLS.
# Every column except 'sales' is used as a predictor.
df = pd.read_csv("Advertising.csv")
x = df.drop('sales', axis=1)
y = df['sales']

# statsmodels' OLS fits no intercept unless a constant column is present;
# add_constant prepends one by default, so it is the first parameter.
x = sm.add_constant(x)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model_ols = sm.OLS(y_train, x_train).fit()

# params is a label-indexed Series, so positional access must go through
# .iloc; plain params[0] relies on a deprecated integer-key fallback.
intercept_ols = model_ols.params.iloc[0]
coefficients_ols = model_ols.params.iloc[1:]
print("Intercept:",intercept_ols)
print("Coefficient:",coefficients_ols)

# Evaluate on the held-out 20% of the rows.
y_pred_ols = model_ols.predict(x_test)
mse_ols = mean_squared_error(y_test, y_pred_ols)
print("Mean Squared Error using OLS:",mse_ols)

# Predicted vs. actual; the dashed diagonal marks perfect predictions.
plt.scatter(y_test, y_pred_ols)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], linestyle='--', color='red',
         linewidth=2)
plt.xlabel("Actual Sales")
plt.ylabel("Predicted Sales")
plt.title("OLS Regression Results")
plt.show()
Output:
Name: Jeyasuriya K
RegNo: 953622104040
1d. Build Linear Regression Model using Pre-defined Packages with training and testing data.
# Predefined packages
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
# Multiple linear regression on the advertising data with scikit-learn,
# evaluated on a held-out test split.
df = pd.read_csv("Advertising.csv")
x = df.drop('sales', axis=1)
y = df['sales']

# One-hot encode any categorical predictors; drop_first avoids a redundant
# (perfectly collinear) dummy column.
x = pd.get_dummies(x, drop_first=True)
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)
model = LinearRegression()
model.fit(x_train, y_train)
y_pred = model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)

# Label the outputs by what they are. The old "M:"/"C:" labels were
# attached to the intercept and the coefficients respectively, the reverse
# of the usual m = slope, c = intercept convention.
print("Intercept:",model.intercept_)
print("Coefficient:",model.coef_)
print(f"Mean Squared Error: {mse}")

# Predicted vs. actual; the dashed diagonal marks perfect predictions.
plt.scatter(y_test,y_pred)
plt.plot([min(y_test), max(y_test)], [min(y_test), max(y_test)], linestyle='--', color='red',
         linewidth=2)
plt.show()
Output:
Name: Jeyasuriya K
RegNo: 953622104040
# BayesianRidge
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import BayesianRidge
from sklearn.metrics import mean_squared_error
# Bayesian ridge regression on the advertising data: 'sales' is the target
# and every other column is a predictor.
df = pd.read_csv("Advertising.csv")
y = df['sales']
x = df.drop(columns='sales')

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# fit() returns the estimator itself, so construction and fitting chain.
model = BayesianRidge().fit(x_train, y_train)
y_pred = model.predict(x_test)
mse = mean_squared_error(y_test, y_pred)

print("Intercept:",model.intercept_)
print("Coefficient:",model.coef_)
print(f"Mean Squared Error: {mse}")

# Predicted vs. actual, with a dashed diagonal marking perfect predictions.
diagonal = [min(y_test), max(y_test)]
plt.scatter(y_test,y_pred)
plt.plot(diagonal, diagonal, linestyle='--', color='red', linewidth=2)
plt.show()
Output:
Name: Jeyasuriya K
RegNo: 953622104040
# Logistic Regression
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.preprocessing import StandardScaler
# Logistic-regression classifier for the 'diagnosis' column, using every
# other column as a feature.
df = pd.read_csv("breast_cancer.csv")
y = df['diagnosis']
x = df.drop(columns=['diagnosis'])

# Hold out 20% of the rows for evaluation (fixed seed for repeatability).
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42
)

# Learn the scaling statistics from the training rows only, then apply the
# same transform to both splits so no test information leaks into the fit.
scaler = StandardScaler().fit(x_train)
x_train = scaler.transform(x_train)
x_test = scaler.transform(x_test)

# fit() returns the estimator itself, so construction and fitting chain.
model = LogisticRegression().fit(x_train, y_train)
y_pred = model.predict(x_test)
accuracy = accuracy_score(y_test, y_pred)
print('Accuracy:', accuracy)
Output: