Data analytics

Download as pdf or txt
Download as pdf or txt
You are on page 1 of 10

Exp1 (Linear regression)

"""Exp 1: Simple linear regression on a single feature loaded from a CSV file."""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset; the CSV is expected to contain 'X' and 'Y' columns.
data = pd.read_csv('C:/Users/HP/Desktop/data1.csv')
X = data['X'].values.reshape(-1, 1)  # independent variable, shape (n, 1)
Y = data['Y'].values                 # dependent variable

# Hold out 20% of the samples for testing (fixed seed for reproducibility).
X_train, X_test, Y_train, Y_test = train_test_split(
    X, Y, test_size=0.2, random_state=42)

# Fit an ordinary least-squares model on the training split.
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

# Predict targets for the held-out samples.
Y_pred = regressor.predict(X_test)

# Plot training points, actual vs. predicted test points, and the fitted line.
plt.scatter(X_train, Y_train, color='blue', label='Training data')
plt.scatter(X_test, Y_test, color='green', label='Test data (Actual)')
plt.scatter(X_test, Y_pred, color='red', label='Test data (Predicted)')
plt.plot(X_train, regressor.predict(X_train), color='red', label='Regression Line')
plt.title('Linear Regression')
plt.xlabel('X (Input)')
plt.ylabel('Y (Output)')
plt.legend()
plt.show()

print("Predicted values for X_test:", Y_pred)

Output

Predicted values for X_test: [1.92857143]


Exp 5 (K Means Clustering)

"""Exp 5: K-means clustering of two features loaded from a CSV file."""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.cluster import KMeans

# Load the dataset; the CSV must provide 'Feature1' and 'Feature2' columns.
df = pd.read_csv('C:/Users/HP/Desktop/data2.csv')
X = df[['Feature1', 'Feature2']].values  # (n_samples, 2) feature matrix

k = 3  # number of clusters to form

# Fit the model, then assign every sample to its nearest centroid.
kmeans = KMeans(n_clusters=k, random_state=42)
kmeans.fit(X)
y_kmeans = kmeans.predict(X)

# Colour each point by its cluster label and overlay the learned centroids.
plt.scatter(X[:, 0], X[:, 1], c=y_kmeans, cmap='viridis', marker='o',
            label='Data Points')
plt.scatter(kmeans.cluster_centers_[:, 0], kmeans.cluster_centers_[:, 1],
            c='red', marker='X', s=200, label='Centroids')
plt.title('K-Means Clustering')
plt.xlabel('Feature 1')
plt.ylabel('Feature 2')
plt.legend()
plt.show()

Output
Exp 3 (Logistic Regression)

"""Exp 3: Logistic regression classifier on a single feature from a CSV file."""

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Load the dataset; the CSV must provide a 'Feature' column and a binary
# 'Target' column (values 0 or 1).
data = pd.read_csv('C:/Users/HP/Desktop/data3.csv')
X = data[['Feature']].values  # independent variable, shape (n, 1)
Y = data['Target'].values     # dependent variable (0 or 1)

# Hold out 20% of the samples for testing (fixed seed for reproducibility).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit the classifier on the training split.
classifier = LogisticRegression()
classifier.fit(X_train, Y_train)

# Predict classes for the held-out samples and score them.
Y_pred = classifier.predict(X_test)
accuracy = accuracy_score(Y_test, Y_pred)

# Plot training points and actual vs. predicted test points.
plt.scatter(X_train, Y_train, color='blue', label='Training data')
plt.scatter(X_test, Y_test, color='green', label='Test data (Actual)')
plt.scatter(X_test, Y_pred, color='red', label='Test data (Predicted)')

# Fine-grained X values for a smooth sigmoid curve.
# BUG FIX: the original had the stray bare name `curve` (a word-wrapped
# comment fragment) on its own line, which raised NameError at runtime;
# it is folded back into this comment.  X.min()/X.max() are used instead
# of builtin min()/max(), which return 1-element arrays on a 2-D input.
X_range = np.linspace(X.min(), X.max(), 100).reshape(-1, 1)
Y_prob = classifier.predict_proba(X_range)[:, 1]  # P(class == 1)
plt.plot(X_range, Y_prob, color='red', label='Logistic Regression Curve')

plt.title('Logistic Regression')
plt.xlabel('X (Input)')
plt.ylabel('Probability/Output')
plt.legend()
plt.show()

# Report predictions and accuracy on the held-out samples.
print("Predicted values for X_test:", Y_pred)
print("Accuracy:", accuracy)

Output

Predicted values for X_test: [1]

Accuracy: 0.0
Exp 2 (Multiple Linear Regression)

"""Exp 2: Multiple linear regression on two features from a CSV file."""

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load the dataset; the CSV must provide 'Feature1', 'Feature2' and 'Target'.
df = pd.read_csv('C:/Users/HP/Desktop/data4.csv')
X = df[['Feature1', 'Feature2']].values  # independent variables, shape (n, 2)
Y = df['Target'].values                  # dependent variable

# Hold out 20% of the samples for testing (fixed seed for reproducibility).
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

# Fit an ordinary least-squares model on the training split.
regressor = LinearRegression()
regressor.fit(X_train, Y_train)

# Predict targets for the held-out samples.
Y_pred = regressor.predict(X_test)

# Predicted-vs-actual scatter; the diagonal y = x marks a perfect fit.
# BUG FIX: the original had the bare tokens `Regression line` (a word-wrapped
# comment fragment) on their own line, which is a SyntaxError; they are
# folded back into this comment.
plt.scatter(Y_test, Y_pred, color='blue', label='Predicted vs Actual')
plt.plot([min(Y_test), max(Y_test)], [min(Y_test), max(Y_test)], color='red', label='Best fit line')
plt.title('Multiple Linear Regression')
plt.xlabel('Actual values')
plt.ylabel('Predicted values')
plt.legend()
plt.show()

# Print predictions to match the experiment's recorded output (the original
# script never printed them although the Output section shows a prediction).
print("Predicted values for X_test:", Y_pred)

Output

Predicted values for X_test: [3.]


Exp 4 (Apriori Algorithm)

"""Exp 4: Frequent-itemset mining and association rules via Apriori."""

import pandas as pd
from mlxtend.frequent_patterns import apriori, association_rules
from mlxtend.preprocessing import TransactionEncoder

# Five example market-basket transactions.
data = {'Transaction': ['T1', 'T2', 'T3', 'T4', 'T5'],
        'Items': [['Bread', 'Milk'], ['Bread', 'Diaper', 'Beer', 'Eggs'],
                  ['Milk', 'Diaper', 'Beer', 'Cola'], ['Bread', 'Milk', 'Diaper', 'Beer'],
                  ['Bread', 'Milk', 'Cola']]}
df = pd.DataFrame(data)

# One-hot encode the item lists into a boolean transaction matrix.
encoder = TransactionEncoder()
one_hot = encoder.fit(df['Items']).transform(df['Items'])
df_encoded = pd.DataFrame(one_hot, columns=encoder.columns_)

# Mine itemsets appearing in at least 40% of the transactions, then derive
# rules whose confidence is at least 50%.
frequent_itemsets = apriori(df_encoded, min_support=0.4, use_colnames=True)
rules = association_rules(frequent_itemsets, metric="confidence", min_threshold=0.5)

print("Frequent Itemsets:")
print(frequent_itemsets)
print("\nAssociation Rules:")
print(rules)
Output

You might also like