Big Data Assignment - 7

This notebook loads and analyzes the Iris and Digits datasets with PyTorch. It standardizes the features, splits the data into training and test sets (60/40), defines Multi-Layer Perceptron classifiers with two hidden layers of 100 units each, trains each model for 85 epochs while recording the training loss and test accuracy, and plots both curves against the epoch count.


In [2]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.datasets import load_iris,load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.autograd import Variable
import tqdm

In [33]:

# IRIS Dataset
data_iris = load_iris()
X = data_iris['data']
y = data_iris['target']
species = data_iris['target_names']
features = data_iris['feature_names']

# Standardize the features
scaler = StandardScaler()
X_scaled_value = scaler.fit_transform(X)

# Train/test data split
X_train_K, X_test_K, y_train_K, y_test_K = train_test_split(
    X_scaled_value, y, test_size=0.4, random_state=2)

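As a quick sanity check (not in the original notebook), the standardized features should have per-column mean of roughly 0 and standard deviation of roughly 1:

# Sanity check (added): StandardScaler output should be ~zero-mean, unit-variance
print(X_scaled_value.mean(axis=0).round(6))  # expect values near [0. 0. 0. 0.]
print(X_scaled_value.std(axis=0).round(6))   # expect values near [1. 1. 1. 1.]
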
In [34]:

X

Out[34]:

array([[5.1, 3.5, 1.4, 0.2],
       [4.9, 3. , 1.4, 0.2],
       [4.7, 3.2, 1.3, 0.2],
       [4.6, 3.1, 1.5, 0.2],
       [5. , 3.6, 1.4, 0.2],
       [5.4, 3.9, 1.7, 0.4],
       [4.6, 3.4, 1.4, 0.3],
       [5. , 3.4, 1.5, 0.2],
       [4.4, 2.9, 1.4, 0.2],
       [4.9, 3.1, 1.5, 0.1],
       [5.4, 3.7, 1.5, 0.2],
       [4.8, 3.4, 1.6, 0.2],
       [4.8, 3. , 1.4, 0.1],
       [4.3, 3. , 1.1, 0.1],
       [5.8, 4. , 1.2, 0.2],
       [5.7, 4.4, 1.5, 0.4],
       [5.4, 3.9, 1.3, 0.4],
       [5.1, 3.5, 1.4, 0.3],

In [35]:

y

Out[35]:

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

In [36]:

fig, (axs_1, axs_2) = plt.subplots(1, 2, figsize=(14, 5))

# Sepal length vs. sepal width, one marker colour per species
for target, target_name in enumerate(species):
    X_plot = X[y == target]
    axs_1.plot(X_plot[:, 0], X_plot[:, 1], linestyle='none', marker='o', label=target_name)
axs_1.set_xlabel(features[0])
axs_1.set_ylabel(features[1])
axs_1.axis('equal')
axs_1.legend();

# Petal length vs. petal width
for target, target_name in enumerate(species):
    X_plot = X[y == target]
    axs_2.plot(X_plot[:, 2], X_plot[:, 3], linestyle='none', marker='o', label=target_name)
axs_2.set_xlabel(features[2])
axs_2.set_ylabel(features[3])
axs_2.axis('equal')
axs_2.legend();

In [37]:

class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 100)
        self.layer2 = nn.Linear(100, 100)
        self.layer3 = nn.Linear(100, 3)  # 3 output classes, one per species

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # Return raw logits: nn.CrossEntropyLoss applies log-softmax
        # internally, so an extra F.softmax here would squash the gradients
        return self.layer3(x)

In [38]:

model = Model(X_train_K.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
loss_fn = nn.CrossEntropyLoss()

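For reference, this network has 4·100 + 100 = 500 parameters in layer1, 100·100 + 100 = 10,100 in layer2, and 100·3 + 3 = 303 in layer3, i.e. 10,903 trainable parameters in total. A one-line check (not in the original notebook):

# Count trainable parameters; expect 500 + 10100 + 303 = 10903 for the Iris model
print(sum(p.numel() for p in model.parameters() if p.requires_grad))
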
In [39]:

EPOCHS = 85
# Variable is a no-op wrapper in modern PyTorch; plain tensors behave the same
X_train_K = Variable(torch.from_numpy(X_train_K)).float()
y_train_K = Variable(torch.from_numpy(y_train_K)).long()
X_test_K = Variable(torch.from_numpy(X_test_K)).float()
y_test_K = Variable(torch.from_numpy(y_test_K)).long()

loss_lst = np.zeros((EPOCHS,))
accuracy_lst = np.zeros((EPOCHS,))

for epoch in tqdm.trange(EPOCHS):
    # Full-batch forward pass and loss on the training set
    y_pred = model(X_train_K)
    loss = loss_fn(y_pred, y_train_K)
    loss_lst[epoch] = loss.item()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Track test-set accuracy after each weight update
    with torch.no_grad():
        y_pred = model(X_test_K)
        correct = (torch.argmax(y_pred, dim=1) == y_test_K).type(torch.FloatTensor)
        accuracy_lst[epoch] = correct.mean()

100%|██████████| 85/85 [00:00<00:00, 342.12it/s]

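To report a single final score after the 85 epochs, the evaluation step can be run once more; a minimal sketch, not part of the original assignment:

# Sketch (added): final accuracy on the held-out Iris test split
with torch.no_grad():
    final_preds = torch.argmax(model(X_test_K), dim=1)
    print(f"Final test accuracy: {(final_preds == y_test_K).float().mean().item():.3f}")
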
In [40]:

fig, (axs_1, axs_2) = plt.subplots(2, figsize=(12, 6), sharex=True)

axs_1.plot(accuracy_lst)
axs_1.set_ylabel("Accuracy")
axs_2.plot(loss_lst)
axs_2.set_ylabel("Loss")
axs_2.set_xlabel("Epochs");

In [41]:

# DIGITS Dataset

data_digits = load_digits()
X = data_digits['data']
y = data_digits['target']
species = data_digits['target_names']
features = data_digits['feature_names']

# Features Standardization
scaler = StandardScaler()
X_scaled_value = scaler.fit_transform(X)

# Train/test data split
X_train_K, X_test_K, y_train_K, y_test_K = train_test_split(
    X_scaled_value, y, test_size=0.4, random_state=2)
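
Each Digits sample is an 8×8 greyscale image flattened into 64 pixel features, so scatter plots of individual pixels are far less informative than for Iris. A short sketch (not in the original notebook) to view a few samples as images, via the `images` array that load_digits also returns:

# Sketch (added): display the first five digits as 8x8 images
fig, axes = plt.subplots(1, 5, figsize=(10, 2))
for ax, image, label in zip(axes, data_digits['images'], data_digits['target']):
    ax.imshow(image, cmap='gray_r')
    ax.set_title(label)
    ax.axis('off')
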
In [42]:

X

Out[42]:

array([[ 0.,  0.,  5., ...,  0.,  0.,  0.],
       [ 0.,  0.,  0., ..., 10.,  0.,  0.],
       [ 0.,  0.,  0., ..., 16.,  9.,  0.],
       ...,
       [ 0.,  0.,  1., ...,  6.,  0.,  0.],
       [ 0.,  0.,  2., ..., 12.,  0.,  0.],
       [ 0.,  0., 10., ..., 12.,  1.,  0.]])

In [43]:

y

Out[43]:

array([0, 1, 2, ..., 8, 9, 8])

In [44]:

fig, (axs_1, axs_2) = plt.subplots(1, 2, figsize=(14, 5))

# First two pixel features, one marker colour per digit class
for target, target_name in enumerate(species):
    X_plot = X[y == target]
    axs_1.plot(X_plot[:, 0], X_plot[:, 1], linestyle='none', marker='o', label=target_name)
axs_1.set_xlabel(features[0])
axs_1.set_ylabel(features[1])
axs_1.axis('equal')
axs_1.legend();

# Third and fourth pixel features
for target, target_name in enumerate(species):
    X_plot = X[y == target]
    axs_2.plot(X_plot[:, 2], X_plot[:, 3], linestyle='none', marker='o', label=target_name)
axs_2.set_xlabel(features[2])
axs_2.set_ylabel(features[3])
axs_2.axis('equal')
axs_2.legend();

In [45]:

class Model(nn.Module):
    def __init__(self, input_dim):
        super(Model, self).__init__()
        self.layer1 = nn.Linear(input_dim, 100)
        self.layer2 = nn.Linear(100, 100)
        self.layer3 = nn.Linear(100, 10)  # 10 output classes, one per digit

    def forward(self, x):
        x = F.relu(self.layer1(x))
        x = F.relu(self.layer2(x))
        # Return raw logits; nn.CrossEntropyLoss applies log-softmax internally
        return self.layer3(x)

In [46]:

model = Model(X_train_K.shape[1])
optimizer = torch.optim.Adam(model.parameters(), lr=0.002)
loss_fn = nn.CrossEntropyLoss()

In [47]:

EPOCHS = 85
X_train_K = Variable(torch.from_numpy(X_train_K)).float()
y_train_K = Variable(torch.from_numpy(y_train_K)).long()
X_test_K = Variable(torch.from_numpy(X_test_K)).float()
y_test_K = Variable(torch.from_numpy(y_test_K)).long()

loss_lst = np.zeros((EPOCHS,))
accuracy_lst = np.zeros((EPOCHS,))

for epoch in tqdm.trange(EPOCHS):
    # Full-batch forward pass and loss on the training set
    y_pred = model(X_train_K)
    loss = loss_fn(y_pred, y_train_K)
    loss_lst[epoch] = loss.item()
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Track test-set accuracy after each weight update
    with torch.no_grad():
        y_pred = model(X_test_K)
        correct = (torch.argmax(y_pred, dim=1) == y_test_K).type(torch.FloatTensor)
        accuracy_lst[epoch] = correct.mean()

100%|██████████| 85/85 [00:00<00:00, 165.13it/s]

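With ten classes, a confusion matrix is more informative than a single accuracy number; a possible follow-up using scikit-learn, not part of the original assignment:

# Sketch (added): confusion matrix on the Digits test split
from sklearn.metrics import confusion_matrix
with torch.no_grad():
    preds = torch.argmax(model(X_test_K), dim=1)
print(confusion_matrix(y_test_K.numpy(), preds.numpy()))
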
In [48]:

fig, (axs_1, axs_2) = plt.subplots(2, figsize=(12, 6), sharex=True)

axs_1.plot(accuracy_lst)
axs_1.set_ylabel("Accuracy")
axs_2.plot(loss_lst)
axs_2.set_ylabel("Loss")
axs_2.set_xlabel("Epochs");

In [ ]:
