
Ashutosh Anand

DL Assignment Lab 3 SuperStore

202318035

DATASET 1: SUPERSTORE
In [ ]: import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

In [ ]: # Load the dataset, specifying the encoding format


superstore_data = pd.read_csv('/content/Sample - Superstore.csv', encoding='ISO-8859-1')

# Preview the first few rows of the dataset


superstore_data.head()
Out[ ]:    Row ID  Order ID        Order Date  Ship Date   Ship Mode       Customer ID  Customer Name    Segment    Country        City
        0  1       CA-2016-152156  11/8/2016   11/11/2016  Second Class    CG-12520     Claire Gute      Consumer   United States  Henderson
        1  2       CA-2016-152156  11/8/2016   11/11/2016  Second Class    CG-12520     Claire Gute      Consumer   United States  Henderson
        2  3       CA-2016-138688  6/12/2016   6/16/2016   Second Class    DV-13045     Darrin Van Huff  Corporate  United States  Los Angeles
        3  4       US-2015-108966  10/11/2015  10/18/2015  Standard Class  SO-20335     Sean O'Donnell   Consumer   United States  Fort Lauderdale
        4  5       US-2015-108966  10/11/2015  10/18/2015  Standard Class  SO-20335     Sean O'Donnell   Consumer   United States  Fort Lauderdale

5 rows × 21 columns (remaining columns not shown)

In [ ]: # Display the count of missing values in each column


print(superstore_data.isna().sum())

# Remove rows where the 'Profit' column has missing values


superstore_data_cleaned = superstore_data.dropna(subset=['Profit'])

Row ID 0
Order ID 0
Order Date 0
Ship Date 0
Ship Mode 0
Customer ID 0
Customer Name 0
Segment 0
Country 0
City 0
State 0
Postal Code 0
Region 0
Product ID 0
Category 0
Sub-Category 0
Product Name 0
Sales 0
Quantity 0
Discount 0
Profit 0
dtype: int64
In [ ]: # Convert 'Order Date' and 'Ship Date' to datetime, coercing values that fail to parse
superstore_data['Order Date'] = pd.to_datetime(superstore_data['Order Date'], errors='coerce')
superstore_data['Ship Date'] = pd.to_datetime(superstore_data['Ship Date'], errors='coerce')

In [ ]: # Filter out rows with non-positive 'Sales' or 'Profit' values
superstore_data_filtered = superstore_data[(superstore_data['Sales'] > 0) & (superstore_data['Profit'] > 0)]

In [ ]: # Statistical summary of the dataset


superstore_data.describe()

Out[ ]:          Row ID       Order Date                     Ship Date                      Postal Code   Sales         Quantity
        count    8058.000000  8058                           8058                           8058.000000   8058.000000   8058.000000
        mean     4967.272648  2016-05-01 01:06:07.237527808  2016-05-04 23:59:49.277736704  55016.047779  223.480623    3.794738
        min      1.000000     2014-01-03 00:00:00            2014-01-07 00:00:00            1040.000000   0.990000      1.000000
        25%      2483.250000  2015-05-26 00:00:00            2015-05-29 00:00:00            21843.250000  17.940000     2.000000
        50%      4965.000000  2016-06-28 00:00:00            2016-07-02 00:00:00            53711.000000  50.965000     3.000000
        75%      7427.250000  2017-05-15 00:00:00            2017-05-19 18:00:00            90045.000000  191.976000    5.000000
        max      9994.000000  2017-12-30 00:00:00            2018-01-05 00:00:00            99301.000000  17499.950000  14.000000
        std      2872.723630  NaN                            NaN                            33295.003380  600.340641    2.244696

        (Discount and Profit columns not shown)

In [ ]: superstore_data.info()
<class 'pandas.core.frame.DataFrame'>
Index: 8058 entries, 0 to 9993
Data columns (total 21 columns):
# Column Non-Null Count Dtype
--- ------ -------------- -----
0 Row ID 8058 non-null int64
1 Order ID 8058 non-null object
2 Order Date 8058 non-null datetime64[ns]
3 Ship Date 8058 non-null datetime64[ns]
4 Ship Mode 8058 non-null object
5 Customer ID 8058 non-null object
6 Customer Name 8058 non-null object
7 Segment 8058 non-null object
8 Country 8058 non-null object
9 City 8058 non-null object
10 State 8058 non-null object
11 Postal Code 8058 non-null int64
12 Region 8058 non-null object
13 Product ID 8058 non-null object
14 Category 8058 non-null object
15 Sub-Category 8058 non-null object
16 Product Name 8058 non-null object
17 Sales 8058 non-null float64
18 Quantity 8058 non-null int64
19 Discount 8058 non-null float64
20 Profit 8058 non-null float64
dtypes: datetime64[ns](2), float64(3), int64(3), object(13)
memory usage: 1.4+ MB

In [ ]: # Plotting the correlation heatmap for numerical columns


plt.figure(figsize=(10, 6))
numeric_cols = superstore_data_filtered.select_dtypes(include='number')  # Extract numerical columns
correlation_matrix = numeric_cols.corr()

# Generate the heatmap


sns.heatmap(correlation_matrix, annot=True, cmap='coolwarm', fmt=".2f")
plt.title('Heatmap of Correlation Matrix')
plt.show()
In [ ]: # Scatter plot to visualize relationship between 'Sales' and 'Profit'
plt.figure(figsize=(10, 6))
plt.scatter(superstore_data_filtered['Sales'], superstore_data_filtered['Profit'], alpha=0.5)
plt.title('Relationship Between Sales and Profit')
plt.xlabel('Sales Amount')
plt.ylabel('Profit Earned')
plt.grid(True)
plt.show()
In [ ]: # Apply one-hot encoding to categorical features
encoded_data = pd.get_dummies(superstore_data_filtered,
                              columns=['Ship Mode', 'Segment', 'Category', 'Sub-Category', 'Region'])

# Drop identifier, text and date columns that won't be used in the model
columns_to_drop = ['Order ID', 'Customer ID', 'Customer Name', 'City', 'State', 'Postal Code',
                   'Country', 'Product ID', 'Product Name', 'Order Date', 'Ship Date', 'Row ID']
encoded_data_cleaned = encoded_data.drop(columns=columns_to_drop)

# Define the feature matrix (X) and target variable (y)


X = encoded_data_cleaned.drop(columns=['Profit'])
y = encoded_data_cleaned['Profit']

# Split the data into training and test sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Standardize the feature matrix


scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)
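Before scaling, it can be worth confirming that no text or date columns remain in the feature matrix, since StandardScaler and LinearRegression expect purely numeric input. A minimal sanity check on the X defined above (the leftover name is illustrative and not part of the original notebook):

# Hypothetical check: list any object or datetime columns still present in X
leftover = X.select_dtypes(include=['object', 'datetime64[ns]']).columns.tolist()
print(leftover)  # an empty list means every remaining feature is numeric or one-hot encoded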

In [ ]: # Initialize and train the linear regression model


linear_regressor = LinearRegression()
linear_regressor.fit(X_train_scaled, y_train)

# Make predictions using the test set


predicted_values = linear_regressor.predict(X_test_scaled)

In [ ]: # Calculate performance metrics


mean_sq_error = mean_squared_error(y_test, predicted_values)
r_squared = r2_score(y_test, predicted_values)

# Display the results


print(f"Mean Squared Error (MSE): {mean_sq_error}")
print(f"R-squared (R²): {r_squared}")

Mean Squared Error: 10086.950159098482


R-squared: 0.6454160278125762
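For reference, the R-squared value above is 1 minus the ratio of the residual sum of squares to the total sum of squares. A minimal sketch of the equivalent manual computation, using the y_test and predicted_values arrays already defined (the ss_res/ss_tot names are illustrative):

ss_res = np.sum((y_test - predicted_values) ** 2)   # residual sum of squares
ss_tot = np.sum((y_test - y_test.mean()) ** 2)      # total sum of squares
print(1 - ss_res / ss_tot)                          # should agree with r2_score above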
In [ ]: # Residual plot to visualize the difference between actual and predicted values
plt.figure(figsize=(10, 6))
plt.scatter(y_test, y_test - predicted_values, alpha=0.5, color='blue')
plt.title('Residual Plot: Actual vs Residuals')
plt.xlabel('Actual Profit')
plt.ylabel('Residuals (Actual - Predicted)')
plt.axhline(0, color='red', linestyle='--', linewidth=1)
plt.grid(True)
plt.show()

In [ ]: import tensorflow as tf

# Define the neural network model


neural_network = tf.keras.Sequential([
    tf.keras.layers.Dense(64, input_shape=(X_train_scaled.shape[1],), activation='relu'),
    tf.keras.layers.BatchNormalization(),  # Apply batch normalization after the first dense layer
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),  # Apply batch normalization after the second dense layer
    tf.keras.layers.Dense(1)               # Output layer for predicting a continuous value
])

# Compile the model with the Adam optimizer and mean squared error loss
neural_network.compile(optimizer='adam', loss='mean_squared_error')

# Fit the model to the training data with validation split


training_history = neural_network.fit(X_train_scaled, y_train, epochs=50, batch_size=32, validation_split=0.2)

# Predict values on the test set


predicted_values_nn = neural_network.predict(X_test_scaled)

# Calculate and print the Mean Squared Error for the model
mean_squared_error_nn = mean_squared_error(y_test, predicted_values_nn)
print(f"Mean Squared Error (NN): {mean_squared_error_nn}")
/usr/local/lib/python3.10/dist-packages/keras/src/layers/core/dense.py:87: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead.
  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
Epoch 1/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 3s 4ms/step - loss: 47668.7617 - val_loss: 51688.6797
Epoch 2/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - loss: 64383.8750 - val_loss: 46871.4219
Epoch 3/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 33989.1836 - val_loss: 41950.8047
Epoch 4/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 40321.8438 - val_loss: 37491.6797
Epoch 5/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 35579.7109 - val_loss: 31717.7617
Epoch 6/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 29560.2715 - val_loss: 28373.3945
Epoch 7/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 34109.1094 - val_loss: 25534.1855
Epoch 8/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 51172.4766 - val_loss: 20007.9590
Epoch 9/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 35456.3477 - val_loss: 17684.2188
Epoch 10/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 27014.1738 - val_loss: 16791.4980
Epoch 11/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 25342.3340 - val_loss: 15307.5625
Epoch 12/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 19282.0391 - val_loss: 13753.0234
Epoch 13/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 32910.4805 - val_loss: 9830.8936
Epoch 14/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 28342.3164 - val_loss: 8664.4775
Epoch 15/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 21314.8379 - val_loss: 7692.1582
Epoch 16/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 29441.5254 - val_loss: 5153.7827
Epoch 17/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - loss: 20392.1309 - val_loss: 5785.3110
Epoch 18/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 30496.3496 - val_loss: 3993.5117
Epoch 19/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - loss: 27584.6875 - val_loss: 3594.0852
Epoch 20/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 20475.7793 - val_loss: 3755.9656
Epoch 21/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 61953.0586 - val_loss: 1671.9246
Epoch 22/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 26770.6309 - val_loss: 2593.4043
Epoch 23/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 10425.8662 - val_loss: 2277.5725
Epoch 24/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 19270.7734 - val_loss: 1250.8818
Epoch 25/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 14116.5947 - val_loss: 1505.1302
Epoch 26/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 14105.9600 - val_loss: 1167.1968
Epoch 27/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 25164.5859 - val_loss: 2049.8232
Epoch 28/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 26295.7676 - val_loss: 2879.0378
Epoch 29/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 28493.4883 - val_loss: 1048.2556
Epoch 30/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 17539.0742 - val_loss: 4008.9102
Epoch 31/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step - loss: 8479.7100 - val_loss: 1161.5667
Epoch 32/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 15669.1748 - val_loss: 1503.3073
Epoch 33/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 28940.2637 - val_loss: 1834.2437
Epoch 34/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 13616.1494 - val_loss: 1704.3368
Epoch 35/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 5860.9961 - val_loss: 2200.2449
Epoch 36/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 5ms/step - loss: 16617.9785 - val_loss: 4047.9802
Epoch 37/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 32050.4023 - val_loss: 2532.6729
Epoch 38/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 16179.7148 - val_loss: 4513.4199
Epoch 39/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 21568.2441 - val_loss: 5153.5933
Epoch 40/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 24783.0625 - val_loss: 5456.6978
Epoch 41/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 15170.2783 - val_loss: 1733.7618
Epoch 42/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 32876.7031 - val_loss: 1131.5521
Epoch 43/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 2ms/step - loss: 14524.8955 - val_loss: 1701.3094
Epoch 44/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 0s 2ms/step - loss: 9640.7227 - val_loss: 4312.9932
Epoch 45/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 15651.7090 - val_loss: 2375.5364
Epoch 46/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 14777.8525 - val_loss: 3953.7329
Epoch 47/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 4ms/step - loss: 5551.8149 - val_loss: 4933.4780
Epoch 48/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 6877.3472 - val_loss: 2246.7551
Epoch 49/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 14761.7148 - val_loss: 8128.3403
Epoch 50/50
162/162 ━━━━━━━━━━━━━━━━━━━━ 1s 3ms/step - loss: 13291.8027 - val_loss: 6345.6904
51/51 ━━━━━━━━━━━━━━━━━━━━ 0s 3ms/step
Mean Squared Error: 6065.888140869391
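One optional diagnostic not shown above: plotting the training and validation loss stored in training_history (both keys exist because a validation split was used during fitting):

plt.figure(figsize=(10, 6))
plt.plot(training_history.history['loss'], label='Training loss')
plt.plot(training_history.history['val_loss'], label='Validation loss')
plt.title('Keras Model: Loss per Epoch')
plt.xlabel('Epoch')
plt.ylabel('MSE Loss')
plt.legend()
plt.grid(True)
plt.show()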

In [ ]: # Calculate R-squared for the model predictions


r_squared_nn = r2_score(y_test, predicted_values_nn)
print(f"R-squared (NN): {r_squared_nn}")

R-squared: 0.7867673897551717
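The UserWarning emitted during training recommends declaring the input with an explicit Input object instead of passing input_shape to the first Dense layer. A minimal sketch of the same architecture written in that style (neural_network_alt is a hypothetical name, not used elsewhere in the notebook):

neural_network_alt = tf.keras.Sequential([
    tf.keras.Input(shape=(X_train_scaled.shape[1],)),  # explicit input specification
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.BatchNormalization(),
    tf.keras.layers.Dense(1)
])
neural_network_alt.compile(optimizer='adam', loss='mean_squared_error')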

In [ ]: import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset

In [ ]: import torch
from torch.utils.data import DataLoader, TensorDataset

# Convert NumPy arrays to PyTorch tensors with appropriate data types


X_train_tensor = torch.tensor(X_train_scaled, dtype=torch.float32)
X_test_tensor = torch.tensor(X_test_scaled, dtype=torch.float32)
y_train_tensor = torch.tensor(y_train.values, dtype=torch.float32).unsqueeze(1)  # Reshape target to a column vector
y_test_tensor = torch.tensor(y_test.values, dtype=torch.float32).unsqueeze(1)

# Create datasets and dataloaders for training and testing


train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

In [ ]: import torch
import torch.nn as nn

class FeedForwardNN(nn.Module):
    def __init__(self):
        super(FeedForwardNN, self).__init__()
        # Define the network layers
        self.fc1 = nn.Linear(X_train_tensor.shape[1], 64)
        self.bn1 = nn.BatchNorm1d(64)
        self.fc2 = nn.Linear(64, 64)
        self.bn2 = nn.BatchNorm1d(64)
        self.fc_out = nn.Linear(64, 1)

    def forward(self, x):
        # Forward pass through the network
        x = torch.relu(self.bn1(self.fc1(x)))
        x = torch.relu(self.bn2(self.fc2(x)))
        x = self.fc_out(x)
        return x

# Create an instance of the model
model_instance = FeedForwardNN()
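A quick optional sanity check on the freshly constructed model: count its trainable parameters and push one batch from train_loader through it to confirm the output shape (purely illustrative; these lines are not part of the original notebook):

num_params = sum(p.numel() for p in model_instance.parameters() if p.requires_grad)
print(f'Trainable parameters: {num_params}')

sample_inputs, _ = next(iter(train_loader))
with torch.no_grad():
    model_instance.eval()                       # use BatchNorm running stats for this one-off pass
    print(model_instance(sample_inputs).shape)  # expected: torch.Size([32, 1])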

In [ ]: import torch.optim as optim

# Set up the loss function and optimizer


loss_function = nn.MSELoss() # Mean Squared Error for regression tasks
optimizer = optim.Adam(model_instance.parameters(), lr=0.001)

In [ ]: # Training the neural network model
num_epochs = 50
for epoch in range(num_epochs):
    model_instance.train()  # Set model to training mode
    total_loss = 0.0
    for batch_inputs, batch_targets in train_loader:
        optimizer.zero_grad()                             # Reset gradients
        predictions = model_instance(batch_inputs)        # Perform forward pass
        loss = loss_function(predictions, batch_targets)  # Calculate loss
        loss.backward()                                   # Compute gradients
        optimizer.step()                                  # Update model parameters
        total_loss += loss.item()

    # Print average loss for the epoch
    print(f'Epoch {epoch+1}/{num_epochs}, Average Loss: {total_loss / len(train_loader):.4f}')
Epoch 1/50, Loss: 51424.657269770556
Epoch 2/50, Loss: 48310.33356105691
Epoch 3/50, Loss: 44932.61543251264
Epoch 4/50, Loss: 41313.72389772623
Epoch 5/50, Loss: 39324.14825605638
Epoch 6/50, Loss: 36726.14981592764
Epoch 7/50, Loss: 35337.64955350668
Epoch 8/50, Loss: 34768.07926850272
Epoch 9/50, Loss: 33027.254523400035
Epoch 10/50, Loss: 31722.777753622224
Epoch 11/50, Loss: 30838.23672727075
Epoch 12/50, Loss: 30333.480208746278
Epoch 13/50, Loss: 29343.359767498358
Epoch 14/50, Loss: 29310.228390079912
Epoch 15/50, Loss: 28139.486955699354
Epoch 16/50, Loss: 27291.915266886797
Epoch 17/50, Loss: 26410.431213529984
Epoch 18/50, Loss: 26361.390899658203
Epoch 19/50, Loss: 26027.35403684106
Epoch 20/50, Loss: 25200.32362018245
Epoch 21/50, Loss: 25254.22382528475
Epoch 22/50, Loss: 24776.36201099358
Epoch 23/50, Loss: 25340.636858647413
Epoch 24/50, Loss: 23679.249445395893
Epoch 25/50, Loss: 24117.204031160563
Epoch 26/50, Loss: 23072.884546261022
Epoch 27/50, Loss: 22699.725368084295
Epoch 28/50, Loss: 22319.26638869484
Epoch 29/50, Loss: 21812.148398522102
Epoch 30/50, Loss: 21496.75541520827
Epoch 31/50, Loss: 21163.72158677507
Epoch 32/50, Loss: 21015.17931615244
Epoch 33/50, Loss: 20904.659133457902
Epoch 34/50, Loss: 20409.528865398748
Epoch 35/50, Loss: 20188.139873542408
Epoch 36/50, Loss: 19548.72394433352
Epoch 37/50, Loss: 18877.12769566904
Epoch 38/50, Loss: 18564.116300073
Epoch 39/50, Loss: 17895.596304260856
Epoch 40/50, Loss: 18856.125500669576
Epoch 41/50, Loss: 18531.74821426845
Epoch 42/50, Loss: 17332.796170376314
Epoch 43/50, Loss: 17418.803755354173
Epoch 44/50, Loss: 17068.957090774384
Epoch 45/50, Loss: 16144.931223501073
Epoch 46/50, Loss: 15548.156839956151
Epoch 47/50, Loss: 15317.066974904277
Epoch 48/50, Loss: 15231.355718631556
Epoch 49/50, Loss: 14943.487428721815
Epoch 50/50, Loss: 14945.049020597251

In [ ]: # Set the model to evaluation mode


model_instance.eval()
predicted_values_test = []

# Disable gradient computation for evaluation


with torch.no_grad():
    for batch_inputs, _ in test_loader:
        predictions = model_instance(batch_inputs)
        predicted_values_test.append(predictions)

# Concatenate the predictions and convert them to numpy array


predicted_values_test = torch.cat(predicted_values_test).numpy()
# Compute and display the evaluation metrics
test_mse = mean_squared_error(y_test, predicted_values_test)
print(f'Mean Squared Error on Test Data: {test_mse:.4f}')
test_r2 = r2_score(y_test, predicted_values_test)
print(f"R-squared on Test Data: {test_r2:.4f}")

Mean Squared Error on Test Set: 2675.018969037517


R-squared on Test Set: 0.9059657441786345
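Putting the three models side by side (figures taken from the outputs above, rounded):

Model                          Test MSE    Test R²
Linear Regression              10086.95    0.6454
Keras feed-forward network     6065.89     0.7868
PyTorch feed-forward network   2675.02     0.9060

On this particular train/test split, the PyTorch network gives the lowest test MSE and the highest R².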
