NF Assignment 4


import pandas as pd

# Load the dataset
data = pd.read_csv('annthyroid_21feat_normalised.csv')

# Preprocess the data if needed
# (e.g., handle missing values, scale the features)

# Split the data into features and labels
X = data.drop(columns=['class'])  # Features
y = data['class']  # Labels (assuming the 'class' column indicates anomaly)

# Instantiate the PCA model
# Note: PCAModel is the PCA-based anomaly detector assumed to be defined
# elsewhere in the assignment; a possible sketch is given after the output below.
pca_model = PCAModel()

# Train the model
pca_model.train(X, None, num_features=2)  # No validation data needed for PCA

# Compute anomaly scores for the entire dataset
anomaly_scores = pca_model.compute_anomaly_score(X)

# Print or further analyze the anomaly scores
print("Anomaly scores:")
print(anomaly_scores)

Explained variation per principal component: 0.46743936111388296


Anomaly scores:
0 0.048096
1 0.006211
2 0.002051
3 0.001602
4 0.005633
...
7195 0.045009
7196 0.005748
7197 0.045752
7198 0.002172
7199 0.047087
Length: 7200, dtype: float64
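
The PCAModel class is not defined in this notebook, so the cell above only runs if the assignment supplies it. Below is a minimal sketch of what such a class could look like, using reconstruction error as the anomaly score; the class name and the train/compute_anomaly_score signatures are taken from the calls above, but the internals are an assumption, not the assignment's actual implementation.

import numpy as np
import pandas as pd
from sklearn.decomposition import PCA

class PCAModel:
    """Hypothetical PCA-based anomaly detector matching the calls above."""

    def train(self, X, val_data, num_features=2):
        # val_data is unused: plain PCA needs no validation set
        self.pca = PCA(n_components=num_features)
        self.pca.fit(X)
        print("Explained variation per principal component:",
              self.pca.explained_variance_ratio_.sum())

    def compute_anomaly_score(self, X):
        # Project each sample onto the principal subspace and back, then
        # score it by its mean squared reconstruction error: samples the
        # low-dimensional subspace represents poorly get high scores.
        X_hat = self.pca.inverse_transform(self.pca.transform(X))
        return pd.Series(((np.asarray(X) - X_hat) ** 2).mean(axis=1))

Reconstruction error is only one common choice of PCA anomaly score; the assignment's own PCAModel may use a different rule (e.g., a distance computed in the projected space).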

import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, classification_report
from sklearn.ensemble import RandomForestClassifier

# Load the dataset
data = pd.read_csv('annthyroid_21feat_normalised.csv')


# Preprocess the data if needed
# (e.g., handle missing values, scale the features)

# Split the data into features and labels
X = data.drop(columns=['class'])  # Features
y = data['class']  # Labels (assuming the 'class' column indicates anomaly)

# Split the data into training and validation sets
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.2, random_state=42)

# Instantiate and train the model (example with Random Forest)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Predict labels on the validation set
y_pred = model.predict(X_val)

# Calculate performance metrics
accuracy = accuracy_score(y_val, y_pred)
f1 = f1_score(y_val, y_pred, average='weighted')
report = classification_report(y_val, y_pred)

# Print or log the metrics
print("Accuracy:", accuracy)
print("F1 Score:", f1)
print("Classification Report:\n", report)

# Optionally, you can include other metrics such as precision, recall, confusion matrix, etc.

Accuracy: 0.9993055555555556
F1 Score: 0.9993036997916849
Classification Report:
              precision    recall  f1-score   support

           0       1.00      1.00      1.00      1352
           1       1.00      0.99      0.99        88

    accuracy                           1.00      1440
   macro avg       1.00      0.99      1.00      1440
weighted avg       1.00      1.00      1.00      1440
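
Because labels are available, the unsupervised PCA anomaly scores from the first cell can also be evaluated directly against them. A minimal sketch, assuming anomaly_scores and y from the cells above are still in scope (roc_auc_score treats higher scores as more anomalous):

from sklearn.metrics import roc_auc_score

# Threshold-free check of how well the PCA scores separate the two classes:
# an AUC near 1.0 means anomalies tend to receive higher scores than normals.
print("PCA anomaly-score ROC AUC:", roc_auc_score(y, anomaly_scores))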

import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Plot confusion matrix
cm = confusion_matrix(y_val, y_pred)


plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Normal', 'Anomaly'], yticklabels=['Normal', 'Anomaly'])
plt.xlabel('Predicted Label')
plt.ylabel('True Label')
plt.title('Confusion Matrix')
plt.show()

# Plot feature importances (if using RandomForestClassifier)
if isinstance(model, RandomForestClassifier):
    feature_importances = model.feature_importances_
    feature_names = X.columns
    sorted_idx = feature_importances.argsort()

    plt.figure(figsize=(10, 8))
    plt.barh(range(len(sorted_idx)), feature_importances[sorted_idx], align='center')
    plt.yticks(range(len(sorted_idx)), [feature_names[i] for i in sorted_idx])
    plt.xlabel('Feature Importance')
    plt.ylabel('Feature')
    plt.title('Feature Importances')
    plt.show()

# Plot ROC curve (if applicable)

# Note: the ROC curve should be computed from predicted scores rather than hard
# labels, so use the predicted probability of the anomaly class here. For
# multi-class problems, a one-vs-rest strategy is needed, with a separate ROC
# curve and AUC per class. A runnable sketch for this binary task follows below.
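
A possible version of that example for this binary task, assuming model, X_val, and y_val from the earlier cells are still in scope:

from sklearn.metrics import roc_curve, auc

# Score the validation set with the predicted probability of class 1 (anomaly)
y_score = model.predict_proba(X_val)[:, 1]
fpr, tpr, thresholds = roc_curve(y_val, y_score)
roc_auc = auc(fpr, tpr)

plt.figure()
plt.plot(fpr, tpr, color='darkorange', lw=2, label='ROC curve (area = %0.2f)' % roc_auc)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')  # chance diagonal
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('Receiver Operating Characteristic (ROC)')
plt.legend(loc="lower right")
plt.show()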

# Additional plots or charts as needed

