0% found this document useful (0 votes)

2 views13 pages

Code and Output of Cancer Detection Model

The document contains a Python script for a classification pipeline using Support Vector Machines (SVM) on tumor sample data. It includes a Data class for managing data input and preprocessing, feature selection methods, and functions for training and evaluating the model. The script also provides visualization of feature contributions and reports accuracy metrics for the classification results.

Uploaded by

jaikarabhishek12599

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

0% found this document useful (0 votes)

2 views13 pages

Code and Output of Cancer Detection Model

Uploaded by

jaikarabhishek12599

We take content rights seriously. If you suspect this is your content, claim it here.

Available Formats

Download as PDF, TXT or read online on Scribd

You are on page 1/ 13

Code:

#!/usr/bin/env python
import csv
import logging
import os
import re
from collections import Counter

import matplotlib.pyplot as plt
import numpy as np
from sklearn import preprocessing
from sklearn.feature_selection import (
    SelectKBest,
    SelectPercentile,
    f_classif,
    mutual_info_classif,
)
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    precision_score,
    recall_score,
)
from sklearn.svm import SVC
from tqdm import tqdm

class Data(object):
    """Load and hold one split of the tumor data set.

    Construction reads a class/label file and a tab-separated samples file,
    then drops samples carrying an unwanted label.

    Attributes set during construction:
        dataType: free-form tag used only in printed messages.
        number_of_samples, number_of_classes: the two integers on the first
            line of the class file.
        classes: list of class names from the second line of the class file.
        Y: 1-D array of label strings from the third line of the class file.
        feature_names: second column of the samples file (header rows
            skipped).
        X: float matrix, transposed from the file layout so that rows line up
            with the entries of ``Y``.
    """

    def __init__(self, res_path, cls_path, dataType):
        """Read both input files and clean the result.

        Args:
            res_path: path of the tab-separated samples file.
            cls_path: path of the class/label file.
            dataType: tag printed in progress and description messages.
        """
        self.dataType = dataType
        self._get_classes(cls_path)
        self._get_tumor_samples(res_path)
        self._clean()

    def _get_classes(self, path):
        """Parse the class file: counts, class names, then per-sample labels."""
        print(f"Getting {self.dataType} classes")
        with open(path, 'r') as f:
            reader = [l.strip() for l in tqdm(f.readlines())]

        # split() rather than split(' ') so repeated spaces or tabs between
        # tokens do not produce empty entries.
        header = reader[0].split()
        self.number_of_samples = int(header[0])
        self.number_of_classes = int(header[1])
        self.classes = reader[1].split()
        self.Y = np.array(reader[2].split())

    def _get_tumor_samples(self, path):
        """Parse the samples file into ``feature_names`` and ``X``."""
        print(f"Getting {self.dataType} samples")
        with open(path, 'r') as inputFile:
            lines = [l.strip().split('\t') for l in tqdm(inputFile.readlines())]

        # The first three lines are skipped as header material.
        data = np.array(lines[3:], dtype=object)
        self.feature_names = data[:, 1]
        data = data[:, 2:]
        # Keep every other column (0, 2, 4, ...).
        # NOTE(review): presumably the dropped columns are per-sample call
        # flags of the .res format; confirm against the data files.
        data = np.delete(data, list(range(1, data.shape[1], 2)), axis=1)
        # Transpose so that each row is one sample and each column a feature.
        self.X = data.astype(float).T

    def _get_binary(self, name):
        """Return 0/1 labels marking the samples that belong to class ``name``.

        Labels in ``Y`` are compared against the position of ``name`` in
        ``classes``. An unknown class name yields an all-zero array.
        """
        try:
            index = self.classes.index(name)
        except ValueError:
            return np.zeros_like(self.Y, dtype=int)
        return (self.Y == str(index)).astype(int)

    def _describe(self):
        """Print the sizes of ``X`` and ``Y`` and the counts from the class file."""
        print(f"\n------ Data {self.dataType} Description -----")
        print(f"X len = {len(self.X)}")
        print(f"Y len = {len(self.Y)}")
        print(f"# Samples = {self.number_of_samples}")
        print(f"# Classes = {self.number_of_classes}")
        print("---------------------------------\n")

    def _clean(self, invalid_label='14'):
        """Drop every sample whose label equals ``invalid_label``.

        Args:
            invalid_label: label string to remove from ``Y`` (and the matching
                rows from ``X``). Defaults to ``'14'``, the value that was
                previously hard-coded.
        """
        invalid_indices = np.where(self.Y == invalid_label)[0]
        if len(invalid_indices) > 0:
            print("Removing invalid entries...")
            self.Y = np.delete(self.Y, invalid_indices, 0)
            self.X = np.delete(self.X, invalid_indices, 0)

def feature_selection(X, y, k_val):
    """Select the top ``k_val`` features using the ANOVA F-score.

    Args:
        X: feature matrix passed to ``SelectKBest.fit``.
        y: target labels passed to ``SelectKBest.fit``.
        k_val: number of features to keep.

    Returns:
        Integer indices of the selected columns of ``X``.
    """
    selector = SelectKBest(f_classif, k=k_val).fit(X, y)
    return selector.get_support(indices=True)
def plot_coefficients(classifier, feature_names, class_name,
                      top_features=20):
    """Plot the most negative and most positive coefficients as a bar chart.

    The chart is written to ``graphs/plot_<class_name>.png``; the ``graphs``
    directory is created when missing.

    Args:
        classifier: fitted model exposing ``coef_``; only its first row is
            plotted.
        feature_names: names indexed in the same order as the coefficients.
        class_name: used to build the output file name.
        top_features: how many coefficients to take from each end of the
            sorted coefficient list.
    """
    coef = classifier.coef_[0]
    order = np.argsort(coef)
    top_negative_coefficients = order[:top_features]
    top_positive_coefficients = order[-top_features:]
    top_coefficients = np.hstack([top_negative_coefficients,
                                  top_positive_coefficients])
    # Derive positions from what was actually selected, so a model with fewer
    # than ``top_features`` coefficients does not cause a shape mismatch.
    positions = np.arange(len(top_coefficients))

    os.makedirs("graphs", exist_ok=True)
    fig = plt.figure(figsize=(30, 15))
    try:
        colors = ['#cccccc' if c < 0 else 'teal'
                  for c in coef[top_coefficients]]
        plt.bar(positions, coef[top_coefficients], color=colors)
        # Ticks share the bar positions so each label sits under its own bar
        # (they used to be shifted one slot to the right).
        plt.xticks(positions, np.array(feature_names)[top_coefficients],
                   rotation='vertical', ha='center')
        plt.savefig(f"graphs/plot_{class_name}.png")
    finally:
        # Release the figure; otherwise one stays open per call.
        plt.close(fig)

def run_test(train, test):
    """Run the classification pipeline and print accuracy for k = 1..50.

    For each ``k`` the ``k`` best features are selected once per class
    (one-vs-rest binary labels), the union of those features is used to fit a
    linear SVM on the training split, and accuracy on the test split is
    printed. The best accuracy and the ``k`` that produced it are printed at
    the end.

    Args:
        train: ``Data`` instance used for scaling, feature selection and
            fitting.
        test: ``Data`` instance used for evaluation.
    """
    train._describe()
    test._describe()

    # Scale with statistics from the training split only. Work on local
    # copies so the caller's Data objects are left untouched.
    normalizer = preprocessing.StandardScaler().fit(train.X)
    X_train = normalizer.transform(train.X)
    X_test = normalizer.transform(test.X)

    y_train = train.Y.astype(int)
    y_test = test.Y.astype(int)

    accuracy = []
    for x in range(1, 51):
        best_features = set()
        for cls in train.classes:
            binary_labels = train._get_binary(cls)
            best_features.update(
                feature_selection(X_train, binary_labels, x))

        # Sorted so the column order is reproducible between runs.
        columns = sorted(best_features)
        X_train_selected = X_train[:, columns]
        X_test_selected = X_test[:, columns]

        # Probability estimates are not requested: only predict() is used,
        # and enabling them makes fit() run an extra internal
        # cross-validation.
        model = SVC(kernel="linear")
        model.fit(X_train_selected, y_train)
        results = model.predict(X_test_selected)

        acc = accuracy_score(y_test, results)
        accuracy.append(acc)
        print(f"Iteration {x}: Accuracy = {acc:.4f}")
        # NOTE(review): the flattened source does not show whether this
        # report was printed per iteration or once after the loop; confirm.
        print(classification_report(y_test, results))

    print("Max Accuracy:", np.max(accuracy))
    # Index 0 corresponds to k = 1.
    print("Best feature count:", np.argmax(accuracy) + 1)

# Script entry point: load both splits from the data/ directory and run the
# pipeline.
if __name__ == '__main__':
    logging.basicConfig(level=logging.INFO)

    train = Data('data/Training_res.txt', 'data/Training_cls.txt', 'train')
    test = Data('data/Test_res.txt', 'data/Test_cls.txt', 'test')

    run_test(train, test)

Output:
Datasets:
Bladder

Breast

Cns
Colorectal

Leukemia
Lung

Lymphoma
Melanoma

Mesothelioma
Ovary

Pancreas
Prostate

Renal
Uterus

Trex User Guide
No ratings yet
Trex User Guide
335 pages
Pattern Recognition Lab
No ratings yet
Pattern Recognition Lab
24 pages
Mercedes-Benz Greener Manufacturing Ai
0% (1)
Mercedes-Benz Greener Manufacturing Ai
16 pages
ECON2206 Assignment 2 William Chau z3376203
No ratings yet
ECON2206 Assignment 2 William Chau z3376203
5 pages
IT, Support Company Profile
No ratings yet
IT, Support Company Profile
15 pages
16BCB0126 VL2018195002535 Pe003
No ratings yet
16BCB0126 VL2018195002535 Pe003
40 pages
Brain Tumor Multi-Classification With PSO: Import As Import As Import
No ratings yet
Brain Tumor Multi-Classification With PSO: Import As Import As Import
18 pages
DM ML Practical
No ratings yet
DM ML Practical
13 pages
Tensor Flow and Keras Sample Programs
No ratings yet
Tensor Flow and Keras Sample Programs
22 pages
Notebook - Main Code
No ratings yet
Notebook - Main Code
4 pages
All in One
No ratings yet
All in One
13 pages
1
No ratings yet
1
13 pages
LAB-4 Report
No ratings yet
LAB-4 Report
21 pages
ML
No ratings yet
ML
11 pages
Skin Disease Detection Using Transformers
No ratings yet
Skin Disease Detection Using Transformers
35 pages
Deep Learning Perceptron
No ratings yet
Deep Learning Perceptron
10 pages
Skin PRJ
No ratings yet
Skin PRJ
5 pages
ML Lab Manual
No ratings yet
ML Lab Manual
12 pages
Breast Cancer Classification Using DTC
No ratings yet
Breast Cancer Classification Using DTC
1 page
SVM K NN MLP With Sklearn Jupyter NoteBo
No ratings yet
SVM K NN MLP With Sklearn Jupyter NoteBo
22 pages
Nibedita Dehury, 123CE0079, ASSIGNMENT 9
No ratings yet
Nibedita Dehury, 123CE0079, ASSIGNMENT 9
18 pages
ML5 Implementation
No ratings yet
ML5 Implementation
32 pages
ML Experiment WithDataset
No ratings yet
ML Experiment WithDataset
23 pages
ML Lab
No ratings yet
ML Lab
7 pages
Final ML Programs 075005
No ratings yet
Final ML Programs 075005
15 pages
ML 7
No ratings yet
ML 7
6 pages
Aiml Ex 4-7
No ratings yet
Aiml Ex 4-7
8 pages
V
No ratings yet
V
8 pages
ML II Lab
No ratings yet
ML II Lab
5 pages
Assignment #1: K Nearest Neighbor Classifier: Name: Srikanth Mujjiga (Roll No: 2015-50-831
No ratings yet
Assignment #1: K Nearest Neighbor Classifier: Name: Srikanth Mujjiga (Roll No: 2015-50-831
8 pages
Naive
No ratings yet
Naive
5 pages
1 KNN - Jupyter Notebook
No ratings yet
1 KNN - Jupyter Notebook
3 pages
Assignment 2.4.1 Multiclass Classification
No ratings yet
Assignment 2.4.1 Multiclass Classification
5 pages
EX - NO:3: Algorithm
No ratings yet
EX - NO:3: Algorithm
11 pages
Aam Codes
No ratings yet
Aam Codes
8 pages
ML RECORD EX 5,6,7,8,9 (Without Border)
No ratings yet
ML RECORD EX 5,6,7,8,9 (Without Border)
13 pages
Deep Learning Practical Assignment:: Q-1) Code
No ratings yet
Deep Learning Practical Assignment:: Q-1) Code
59 pages
DWDM Lab 3
No ratings yet
DWDM Lab 3
10 pages
ML Short Code - Under Updating
No ratings yet
ML Short Code - Under Updating
4 pages
MLP - Week 5 - MNIST - Perceptron - Ipynb - Colaboratory
No ratings yet
MLP - Week 5 - MNIST - Perceptron - Ipynb - Colaboratory
31 pages
ML Programs
No ratings yet
ML Programs
14 pages
Heart: Our "Goal" Predict The Presence of Heart Disease in The Patient
100% (1)
Heart: Our "Goal" Predict The Presence of Heart Disease in The Patient
73 pages
Machine Learning Lab Manual
No ratings yet
Machine Learning Lab Manual
9 pages
Machine
100% (1)
Machine
45 pages
Shobit Sharma (2124399) ML Lab File PDF
No ratings yet
Shobit Sharma (2124399) ML Lab File PDF
19 pages
ML Journal External
No ratings yet
ML Journal External
14 pages
MlLabManualdocx 2024 09 04 22 02 58
No ratings yet
MlLabManualdocx 2024 09 04 22 02 58
19 pages
B22EE010 Report
No ratings yet
B22EE010 Report
9 pages
EE 559 HW2Code PDF
No ratings yet
EE 559 HW2Code PDF
7 pages
MACHINE LEARNING Manual
No ratings yet
MACHINE LEARNING Manual
36 pages
ML Lab Experiment Shortened With Same Output
No ratings yet
ML Lab Experiment Shortened With Same Output
6 pages
DL Lab 12212039
No ratings yet
DL Lab 12212039
72 pages
Strangers
No ratings yet
Strangers
8 pages
Machine Learning: Supervised /unsupervised
No ratings yet
Machine Learning: Supervised /unsupervised
33 pages
Setup: This Notebook Contains All The Sample Code and Solutions To The Exercises in Chapter 3
No ratings yet
Setup: This Notebook Contains All The Sample Code and Solutions To The Exercises in Chapter 3
30 pages
Knee Osteoarthritis Classification Using Xception, Mobilenet, Attention, Sqeeze and Excitation
No ratings yet
Knee Osteoarthritis Classification Using Xception, Mobilenet, Attention, Sqeeze and Excitation
19 pages
Programs Lab Bca
No ratings yet
Programs Lab Bca
16 pages
Breat Cancer Detection Using Thermograpgy
No ratings yet
Breat Cancer Detection Using Thermograpgy
15 pages
AIML Practical 02 22105A2021
No ratings yet
AIML Practical 02 22105A2021
8 pages
Roll NO 2020
No ratings yet
Roll NO 2020
8 pages
MLLab Manual
No ratings yet
MLLab Manual
24 pages
Ann Experiential Learning
No ratings yet
Ann Experiential Learning
43 pages
Plesiochronous Digital Hierarchy PDH and PDF
No ratings yet
Plesiochronous Digital Hierarchy PDH and PDF
3 pages
Isro Admit Card
No ratings yet
Isro Admit Card
2 pages
STEP 7 - System and Standard Functions For TI-S7-Converter
No ratings yet
STEP 7 - System and Standard Functions For TI-S7-Converter
106 pages
An SAP Consultant - SAP Adobe Form - Steps To Create Simple ADOBE Form and Calling It From ABAP Program
100% (1)
An SAP Consultant - SAP Adobe Form - Steps To Create Simple ADOBE Form and Calling It From ABAP Program
8 pages
Revit Families: A Step-by-Step Introduction: Learning Objectives
No ratings yet
Revit Families: A Step-by-Step Introduction: Learning Objectives
37 pages
Excel Guide in Tamil
No ratings yet
Excel Guide in Tamil
18 pages
"Dhanhar Masala Exim PVT - LTD": Project Report ON
No ratings yet
"Dhanhar Masala Exim PVT - LTD": Project Report ON
5 pages
Ordinal Analysis With An Introduction To Proof Theory Toshiyasu Arai PDF Download
No ratings yet
Ordinal Analysis With An Introduction To Proof Theory Toshiyasu Arai PDF Download
88 pages
JNTUK R20 B Tech CSE 1-2 Computer Organization Unit 3 Reference 2 Notes
No ratings yet
JNTUK R20 B Tech CSE 1-2 Computer Organization Unit 3 Reference 2 Notes
27 pages
SD 438177223 180416 1637 1430
No ratings yet
SD 438177223 180416 1637 1430
1 page
Pre Registration Summary: Declaration
No ratings yet
Pre Registration Summary: Declaration
2 pages
Type-Safe Generic Data Structures in C
No ratings yet
Type-Safe Generic Data Structures in C
11 pages
Evermotion Archmodels Vol 40 PDF
No ratings yet
Evermotion Archmodels Vol 40 PDF
2 pages
A Case For Redundant Arrays of Inexpensive Disks
No ratings yet
A Case For Redundant Arrays of Inexpensive Disks
20 pages
Pro Evolution Soccer 6
No ratings yet
Pro Evolution Soccer 6
9 pages
What Are The Benefits of Buying Verified Soccer Tips
No ratings yet
What Are The Benefits of Buying Verified Soccer Tips
2 pages
Fsae DS
No ratings yet
Fsae DS
2 pages
Shopper's Stop
No ratings yet
Shopper's Stop
15 pages
SG0 001
No ratings yet
SG0 001
87 pages
Oracle® Database Patch 22191349 - Oracle Grid Infrastructure Patch Set Update 12.1.0.2
No ratings yet
Oracle® Database Patch 22191349 - Oracle Grid Infrastructure Patch Set Update 12.1.0.2
7 pages
Different Classification of Computer Architecture
0% (1)
Different Classification of Computer Architecture
5 pages
35c3-9383-Compromising Online Accounts by Cracking Voicemail Systems
No ratings yet
35c3-9383-Compromising Online Accounts by Cracking Voicemail Systems
56 pages
Placement Records 2022
No ratings yet
Placement Records 2022
5 pages
Microsoft Dynamics SL
No ratings yet
Microsoft Dynamics SL
140 pages
3.1 Hadoop Ecosystem
No ratings yet
3.1 Hadoop Ecosystem
48 pages
Certified Professional Exam Preparation - Exercise Reference Guide
No ratings yet
Certified Professional Exam Preparation - Exercise Reference Guide
3 pages
17th Convocation-Important Instructions-1
No ratings yet
17th Convocation-Important Instructions-1
2 pages

Code and Output of Cancer Detection Model

Uploaded by

Code and Output of Cancer Detection Model

Uploaded by

Code:

def _init_(self, res_path, cls_path, dataType):

def _get_classes(self, path):

def _get_tumor_samples(self, path):

def _get_binary(self, name):

def feature_selection(X, y, k_val):

def run_test(train, test):

model = SVC(kernel="linear", probability=True)

acc = accuracy_score(y_test, results)

print(f"Iteration {x}: Accuracy = {acc:.4f}")

print("Max Accuracy:", np.max(accuracy))

train = Data('data/Training_res.txt', 'data/Training_cls.txt', 'train')

You might also like