Normal Abnormal Ear - Ipynb - Colab

Download as pdf or txt
Download as pdf or txt
You are on page 1of 10

‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.

ipynb - Colab

# IMPORTANT: RUN THIS CELL IN ORDER TO IMPORT YOUR KAGGLE DATA SOURCES
# TO THE CORRECT LOCATION (/kaggle/input) IN YOUR NOTEBOOK,
# THEN FEEL FREE TO DELETE THIS CELL.
# NOTE: THIS NOTEBOOK ENVIRONMENT DIFFERS FROM KAGGLE'S PYTHON
# ENVIRONMENT SO THERE MAY BE MISSING LIBRARIES USED BY YOUR
# NOTEBOOK.

import os
import sys
from tempfile import NamedTemporaryFile
from urllib.request import urlopen
from urllib.parse import unquote, urlparse
from urllib.error import HTTPError
from zipfile import ZipFile
import tarfile
import shutil

CHUNK_SIZE = 40960
DATA_SOURCE_MAPPING = 'otoscopedata:https%3A%2F%2Fstorage.googleapis.com%2Fkaggle-data-sets%2F979822%2F165

KAGGLE_INPUT_PATH='/kaggle/input'
KAGGLE_WORKING_PATH='/kaggle/working'
KAGGLE_SYMLINK='kaggle'

!umount /kaggle/input/ 2> /dev/null


shutil.rmtree('/kaggle/input', ignore_errors=True)
os.makedirs(KAGGLE_INPUT_PATH, 0o777, exist_ok=True)
os.makedirs(KAGGLE_WORKING_PATH, 0o777, exist_ok=True)

try:
os.symlink(KAGGLE_INPUT_PATH, os.path.join("..", 'input'), target_is_directory=True)
except FileExistsError:
pass
try:
os.symlink(KAGGLE_WORKING_PATH, os.path.join("..", 'working'), target_is_directory=True)
except FileExistsError:
pass

for data_source_mapping in DATA_SOURCE_MAPPING.split(','):


directory, download_url_encoded = data_source_mapping.split(':')
download_url = unquote(download_url_encoded)
filename = urlparse(download_url).path
destination_path = os.path.join(KAGGLE_INPUT_PATH, directory)
try:
with urlopen(download_url) as fileres, NamedTemporaryFile() as tfile:
total_length = fileres.headers['content-length']
print(f'Downloading {directory}, {total_length} bytes compressed')
dl = 0
data = fileres.read(CHUNK_SIZE)
while len(data) > 0:
dl += len(data)
tfile.write(data)
done = int(50 * dl / int(total_length))
sys.stdout.write(f"\r[{'=' * done}{' ' * (50-done)}] {dl} bytes downloaded")
sys.stdout.flush()
data = fileres.read(CHUNK_SIZE)
if filename.endswith('.zip'):
with ZipFile(tfile) as zfile:
zfile.extractall(destination_path)
else:
with tarfile.open(tfile.name) as tarfile:
tarfile.extractall(destination path)
https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 1/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
( _p )
print(f'\nDownloaded and uncompressed: {directory}')
except HTTPError as e:
print(f'Failed to load (likely expired) {download_url} to path {destination_path}')
continue
except OSError as e:
print(f'Failed to load {download_url} to path {destination_path}')
continue

print('Data source import complete.')

Downloading otoscopedata, 201519065 bytes compressed


[==================================================] 201519065 bytes downloaded
Downloaded and uncompressed: otoscopedata
Data source import complete.

# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra


import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory


# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input

import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
# for filename in filenames:
# print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output whe
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current se

import shutil
import tensorflow as tf

from tensorflow.keras.preprocessing.image import ImageDataGenerator

from tensorflow.keras.models import Sequential


from tensorflow.keras.layers import Conv2D, MaxPool2D, Flatten, Dense, BatchNormalization, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.callbacks import ModelCheckpoint, EarlyStopping

from tensorflow.keras.applications.resnet50 import ResNet50


from tensorflow.keras.models import Model, save_model

import seaborn as sns


import matplotlib.pyplot as plt
from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

#os.makedirs('/kaggle/working/dataset/normal')
#os.makedirs('/kaggle/working/dataset/abnormal')
import os
# Added exist_ok=True to avoid the error if the directories already exist
os.makedirs('/kaggle/working/dataset/normal', exist_ok=True)
os.makedirs('/kaggle/working/dataset/abnormal', exist_ok=True)

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 2/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
path = "/kaggle/input/otoscopedata/tympanic_membrane_dataset/normal"
dest_path_root = "/kaggle/working/dataset/normal"
# Iterate over the subdirectories
for root, dirs, files in os.walk(path):
# Iterate over the files in each subdirectory
for file in files:
# Construct the source and destination paths
src_path = os.path.join(root, file)
dest_path = os.path.join(dest_path_root, file)

# Move the file to the destination path


shutil.copyfile(src_path, dest_path)

path = "/kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal"
dest_path_root = "/kaggle/working/dataset/abnormal"
# Iterate over the subdirectories
for root, dirs, files in os.walk(path):
# Iterate over the files in each subdirectory
for file in files:
# Construct the source and destination paths
src_path = os.path.join(root, file)
dest_path = os.path.join(dest_path_root, file)

# Move the file to the destination path


shutil.copyfile(src_path, dest_path)

IMG_SIZE = 224
root = "/kaggle/working/dataset"
batch_size = 32

img_gen = ImageDataGenerator(
rescale=1./255,
validation_split=0.3
)

train_datagen = img_gen.flow_from_directory(
directory=root,
batch_size=batch_size,
target_size=(IMG_SIZE,IMG_SIZE),
class_mode="binary",
subset="training",
shuffle=True,
seed=42,
)

test_datagen = img_gen.flow_from_directory(
directory=root,
batch_size=batch_size,
target_size=(IMG_SIZE,IMG_SIZE),
class_mode="binary",
subset="validation",
shuffle=False,
seed=42,
)

Found 670 images belonging to 2 classes.


Found 286 images belonging to 2 classes.

from tensorflow.keras.optimizers import SGD,RMSprop


model = ResNet50(include_top=False, weights='imagenet', input_shape=[224,224,3])
for layer in model.layers[:-8]:

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 3/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
layer.trainable = False

x = Conv2D(128, (3, 3), activation='relu')(model.output)


x = MaxPool2D(pool_size=(2, 2))(x)
x = Flatten()(x)
x = Dense(128,activation='relu')(x)
x = Dropout(0.2)(x)
x = Dense(1,activation='sigmoid')(x)

model = Model(inputs=model.input, outputs=x)

model.compile(
optimizer=RMSprop(learning_rate=0.01),
loss='binary_crossentropy',
metrics = ['accuracy']
)

batch_size = batch_size

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_w


94765736/94765736 ━━━━━━━━━━━━━━━━━━━━ 1s 0us/step

early_stopping = EarlyStopping(monitor= 'val_loss', patience=3)

history = model.fit(x=train_datagen,
steps_per_epoch=train_datagen.samples // batch_size,
validation_data=test_datagen,
validation_steps = test_datagen.samples // batch_size,
epochs=5,
callbacks=[early_stopping])

Epoch 1/5
20/20 ━━━━━━━━━━━━━━━━━━━━ 211s 11s/step - accuracy: 0.5770 - loss: 0.6819 - val_accuracy: 0.4922 -
Epoch 2/5
20/20 ━━━━━━━━━━━━━━━━━━━━ 13s 321ms/step - accuracy: 0.6562 - loss: 0.6679 - val_accuracy: 0.0000e+
Epoch 3/5
20/20 ━━━━━━━━━━━━━━━━━━━━ 240s 10s/step - accuracy: 0.5878 - loss: 0.6796 - val_accuracy: 0.4922 -
Epoch 4/5
20/20 ━━━━━━━━━━━━━━━━━━━━ 13s 377ms/step - accuracy: 0.5000 - loss: 0.7027 - val_accuracy: 0.0000e+

save_model(model,"ResNet50.h5")

WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_mode

def plot_accuracy(history):
plt.plot(history.history['accuracy'],label='train accuracy')
plt.plot(history.history['val_accuracy'],label='validation accuracy')
plt.title('ResNet50 accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='best')
plt.savefig('ResNet50-Accuracy')
plt.show()

def plot_loss(history):
plt.plot(history.history['loss'],label="train loss")
plt.plot(history.history['val_loss'],label="validation loss")
plt.title('ResNet50 loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 4/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
plt.legend(loc='best')
plt.savefig('ResNet50-Loss')
plt.show()

plot_accuracy(history)
plot_loss(history)

labels=test_datagen.classes
y_pred=model.predict(test_datagen)
prediction=tf.math.round(y_pred).numpy()
prediction=prediction.argmax(axis=1)

9/9 ━━━━━━━━━━━━━━━━━━━━ 82s 8s/step

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 5/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
def print_confusion_matrix(real,predicted):
cmap="Blues"
cm_plot_labels = ['normal','abnormal']
cm = confusion_matrix(y_true=labels, y_pred=prediction)
df_cm = pd.DataFrame(cm,cm_plot_labels,cm_plot_labels)
sns.set(font_scale=1.1) # for label size
plt.figure(figsize = (15,10))
s=sns.heatmap(df_cm, annot=True,fmt='g',cmap=cmap) # font size
plt.ylabel('True label')
plt.xlabel('Predicted label')
plt.savefig('ResNet50 confusion_matrix.png')
plt.show()

print_confusion_matrix(labels,prediction)

accuracy_score(labels,prediction)

0.4405594405594406

second model

import os
import cv2
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 6/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
from sklearn.metrics import accuracy_score, confusion_matrix
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.layers import Dense, Flatten, Dropout, GlobalAveragePooling2D
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
import seaborn as sns

# Set random seed for reproducibility


np.random.seed(42)

# Load images and labels


def load_data(path_normal, path_abnormal):
X, y = [], []

for folder, label in zip([path_normal, path_abnormal], [0, 1]):


for file in os.listdir(folder):
image_path = os.path.join(folder, file)
# Added error handling for invalid images
try:
img = cv2.imread(image_path)
if img is not None:
img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
img = cv2.resize(img, (224, 224)) # Resize to match model input
X.append(img)
y.append(label)
else:
print(f"Failed to load image: {image_path}")
except Exception as e:
print(f"Error loading image: {image_path} - {e}")

return np.array(X), np.array(y)

# Define paths
path_normal = '/kaggle/input/otoscopedata/tympanic_membrane_dataset/normal'
path_abnormal = '/kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal'

# Load data
X, y = load_data(path_normal, path_abnormal)

# Split into training and testing sets


X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)

# Normalize images
X_train = X_train / 255.0
X_test = X_test / 255.0

print("Training set shape:", X_train.shape)


print("Test set shape:", X_test.shape)

Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/earVentilationTu


Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/foreignObjectEar
Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/pseudoMembranes
Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/tympanoskleros
Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/otitisexterna
Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/aom
Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/csom
Failed to load image: /kaggle/input/otoscopedata/tympanic_membrane_dataset/abnormal/earwax
Training set shape: (428, 224, 224, 3)
Test set shape: (107, 224, 224, 3)

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 7/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
train_datagen = ImageDataGenerator(
rotation_range=20,
width_shift_range=0.1,
height_shift_range=0.1,
shear_range=0.2,
zoom_range=0.2,
horizontal_flip=True,
fill_mode='nearest'
)

test_datagen = ImageDataGenerator(rescale=1./255)

base_model = EfficientNetB0(weights='imagenet', include_top=False, input_shape=(224, 224, 3))


x = base_model.output
x = GlobalAveragePooling2D()(x) # Global average pooling
x = Dropout(0.5)(x) # Dropout layer for regularization
x = Dense(1, activation='sigmoid')(x) # Output layer for binary classification

model = Model(inputs=base_model.input, outputs=x)

# Freeze the base model layers


for layer in base_model.layers:
layer.trainable = False

model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

Downloading data from https://storage.googleapis.com/keras-applications/efficientnetb0_notop.h5


16705208/16705208 ━━━━━━━━━━━━━━━━━━━━ 0s 0us/step

early_stopping = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)


model_checkpoint = ModelCheckpoint('best_model.keras', save_best_only=True) # Changed the file extension

history = model.fit(
train_datagen.flow(X_train, y_train, batch_size=32),
validation_data=test_datagen.flow(X_test, y_test),
epochs=20,
steps_per_epoch=len(X_train) // 32,
validation_steps=len(X_test) // 32,
callbacks=[early_stopping, model_checkpoint],
verbose=1
)

Epoch 1/20
/usr/local/lib/python3.10/dist-packages/keras/src/trainers/data_adapters/py_dataset_adapter.py:121:
self._warn_if_super_not_called()
13/13 ━━━━━━━━━━━━━━━━━━━━ 63s 3s/step - accuracy: 0.7008 - loss: 0.6481 - val_accuracy: 1.0000 - va
Epoch 2/20
1/13 ━━━━━━━━━━━━━━━━━━━━ 24s 2s/step - accuracy: 1.0000 - loss: 0.4813/usr/lib/python3.10/contextl
self.gen.throw(typ, value, traceback)
13/13 ━━━━━━━━━━━━━━━━━━━━ 4s 131ms/step - accuracy: 1.0000 - loss: 0.4813 - val_accuracy: 1.0000 -
Epoch 3/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 44s 3s/step - accuracy: 1.0000 - loss: 0.4214 - val_accuracy: 1.0000 - va
Epoch 4/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 3s 102ms/step - accuracy: 1.0000 - loss: 0.3066 - val_accuracy: 1.0000 -
Epoch 5/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 77s 3s/step - accuracy: 1.0000 - loss: 0.2792 - val_accuracy: 1.0000 - va
Epoch 6/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 4s 182ms/step - accuracy: 1.0000 - loss: 0.1993 - val_accuracy: 1.0000 -
Epoch 7/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 43s 3s/step - accuracy: 1.0000 - loss: 0.1935 - val_accuracy: 1.0000 - va
Epoch 8/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 3s 162ms/step - accuracy: 1.0000 - loss: 0.1340 - val_accuracy: 1.0000 -

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 8/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab
Epoch 9/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 47s 4s/step - accuracy: 1.0000 - loss: 0.1407 - val_accuracy: 1.0000 - va
Epoch 10/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 3s 100ms/step - accuracy: 1.0000 - loss: 0.1222 - val_accuracy: 1.0000 -
Epoch 11/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 45s 3s/step - accuracy: 1.0000 - loss: 0.1072 - val_accuracy: 1.0000 - va
Epoch 12/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 3s 117ms/step - accuracy: 1.0000 - loss: 0.0958 - val_accuracy: 1.0000 -
Epoch 13/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 77s 3s/step - accuracy: 1.0000 - loss: 0.0877 - val_accuracy: 1.0000 - va
Epoch 14/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 3s 106ms/step - accuracy: 1.0000 - loss: 0.0719 - val_accuracy: 1.0000 -
Epoch 15/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 42s 3s/step - accuracy: 1.0000 - loss: 0.0728 - val_accuracy: 1.0000 - va
Epoch 16/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 3s 106ms/step - accuracy: 1.0000 - loss: 0.0680 - val_accuracy: 1.0000 -
Epoch 17/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 79s 3s/step - accuracy: 1.0000 - loss: 0.0606 - val_accuracy: 1.0000 - va
Epoch 18/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 15s 989ms/step - accuracy: 1.0000 - loss: 0.0556 - val_accuracy: 1.0000 -
Epoch 19/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 67s 3s/step - accuracy: 1.0000 - loss: 0.0507 - val_accuracy: 1.0000 - va
Epoch 20/20
13/13 ━━━━━━━━━━━━━━━━━━━━ 13s 799ms/step - accuracy: 1.0000 - loss: 0.0485 - val_accuracy: 1.0000 -

save_model(model,"EfficientNetB0.h5")

WARNING:absl:You are saving your model as an HDF5 file via `model.save()` or `keras.saving.save_mode

def plot_accuracy(history):
plt.plot(history.history['accuracy'],label='train accuracy')
plt.plot(history.history['val_accuracy'],label='validation accuracy')
plt.title('EfficientNetB0 accuracy')
plt.ylabel('Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='best')
plt.savefig('EfficientNetB0-Accuracy')
plt.show()

def plot_loss(history):
plt.plot(history.history['loss'],label="train loss")
plt.plot(history.history['val_loss'],label="validation loss")
plt.title('EfficientNetB0 loss')
plt.ylabel('Loss')
plt.xlabel('Epoch')
plt.legend(loc='best')
plt.savefig('EfficientNetB0-Loss')
plt.show()

plot_accuracy(history)
plot_loss(history)

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 9/10
‫ م‬9:49 2024/‫‏‬9/‫‏‬19 normal abnormal ear.ipynb - Colab

https://colab.research.google.com/drive/1kwS4e4gKmjjoqfW7iEUSd0qw_vdzCqtW#scrollTo=FiRJovSrlyGU&printMode=true 10/10

You might also like