Preparing Environment for creating a Deep learning Model with
Dental Clinic Data
By - Aaditya Balakrishnan
Step 1 Installed Python v 3.11.4
Step 2 Installed Visual Studio code
Step 3 Installed Anaconda installer for jupyter notebook
Step 4 Installed Python extension in vs code
Step 5 Setting up virtual environment
Entered the following commands in the terminal:
conda create --name yourenvname python=3.11.4
conda env list
conda activate yourenvname
conda install ipykernel
conda install pandas
conda install matplotlib
conda install tensorflow
conda install scikit-learn
conda install plotly
Imported the given file – DOCS.csv
Sample data
Coding in VS code
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from tensorflow.keras import layers, models
(importing file in csv format)
# Load the clinic data set from the CSV export.
data = pd.read_csv("DOCS.csv")
# Call head() so the first rows are printed; printing `data.head` without
# parentheses only shows the bound-method representation.
print(data.head())
# Drop every row that has at least one missing value.
data = data.dropna(how='any')
print(data.shape)
(Converting categorical values to numeric codes using label encoding)
from sklearn.preprocessing import LabelEncoder

# A single encoder is refitted for each column in turn, so once the loop
# finishes `le` only remembers the classes of the last column (PRODUCT).
le = LabelEncoder()
for column_name in ("DOCTORNAME", "PRACTICENAME", "NOTATION", "PRODUCT"):
    data[column_name] = le.fit_transform(data[column_name])
data.head()
(Converting any remaining string columns to integers)
def handle_non_numerical_data(data):
    """Replace the values of every non-numeric column with integer codes.

    Integer and floating-point columns of any width are left untouched.
    Every other column (strings, booleans, ...) is rewritten so that each
    distinct value is mapped to an integer, numbered from 0 in order of
    first appearance. Numbering by first appearance keeps the encoding
    reproducible between runs, which iterating over a ``set`` does not.

    Args:
        data: pandas DataFrame to encode. It is modified in place.

    Returns:
        The same DataFrame object, with non-numeric columns encoded.
    """
    for column in data.columns.values:
        dtype = data[column].dtype
        # Booleans count as "numeric" for pandas but are still encoded here.
        if pd.api.types.is_numeric_dtype(dtype) and not pd.api.types.is_bool_dtype(dtype):
            continue

        values = data[column].tolist()
        # dict.fromkeys de-duplicates while preserving first-seen order.
        codes = {value: code for code, value in enumerate(dict.fromkeys(values))}
        data[column] = [codes[value] for value in values]
    return data
# Encode any columns that are still non-numeric, then print the first rows
# of the result.
data = handle_non_numerical_data(data)
print(data.head())
(One way)
(Performing logistic regression after separating the data into training and test sets)
import time

from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Single feature (NOTATION) and target (PRODUCT).
x = data.loc[:, "NOTATION"].values
y = data.loc[:, "PRODUCT"].values

# Hold out 20% of the rows for testing; fixed seed for a repeatable split.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=0
)

sc_x = StandardScaler()
sc_y = StandardScaler()  # created but not fitted in this section

# The scaler needs 2-D input, so the single feature becomes one column.
x_train = np.array(x_train).reshape(-1, 1)
x_test = np.array(x_test).reshape(-1, 1)

# Fit the scaler on the training data only and reuse those statistics for
# the test data. Re-fitting on the test set would scale it with its own
# mean/variance and make the two sets inconsistent.
x_train = sc_x.fit_transform(x_train)
x_test = sc_x.transform(x_test)

# Keep the 1-D test labels before reshaping them into a column vector.
y_test_org = y_test
y_train = np.array(y_train).reshape(-1, 1)
y_test = np.array(y_test).reshape(-1, 1)

# Second scaler kept so the name `sc` stays available. The training data is
# already standardized, so this pass is effectively a no-op; the test set
# again uses transform() rather than a fresh fit.
sc = StandardScaler()
x_train = sc.fit_transform(x_train)
x_test = sc.transform(x_test)
(Alternative way)
def load_data_from_excel(file_path, target_column):
    """Read an Excel sheet and split it into features and one-hot labels.

    The sheet is read with the openpyxl engine; its first row is taken as
    the column names.

    Args:
        file_path: Path of the Excel workbook to read.
        target_column: Name of the column holding the labels.

    Returns:
        A ``(features, labels)`` tuple: ``features`` is the sheet without
        the target column, ``labels`` is ``pd.get_dummies`` applied to the
        target column (one indicator column per distinct label).
    """
    sheet = pd.read_excel(file_path, engine='openpyxl')

    # Everything except the target column is treated as a feature.
    feature_frame = sheet.drop(columns=[target_column])

    # One indicator column per distinct label value.
    one_hot_labels = pd.get_dummies(sheet[target_column])

    return feature_frame, one_hot_labels
# Replace 'DOCS.xlsx' with the path to your Excel file and 'PRODUCT' with the
# name of the target column.
file_path = 'DOCS.xlsx'
target_column_name = 'PRODUCT'
features, labels = load_data_from_excel(file_path, target_column_name)
# NOTE(review): the features are used exactly as loaded from the sheet; if it
# contains text columns or missing values they presumably need encoding or
# cleaning before being fed to a neural network - confirm against the data.
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size=0.2, random_state=42)
def create_deep_learning_model(input_shape, num_classes):
    """Build an (uncompiled) feed-forward classifier.

    Architecture: Dense(64, relu) -> Dense(128, relu) -> Dropout(0.5)
    -> Dense(num_classes, softmax).

    Args:
        input_shape: Shape tuple of one input sample, e.g. ``(num_features,)``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``Sequential`` model; it still has to be compiled before training.
    """
    model = models.Sequential([
        # Declare the input with an explicit Input layer; passing
        # `input_shape=` to the first Dense layer is deprecated in Keras 3.
        layers.Input(shape=input_shape),
        layers.Dense(64, activation='relu'),
        layers.Dense(128, activation='relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation='softmax'),
    ])
    return model
# Get the number of features and classes for input shape and output layer size
# (second dimension of X_train = feature columns; second dimension of y_train =
# label columns, which assumes y_train is one-hot encoded with one column per class).
num_features = X_train.shape[1]
num_classes = y_train.shape[1]
# Build the network for that input width and number of output classes.
model = create_deep_learning_model(input_shape=(num_features,), num_classes=num_classes)
def compile_model(model, learning_rate=0.001):
    """Compile ``model`` in place for multi-class classification.

    Uses the Adam optimizer, categorical cross-entropy loss and the
    accuracy metric.

    Args:
        model: Keras model to compile.
        learning_rate: Learning rate passed to the Adam optimizer.

    Returns:
        None; the model is configured in place.
    """
    model.compile(
        optimizer=tf.keras.optimizers.Adam(learning_rate=learning_rate),
        loss='categorical_crossentropy',
        metrics=['accuracy'],
    )


# Compile once; the original repeated the definition and the call verbatim.
compile_model(model)