# %% [markdown]
# # Siamese Network for Audio-Visual Matching
# This notebook loads precomputed audio and image embeddings, builds
# training/test/validation triplet datasets, defines a siamese network,
# trains it with a triplet loss, and evaluates the trained model on the
# validation set.

# %%

from classes import *
from functions import *

import os
import pickle
import random

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split

import torch
import torch.nn as nn
import torch.optim as optim  # optim.SGD / optim.Adam and nn are used below; may already be provided by the star imports
from torch.utils.data import Dataset, DataLoader

# %%
from unidecode import unidecode

# %%
audio_files = os.listdir('datasets/audio')

# Load the precomputed audio embeddings and build a dataframe keyed by name.
with open('datasets/audio_embeddings.pickle', 'rb') as file:
    audio_embeddings = pickle.load(file)

audio_names = [key.split('/')[0] for key in audio_embeddings.keys()]

audio_df = pd.DataFrame([{'name': key.split('/')[0], 'audio_embedding': value}
                         for key, value in audio_embeddings.items()])

audio_df['name'] = audio_df['name'].apply(unidecode)

# L2-normalise each audio embedding.
audio_df['audio_embedding'] = audio_df['audio_embedding'].apply(lambda x: x / np.linalg.norm(x))

# %%
with open('datasets/image_embeddings.pickle', 'rb') as file:
    image_embeddings = pickle.load(file)

image_df = pd.DataFrame([{'name': key.split('/')[0], 'image_embedding': value}
                         for key, value in image_embeddings.items()])

image_df['name'] = image_df['name'].apply(unidecode)

# %%
# Pair each name's image embedding (anchor) with its audio embedding (positive).
matches_df = pd.merge(image_df, audio_df, on='name', how='outer')

matches_df.columns = ['name', 'anchor', 'positive']

# %%
# For each (anchor, positive) pair, sample a negative audio embedding belonging to a different name.
coincidences = 0
negatives = []
for i, row in matches_df.iterrows():
    while True:
        sample = matches_df.sample(n=1)
        sample.reset_index(inplace=True, drop=True)
        if sample['name'][0] != row['name']:
            negatives.append(sample['positive'][0])
            break
        else:
            coincidences += 1

matches_df['negative'] = negatives

# %%
# 80/10/10 split into training, test, and validation sets.
train_set, test_set, _, _ = train_test_split(matches_df, matches_df['name'], test_size=0.2)
test_set, validation_set, _, _ = train_test_split(test_set, test_set['name'], test_size=0.5)

# %% [markdown]
# ### Siamese Network Model With Triplet Loss Training

# %%
siamese_model = SiameseNetwork([512, 192], [[256, 512, 256], [256, 512, 256]], 256)
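
# %% [markdown]
# `SiameseNetwork` is imported from `classes` and its definition is not part of this
# notebook. As a rough, hypothetical sketch of a two-tower model matching the
# constructor call above (assuming the arguments are the per-modality input
# dimensions, the hidden-layer widths of each tower, and a shared output embedding
# dimension -- the names and layer choices are assumptions, not the actual
# implementation):

# %%
# Hypothetical sketch only; the real SiameseNetwork lives in classes.py.
import torch.nn as nn


class SiameseNetworkSketch(nn.Module):
    def __init__(self, input_dims, hidden_dims, embedding_dim):
        super().__init__()
        # One MLP tower per modality, both projecting into the same embedding space.
        self.towers = nn.ModuleList([
            self._make_tower(in_dim, dims, embedding_dim)
            for in_dim, dims in zip(input_dims, hidden_dims)
        ])

    @staticmethod
    def _make_tower(in_dim, dims, out_dim):
        layers, prev = [], in_dim
        for width in dims:
            layers += [nn.Linear(prev, width), nn.ReLU()]
            prev = width
        layers.append(nn.Linear(prev, out_dim))
        return nn.Sequential(*layers)

    def forward(self, image_batch, audio_batch):
        # Embed each modality with its own tower.
        return self.towers[0](image_batch), self.towers[1](audio_batch)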

# %%
training_triplet_dataset = TripletDataset(train_set)
testing_triplet_dataset = TripletDataset(test_set)
validation_triplet_dataset = TripletDataset(validation_set)
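
# %% [markdown]
# `TripletDataset` also comes from `classes`. A minimal sketch, assuming it simply
# yields (anchor, positive, negative) tensors built from the dataframe columns
# created above (illustrative only; the real class may differ):

# %%
# Hypothetical sketch only; the real TripletDataset lives in classes.py.
import numpy as np
import torch
from torch.utils.data import Dataset


class TripletDatasetSketch(Dataset):
    def __init__(self, df):
        # Stack the embedding columns into float32 arrays.
        self.anchors = np.stack(df['anchor'].values).astype(np.float32)
        self.positives = np.stack(df['positive'].values).astype(np.float32)
        self.negatives = np.stack(df['negative'].values).astype(np.float32)

    def __len__(self):
        return len(self.anchors)

    def __getitem__(self, idx):
        return (torch.from_numpy(self.anchors[idx]),
                torch.from_numpy(self.positives[idx]),
                torch.from_numpy(self.negatives[idx]))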

# %%
train_triplet_dataloader = DataLoader(training_triplet_dataset, batch_size=32,
shuffle=True)
test_triplet_dataloader = DataLoader(testing_triplet_dataset, batch_size=32,
shuffle=True)
validation_triplet_dataloader = DataLoader(validation_triplet_dataset,
batch_size=32, shuffle=True)

# %% [markdown]
# ##### Training with Early Stopping

# %%
optimizer = optim.SGD(siamese_model.parameters(), lr=0.1)
# optimizer = optim.Adam(siamese_model.parameters(), lr=0.001)
epochs = 150
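
# %% [markdown]
# The training loop below relies on `triplet_loss` from `functions`, which is not
# shown here. A minimal sketch, assuming it is the standard margin-based triplet loss
# computed on the model's two embeddings (the forward signature and the reduction are
# assumptions):

# %%
# Hypothetical sketch only; the real triplet_loss lives in functions.py.
import torch
import torch.nn.functional as F


def triplet_loss_sketch(model, anchor, positive, negative, margin=1.0):
    # Embed the image anchor together with the positive and negative audio clips.
    anchor_emb, positive_emb = model(anchor, positive)
    _, negative_emb = model(anchor, negative)
    pos_dist = F.pairwise_distance(anchor_emb, positive_emb)
    neg_dist = F.pairwise_distance(anchor_emb, negative_emb)
    # Hinge on the margin and average over the batch.
    return torch.clamp(pos_dist - neg_dist + margin, min=0).mean()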

# %%
training_losses = []
testing_losses = []
early_stopping_indicators = 0
for epoch in range(epochs):

    total_loss = 0.0
    total_testing_loss = 0.0
    training_batches = 0
    testing_batches = 0
    for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
        training_batches += 1
        optimizer.zero_grad()
        loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                            negative_batch, margin=1.0)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()

    # validation_losses.append(validation_loss)

    for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
        testing_batches += 1
        testing_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                                    negative_batch, margin=1.0)
        total_testing_loss += testing_loss.item()

    total_training_loss_per_batch = total_loss / training_batches
    training_losses.append(total_training_loss_per_batch)
    total_testing_loss_per_batch = total_testing_loss / testing_batches
    testing_losses.append(total_testing_loss_per_batch)
    print(f"epoch: {epoch + 1} Training Loss per batch: {total_training_loss_per_batch}, "
          f"Testing Loss per batch: {total_testing_loss_per_batch}\n")
    # Stop if the mean test loss over the last 10 epochs exceeds that of the 10 epochs before.
    if (epoch > 20) and (np.mean(testing_losses[-20:-10]) < np.mean(testing_losses[-10:])):
        print('Early stopping')
        break

# %% [markdown]
# ##### A little extra training

# %%
# for epoch in range(epochs):
# for epoch in range(50, 100):
#     total_loss = 0.0
#     total_testing_loss = 0.0
#     training_batches = 0
#     testing_batches = 0
#     for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
#         training_batches += 1
#         optimizer.zero_grad()
#         loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
#                             negative_batch, margin=1.0)
#         loss.backward()
#         optimizer.step()
#         total_loss += loss.item()

#     # validation_losses.append(validation_loss)

#     for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
#         testing_batches += 1
#         testing_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
#                                     negative_batch, margin=1.0)
#         total_testing_loss += testing_loss.item()

#     total_training_loss_per_batch = total_loss / training_batches
#     training_losses.append(total_training_loss_per_batch)
#     total_testing_loss_per_batch = total_testing_loss / testing_batches
#     testing_losses.append(total_testing_loss_per_batch)
#     print(f"epoch: {epoch + 1} Training Loss per batch: {total_training_loss_per_batch}, "
#           f"Testing Loss per batch: {total_testing_loss_per_batch}\n")
#     if (epoch > 20) and (np.mean(testing_losses[-20:-10]) < np.mean(testing_losses[-10:])):
#         print('Early stopping')
#         break

# # %%

# %%

epoch_list = [i+1 for i in range(epoch + 1)]

# %%
plt.plot(epoch_list, training_losses, label='Training Loss')
plt.plot(epoch_list, testing_losses, label='Testing Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()

# %% [markdown]
# ### 1:2 Identification Accuracy

# %%
ia = identification_accuracy(siamese_model, validation_triplet_dataloader)
print(ia)
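
# %% [markdown]
# `identification_accuracy` is imported from `functions` and not shown. Its name and
# the "1:2" heading suggest it measures how often the anchor is embedded closer to its
# true positive than to the sampled negative. A sketch under that assumption (the real
# implementation may differ):

# %%
# Hypothetical sketch only; the real identification_accuracy lives in functions.py.
import torch
import torch.nn.functional as F


def identification_accuracy_sketch(model, dataloader):
    correct, total = 0, 0
    with torch.no_grad():
        for anchor, positive, negative in dataloader:
            anchor_emb, positive_emb = model(anchor, positive)
            _, negative_emb = model(anchor, negative)
            pos_dist = F.pairwise_distance(anchor_emb, positive_emb)
            neg_dist = F.pairwise_distance(anchor_emb, negative_emb)
            # Count a trial as correct when the true pairing is the closer one.
            correct += (pos_dist < neg_dist).sum().item()
            total += anchor.size(0)
    return correct / total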

# %%
# i = 0
# for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:

# ab = anchor_batch
# break

# %%
# ab

# %%
# siamese_model(anchor_batch, )

# # %% [markdown]
# # ### Building A Classifier On Top Of The Siamese Network

# # %%
# class SimpleBinaryClassifier(nn.Module):
#     def __init__(self):
#         super(SimpleBinaryClassifier, self).__init__()
#         # Input size and output size are both 1 for a single-number (similarity score) input.
#         self.fc = nn.Linear(1, 1)

#     def forward(self, x):
#         x = self.fc(x)
#         return x

# # %%
# binary_classifier = SimpleBinaryClassifier(input_size=256, hidden_size=64)

# # %%
# optimizer = optim.Adam(binary_classifier.parameters(), lr=0.001)

# # %%
# criterion = nn.BCEWithLogitsLoss()

# # %%
# # freezing the trained siamese model
# for param in siamese_model.parameters():
#     param.requires_grad = False

# # %%
# binary_classifier = SiameseBinaryClassifier(siamese_model)
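
# %% [markdown]
# The commented-out cells in this section reference `SiameseBinaryClassifier` from
# `classes`, which is not shown. A minimal sketch, assuming it wraps the frozen
# siamese model and maps each pair's embedding distance to a single logit (the
# structure is an assumption for illustration only):

# %%
# Hypothetical sketch only; the real SiameseBinaryClassifier lives in classes.py.
import torch.nn as nn
import torch.nn.functional as F


class SiameseBinaryClassifierSketch(nn.Module):
    def __init__(self, siamese_model):
        super().__init__()
        self.siamese_model = siamese_model  # assumed already frozen by the loop above
        self.fc = nn.Linear(1, 1)           # maps a single distance to a logit

    def forward(self, image_batch, audio_batch):
        image_emb, audio_emb = self.siamese_model(image_batch, audio_batch)
        dist = F.pairwise_distance(image_emb, audio_emb).unsqueeze(1)
        return self.fc(dist)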

# # %%
# total_params = sum([param.numel() for param in binary_classifier.parameters()])

# # %%
# total_params

# # %%
# trainable_params = sum([param.numel() for param in binary_classifier.parameters()
#                         if param.requires_grad])

# # %%
# trainable_params

# # %% [markdown]
# # ### Training The Classifier

# # %%
# optimizer = optim.Adam(binary_classifier.parameters(), lr=0.001)
# criterion = nn.BCEWithLogitsLoss()
# epochs = 50

# # %%

# for epoch in range(epochs):

#     total_loss = 0.0
#     total_correct = 0
#     total_samples = 0

#     for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
#         optimizer.zero_grad()

#         # anchor_positive_pairs = torch.cat((anchor_batch, positive_batch), dim=0)
#         # anchor_negative_pairs = torch.cat((anchor_batch, negative_batch), dim=0)
#         # positive_outputs = siamese_model(anchor_batch, positive_batch)
#         # negative_outputs = siamese_model(anchor_batch, negative_batch)
#         # outputs = torch.cat((positive_outputs, negative_outputs), dim=0)
#         # outputs = outputs.detach()
#         # positive_labels = torch.ones(anchor_batch.size(0), 1)
#         # negative_labels = torch.zeros(anchor_batch.size(0), 1)
#         # labels = torch.cat((positive_labels, negative_labels), dim=0)
#         anchor_positive_outputs = siamese_model(anchor_batch, positive_batch)

#         # Forward pass for anchor and negative pairs
#         anchor_negative_outputs = siamese_model(anchor_batch, negative_batch)

#         # Combine outputs
#         inputs = torch.cat((anchor_positive_outputs, anchor_negative_outputs), dim=0)

#         # Create labels
#         positive_labels = torch.ones(anchor_batch.size(0), 1)   # Labels for positive pairs
#         negative_labels = torch.zeros(anchor_batch.size(0), 1)  # Labels for negative pairs
#         labels = torch.cat((positive_labels, negative_labels), dim=0)

#         # Detach anchor-positive and anchor-negative outputs
#         anchor_positive_outputs_detached = anchor_positive_outputs.detach()
#         anchor_negative_outputs_detached = anchor_negative_outputs.detach()

#         # Combine detached outputs
#         outputs = torch.cat((anchor_positive_outputs_detached, anchor_negative_outputs_detached), dim=0)

#         # Compute loss
#         loss = criterion(outputs.squeeze(), labels.squeeze())
#         loss.backward()

#         optimizer.step()

#         total_loss += loss.item()

#         predicted_labels = (outputs > 0.5).float()
#         total_correct += (predicted_labels == labels).sum().item()
#         total_samples += labels.size(0)

#     print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss / len(train_triplet_dataloader)}, "
#           f"Accuracy: {total_correct / total_samples}")
