# %% [markdown]
# # Debugging
# %%
import os
import pickle

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
from unidecode import unidecode
# %%
# `audio_df` is assumed to have been built in an earlier section.
audio_files = os.listdir('datasets/audio')
# Normalize names to plain ASCII so audio and image rows can be joined on them.
audio_df['name'] = audio_df['name'].apply(unidecode)
# L2-normalize the audio embeddings.
audio_df['audio_embedding'] = audio_df['audio_embedding'].apply(
    lambda x: x / np.linalg.norm(x))
# %%
with open('datasets/image_embeddings.pickle', 'rb') as file:
    image_embeddings = pickle.load(file)
# `image_df` is assumed to have been built from `image_embeddings` earlier.
image_df['name'] = image_df['name'].apply(unidecode)
# %%
# Pair each image row with its matching audio row by name. An inner join keeps
# only names present in both tables; an outer join would introduce NaN rows
# that break the negative sampling below.
matches_df = pd.merge(image_df, audio_df, on='name', how='inner')
# %%
# For each row, sample a random non-matching row and use its `positive` column
# (assumed to hold the matching other-modality embedding) as this row's
# negative. `coincidences` counts how often the sampler drew the row's own
# name and had to retry.
coincidences = 0
negatives = []
for i, row in matches_df.iterrows():
    while True:
        sample = matches_df.sample(n=1)
        sample.reset_index(inplace=True, drop=True)
        if sample['name'][0] != row['name']:
            negatives.append(sample['positive'][0])
            break
        else:
            coincidences += 1
matches_df['negative'] = negatives
# %%
# 80% train; split the remainder evenly into test and validation sets.
train_set, test_set = train_test_split(matches_df, test_size=0.2)
test_set, validation_set = train_test_split(test_set, test_size=0.5)
# %% [markdown]
# ### Siamese Network Model With Triplet Loss Training
# %%
# `SiameseNetwork` is assumed defined earlier; the arguments appear to be the
# per-branch input sizes, the per-branch hidden-layer widths, and the shared
# output embedding dimension.
siamese_model = SiameseNetwork([512, 192], [[256, 512, 256], [256, 512, 256]], 256)
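# %% [markdown]
# For reference, a minimal sketch of a two-branch network matching this
# constructor signature. This is a hypothetical reconstruction, not the class
# used above: one MLP branch per modality, each projecting its input into a
# shared embedding space.
# %%
class SiameseNetworkSketch(nn.Module):
    """Hypothetical reconstruction, not the class used above."""

    def __init__(self, input_sizes, hidden_sizes, output_size):
        super().__init__()
        self.branches = nn.ModuleList()
        for in_size, widths in zip(input_sizes, hidden_sizes):
            layers = []
            prev = in_size
            for width in widths:
                layers += [nn.Linear(prev, width), nn.ReLU()]
                prev = width
            layers.append(nn.Linear(prev, output_size))
            self.branches.append(nn.Sequential(*layers))

    def forward(self, x, branch):
        # Embed `x` with the branch for its modality (0 or 1).
        return self.branches[branch](x)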
# %%
training_triplet_dataset = TripletDataset(train_set)
testing_triplet_dataset = TripletDataset(test_set)
validation_triplet_dataset = TripletDataset(validation_set)
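# %% [markdown]
# `TripletDataset` is assumed defined earlier. A minimal sketch consistent with
# how it is iterated below (hypothetical: the column names holding the anchor,
# positive, and negative embeddings are assumptions):
# %%
from torch.utils.data import Dataset

class TripletDatasetSketch(Dataset):
    """Hypothetical reconstruction, not the class used above."""

    def __init__(self, df):
        self.df = df.reset_index(drop=True)

    def __len__(self):
        return len(self.df)

    def __getitem__(self, idx):
        row = self.df.iloc[idx]
        anchor = torch.as_tensor(row['image_embedding'], dtype=torch.float32)
        positive = torch.as_tensor(row['positive'], dtype=torch.float32)
        negative = torch.as_tensor(row['negative'], dtype=torch.float32)
        return anchor, positive, negative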
# %%
train_triplet_dataloader = DataLoader(training_triplet_dataset, batch_size=32,
shuffle=True)
test_triplet_dataloader = DataLoader(testing_triplet_dataset, batch_size=32,
shuffle=True)
validation_triplet_dataloader = DataLoader(validation_triplet_dataset,
batch_size=32, shuffle=True)
# %% [markdown]
# ##### Training with Early Stopping
# %%
optimizer = optim.SGD(siamese_model.parameters(), lr=0.1)
# optimizer = optim.Adam(siamese_model.parameters(), lr=0.001)
epochs = 150
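# %% [markdown]
# `triplet_loss` is assumed defined earlier. A minimal sketch consistent with
# how it is called in the loop below, assuming the model embeds the anchor with
# one branch and the positive/negative with the other (the `branch` argument
# follows the hypothetical SiameseNetwork sketch above):
# %%
import torch.nn.functional as F

def triplet_loss_sketch(model, anchor, positive, negative, margin=1.0):
    """Hypothetical reconstruction, not the function used below."""
    a = model(anchor, branch=0)
    p = model(positive, branch=1)
    n = model(negative, branch=1)
    # Standard triplet margin objective: pull each positive closer to its
    # anchor than the negative by at least `margin`.
    d_ap = F.pairwise_distance(a, p)
    d_an = F.pairwise_distance(a, n)
    return torch.clamp(d_ap - d_an + margin, min=0).mean()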
# %%
training_losses = []
testing_losses = []
for epoch in range(epochs):
    total_loss = 0.0
    total_testing_loss = 0.0
    training_batches = 0
    testing_batches = 0
    for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
        training_batches += 1
        optimizer.zero_grad()
        loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                            negative_batch, margin=1.0)
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    # Evaluate on the test set; without this pass `testing_batches` stays 0
    # and the division below fails.
    with torch.no_grad():
        for anchor_batch, positive_batch, negative_batch in test_triplet_dataloader:
            testing_batches += 1
            test_loss = triplet_loss(siamese_model, anchor_batch, positive_batch,
                                     negative_batch, margin=1.0)
            total_testing_loss += test_loss.item()
    total_training_loss_per_batch = total_loss / training_batches
    training_losses.append(total_training_loss_per_batch)
    total_testing_loss_per_batch = total_testing_loss / testing_batches
    testing_losses.append(total_testing_loss_per_batch)
    print(f"epoch: {epoch + 1} Training Loss per batch: "
          f"{total_training_loss_per_batch}, Testing Loss per batch: "
          f"{total_testing_loss_per_batch}\n")
    # Stop once the mean test loss over the last 10 epochs is no longer
    # improving on the 10 epochs before that.
    if (epoch > 20) and (np.mean(testing_losses[-20:-10]) < np.mean(testing_losses[-10:])):
        print('Early stopping')
        break
# %% [markdown]
# ##### A little extra training
# %%
# To train for additional epochs, re-run the loop above with an adjusted
# range (e.g. `for epoch in range(50, 100):`) without re-initializing
# `training_losses` and `testing_losses`.
# %%
epoch_list = range(1, len(training_losses) + 1)
plt.plot(epoch_list, training_losses, label='Training Loss')
plt.plot(epoch_list, testing_losses, label='Testing Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.show()
# %% [markdown]
# ### 1:2 Identification Accuracy
# %%
ia = identification_accuracy(siamese_model, validation_triplet_dataloader)
print(f'1:2 identification accuracy: {ia}')
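# %% [markdown]
# `identification_accuracy` is assumed defined earlier. A minimal sketch of a
# 1:2 identification metric consistent with its use here (hypothetical: for
# each anchor the model chooses between one positive and one negative
# candidate, and accuracy is the fraction where the positive is closer; the
# `branch` argument follows the hypothetical SiameseNetwork sketch above):
# %%
import torch.nn.functional as F

def identification_accuracy_sketch(model, dataloader):
    """Hypothetical reconstruction, not the function used above."""
    correct, total = 0, 0
    with torch.no_grad():
        for anchor, positive, negative in dataloader:
            a = model(anchor, branch=0)
            p = model(positive, branch=1)
            n = model(negative, branch=1)
            # Count an anchor as correct when its true match is the closer
            # of the two candidates.
            correct += (F.pairwise_distance(a, p) < F.pairwise_distance(a, n)).sum().item()
            total += a.size(0)
    return correct / total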
# %% [markdown]
# ### Building A Classifier On Top Of The Siamese Network
# %%
class SimpleBinaryClassifier(nn.Module):
    """Small MLP head mapping an embedding to a single match/no-match logit."""

    def __init__(self, input_size, hidden_size):
        super().__init__()
        self.fc = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, 1),
        )

    def forward(self, x):
        return self.fc(x)
# %%
binary_classifier = SimpleBinaryClassifier(input_size=256, hidden_size=64)
# %%
# Freeze the trained siamese model so only the classifier head is updated.
for param in siamese_model.parameters():
    param.requires_grad = False
# %%
# `SiameseBinaryClassifier` is assumed defined earlier: a wrapper that scores a
# pair of inputs with the frozen siamese model plus a binary head (see the
# sketch after the parameter counts below).
binary_classifier = SiameseBinaryClassifier(siamese_model)
# %%
total_params = sum(param.numel() for param in binary_classifier.parameters())
total_params
# %%
trainable_params = sum(param.numel() for param in binary_classifier.parameters()
                       if param.requires_grad)
trainable_params
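# %% [markdown]
# A minimal sketch of what `SiameseBinaryClassifier` could look like. This is a
# hypothetical reconstruction, not the class used above: it embeds each input
# with the frozen siamese model and scores the pair with the
# `SimpleBinaryClassifier` head defined earlier.
# %%
class SiameseBinaryClassifierSketch(nn.Module):
    """Hypothetical reconstruction, not the class used above."""

    def __init__(self, siamese):
        super().__init__()
        self.siamese = siamese
        self.head = SimpleBinaryClassifier(input_size=256, hidden_size=64)

    def forward(self, x0, x1):
        # Embed each modality with its branch, then classify the element-wise
        # absolute difference of the two embeddings.
        e0 = self.siamese(x0, branch=0)
        e1 = self.siamese(x1, branch=1)
        return self.head(torch.abs(e0 - e1))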
# %% [markdown]
# ### Training The Classifier
# %%
optimizer = optim.Adam(binary_classifier.parameters(), lr=0.001)
criterion = nn.BCEWithLogitsLoss()
epochs = 50
# %%
# Assumes `binary_classifier(x, y)` returns one logit per pair.
for epoch in range(epochs):
    total_loss = 0.0
    total_correct = 0
    total_samples = 0
    for anchor_batch, positive_batch, negative_batch in train_triplet_dataloader:
        optimizer.zero_grad()
        # Score matching and non-matching pairs.
        anchor_positive_outputs = binary_classifier(anchor_batch, positive_batch)
        anchor_negative_outputs = binary_classifier(anchor_batch, negative_batch)
        # Combine outputs
        outputs = torch.cat((anchor_positive_outputs, anchor_negative_outputs), dim=0)
        # Create labels
        positive_labels = torch.ones(anchor_batch.size(0), 1)   # labels for positive pairs
        negative_labels = torch.zeros(anchor_batch.size(0), 1)  # labels for negative pairs
        labels = torch.cat((positive_labels, negative_labels), dim=0)
        # Compute loss
        loss = criterion(outputs.squeeze(), labels.squeeze())
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        # Track accuracy from the logits (positive logit -> predicted match).
        predictions = (outputs.squeeze() > 0).float()
        total_correct += (predictions == labels.squeeze()).sum().item()
        total_samples += labels.size(0)
    print(f"epoch: {epoch + 1} loss: {total_loss}, "
          f"accuracy: {total_correct / total_samples}")