
assignment-9

November 12, 2024

[2]: import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
from collections import Counter
from itertools import chain

# Sample parallel sentences for demonstration (replace with actual data)
source_sentences = ["hello", "how are you", "good morning"]
target_sentences = ["hola", "cómo estás", "buenos días"]

# Vocabulary building function
def build_vocab(sentences):
    counter = Counter(chain.from_iterable(s.split() for s in sentences))
    vocab = {word: idx + 3 for idx, (word, _) in enumerate(counter.most_common())}
    vocab["<pad>"] = 0
    vocab["<sos>"] = 1
    vocab["<eos>"] = 2
    return vocab

# Build vocabulary for source and target languages
source_vocab = build_vocab(source_sentences)
target_vocab = build_vocab(target_sentences)

# Tokenize function
def tokenize(sentence, vocab):
    tokens = ["<sos>"] + sentence.split() + ["<eos>"]
    return [vocab[token] if token in vocab else vocab["<pad>"] for token in tokens]

# Prepare data for training
train_data = [(torch.tensor(tokenize(src, source_vocab)), torch.tensor(tokenize(tgt, target_vocab)))
              for src, tgt in zip(source_sentences, target_sentences)]
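
# (Illustration, not part of the original cell: with these three toy sentences
# and CPython's insertion-ordered Counter, the first training pair should come
# out as tensor([1, 3, 2]) -> tensor([1, 3, 2]), i.e. <sos> "hello" <eos>
# paired with <sos> "hola" <eos>.)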

# Define Dataset and DataLoader

class TranslationDataset(Dataset):
    def __init__(self, data):
        self.data = data

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

dataset = TranslationDataset(train_data)
dataloader = DataLoader(dataset, batch_size=2, shuffle=True, collate_fn=lambda x: x)

# Encoder model
class Encoder(nn.Module):
    def __init__(self, input_dim, emb_dim, hidden_dim, n_layers):
        super(Encoder, self).__init__()
        self.embedding = nn.Embedding(input_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim, hidden_dim, n_layers, batch_first=True)

    def forward(self, src):
        embedded = self.embedding(src)
        outputs, (hidden, cell) = self.lstm(embedded)
        return outputs, hidden, cell

# Attention model
class Attention(nn.Module):
    def __init__(self, hidden_dim):
        super(Attention, self).__init__()
        self.attn = nn.Linear(hidden_dim * 2, hidden_dim)
        self.v = nn.Linear(hidden_dim, 1, bias=False)

    def forward(self, hidden, encoder_outputs):
        src_len = encoder_outputs.shape[1]
        hidden = hidden[-1].unsqueeze(1).repeat(1, src_len, 1)
        energy = torch.tanh(self.attn(torch.cat((hidden, encoder_outputs), dim=2)))
        attention = self.v(energy).squeeze(2)
        return torch.softmax(attention, dim=1)
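
# (Sanity check, not part of the original assignment cell: given a decoder
# hidden state of shape (n_layers, batch, hidden_dim) and encoder outputs of
# shape (batch, src_len, hidden_dim), the module returns attention weights of
# shape (batch, src_len) that sum to 1 over the source positions.)
_attn_check = Attention(8)
_w = _attn_check(torch.zeros(2, 3, 8), torch.zeros(3, 5, 8))
assert _w.shape == (3, 5) and torch.allclose(_w.sum(dim=1), torch.ones(3))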

# Decoder model with attention
class Decoder(nn.Module):
    def __init__(self, output_dim, emb_dim, hidden_dim, n_layers, attention):
        super(Decoder, self).__init__()
        self.output_dim = output_dim
        self.embedding = nn.Embedding(output_dim, emb_dim)
        self.lstm = nn.LSTM(emb_dim + hidden_dim, hidden_dim, n_layers, batch_first=True)
        self.fc_out = nn.Linear(hidden_dim * 2, output_dim)
        self.attention = attention

    def forward(self, tgt, hidden, cell, encoder_outputs):
        tgt = tgt.unsqueeze(1)
        embedded = self.embedding(tgt)
        attn_weights = self.attention(hidden, encoder_outputs)
        context = torch.bmm(attn_weights.unsqueeze(1), encoder_outputs)
        lstm_input = torch.cat((embedded, context), dim=2)
        output, (hidden, cell) = self.lstm(lstm_input, (hidden, cell))
        prediction = self.fc_out(torch.cat((output, context), dim=2).squeeze(1))
        return prediction, hidden, cell

# Seq2Seq model combining encoder and decoder
class Seq2Seq(nn.Module):
    def __init__(self, encoder, decoder, device):
        super(Seq2Seq, self).__init__()
        self.encoder = encoder
        self.decoder = decoder
        self.device = device

    def forward(self, src, tgt):
        encoder_outputs, hidden, cell = self.encoder(src)
        outputs = torch.zeros(tgt.shape[0], tgt.shape[1], self.decoder.output_dim).to(self.device)
        input = tgt[:, 0]  # start decoding from <sos>
        for t in range(1, tgt.shape[1]):
            output, hidden, cell = self.decoder(input, hidden, cell, encoder_outputs)
            outputs[:, t] = output
            input = output.argmax(1)  # feed back the model's own prediction (no teacher forcing)
        return outputs

# Hyperparameters and model initialization
INPUT_DIM = len(source_vocab)
OUTPUT_DIM = len(target_vocab)
EMB_DIM = 256
HIDDEN_DIM = 512
N_LAYERS = 2

encoder = Encoder(INPUT_DIM, EMB_DIM, HIDDEN_DIM, N_LAYERS)
attention = Attention(HIDDEN_DIM)
decoder = Decoder(OUTPUT_DIM, EMB_DIM, HIDDEN_DIM, N_LAYERS, attention)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = Seq2Seq(encoder, decoder, device).to(device)

# Training setup
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss(ignore_index=target_vocab["<pad>"])

# Training loop
def train(model, dataloader, optimizer, criterion):
    model.train()
    epoch_loss = 0
    for batch in dataloader:
        src, tgt = zip(*batch)
        src = torch.nn.utils.rnn.pad_sequence(src, padding_value=source_vocab["<pad>"], batch_first=True)
        tgt = torch.nn.utils.rnn.pad_sequence(tgt, padding_value=target_vocab["<pad>"], batch_first=True)
        src, tgt = src.to(device), tgt.to(device)

        optimizer.zero_grad()
        output = model(src, tgt)

        # skip the <sos> position when computing the loss
        output_dim = output.shape[-1]
        output = output[:, 1:].reshape(-1, output_dim)
        tgt = tgt[:, 1:].reshape(-1)

        loss = criterion(output, tgt)
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()
    return epoch_loss / len(dataloader)

# Training epochs
for epoch in range(10):
    loss = train(model, dataloader, optimizer, criterion)
    print(f'Epoch {epoch+1}, Loss: {loss:.4f}')

Epoch 1, Loss: 2.0400
Epoch 2, Loss: 1.8393
Epoch 3, Loss: 1.5252
Epoch 4, Loss: 1.2470
Epoch 5, Loss: 0.8623
Epoch 6, Loss: 0.5609
Epoch 7, Loss: 0.4295
Epoch 8, Loss: 0.1870
Epoch 9, Loss: 0.2638
Epoch 10, Loss: 0.1014
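
The cell above only trains the model; it never decodes new input. Below is a minimal greedy-decoding sketch for trying the trained model, reusing the names defined in the cell (model, tokenize, source_vocab, target_vocab, device). The translate function, its max_len cap, and the <eos> stopping rule are additions for illustration, not part of the assignment.

# Greedy decoding sketch (assumption: not part of the original assignment cell)
def translate(sentence, max_len=10):
    model.eval()
    inv_target_vocab = {idx: word for word, idx in target_vocab.items()}
    with torch.no_grad():
        src = torch.tensor(tokenize(sentence, source_vocab)).unsqueeze(0).to(device)
        encoder_outputs, hidden, cell = model.encoder(src)
        token = torch.tensor([target_vocab["<sos>"]], device=device)
        words = []
        for _ in range(max_len):
            output, hidden, cell = model.decoder(token, hidden, cell, encoder_outputs)
            token = output.argmax(1)
            word = inv_target_vocab[token.item()]
            if word == "<eos>":
                break
            words.append(word)
    return " ".join(words)

print(translate("hello"))  # should come out as "hola" once the toy data is memorized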
