
Commit de914fc

Course 5 Week 3: Neural Machine Translation, Trigger Word Detection

1 parent f356218 · commit de914fc

17 files changed: +4653 −0 lines
Binary file not shown.

Course 5/Week 3/Neural Machine Translation/Neural+machine+translation+with+attention+-+v4.ipynb

Lines changed: 1173 additions & 0 deletions
Large diffs are not rendered by default.

Course 5/Week 3/Neural Machine Translation/Neural+machine+translation+with+attention+-+v4.py

Lines changed: 483 additions & 0 deletions
Large diffs are not rendered by default.
Course 5/Week 3/Neural Machine Translation/nmt_utils.py

Lines changed: 251 additions & 0 deletions

@@ -0,0 +1,251 @@
import numpy as np
from faker import Faker
import random
from tqdm import tqdm
from babel.dates import format_date
from keras.utils import to_categorical
import keras.backend as K
import matplotlib.pyplot as plt

fake = Faker()
fake.seed(12345)  # note: Faker >= 1.0 replaces this with the class method Faker.seed(12345)
random.seed(12345)

# Define format of the data we would like to generate
FORMATS = ['short',
           'medium',
           'long',
           'full',   # 'full' is repeated so random.choice samples it more often
           'full',
           'full',
           'full',
           'full',
           'full',
           'full',
           'full',
           'full',
           'full',
           'd MMM YYY',
           'd MMMM YYY',
           'dd MMM YYY',
           'd MMM, YYY',
           'd MMMM, YYY',
           'dd, MMM YYY',
           'd MM YY',
           'd MMMM YYY',
           'MMMM d YYY',
           'MMMM d, YYY',
           'dd.MM.YY']

# change this if you want it to work with another language
LOCALES = ['en_US']

def load_date():
    """
    Loads some fake dates
    :returns: tuple containing human readable string, machine readable string, and date object
    """
    dt = fake.date_object()

    try:
        human_readable = format_date(dt, format=random.choice(FORMATS), locale='en_US')  # locale=random.choice(LOCALES))
        human_readable = human_readable.lower()
        human_readable = human_readable.replace(',', '')
        machine_readable = dt.isoformat()

    except AttributeError as e:
        return None, None, None

    return human_readable, machine_readable, dt
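
# Illustrative output (the exact values depend on the seeded Faker instance;
# this example is an assumption, not taken from the file itself):
#   load_date() -> ('9 may 1998', '1998-05-09', datetime.date(1998, 5, 9))
# i.e. a human-styled date string, its ISO-8601 form, and the underlying date object.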

def load_dataset(m):
    """
    Loads a dataset with m examples and vocabularies
    :param m: the number of examples to generate
    """

    human_vocab = set()
    machine_vocab = set()
    dataset = []
    Tx = 30

    # note: the loop variable below shadows the parameter m; this is safe only
    # because range(m) is evaluated once, before the first iteration
    for i in tqdm(range(m)):
        h, m, _ = load_date()
        if h is not None:
            dataset.append((h, m))
            human_vocab.update(tuple(h))
            machine_vocab.update(tuple(m))

    human = dict(zip(sorted(human_vocab) + ['<unk>', '<pad>'],
                     list(range(len(human_vocab) + 2))))
    inv_machine = dict(enumerate(sorted(machine_vocab)))
    machine = {v: k for k, v in inv_machine.items()}

    return dataset, human, machine, inv_machine
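
# Illustrative usage (as in the accompanying notebook; the sample pairs shown
# are an assumption for illustration):
#   dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(10000)
#   dataset[:2] -> [('9 may 1998', '1998-05-09'), ('10.09.70', '1970-09-10')]
# human_vocab and machine_vocab map characters to indices; inv_machine_vocab
# inverts the machine mapping for decoding predictions back into characters.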

def preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty):

    X, Y = zip(*dataset)

    X = np.array([string_to_int(i, Tx, human_vocab) for i in X])
    Y = [string_to_int(t, Ty, machine_vocab) for t in Y]

    Xoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(human_vocab)), X)))
    Yoh = np.array(list(map(lambda x: to_categorical(x, num_classes=len(machine_vocab)), Y)))

    return X, np.array(Y), Xoh, Yoh
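
# Resulting shapes, for a dataset of m examples:
#   X   (m, Tx)                     integer-encoded, padded human-readable dates
#   Y   (m, Ty)                     integer-encoded ISO dates
#   Xoh (m, Tx, len(human_vocab))   one-hot version of X
#   Yoh (m, Ty, len(machine_vocab)) one-hot version of Y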

def string_to_int(string, length, vocab):
    """
    Converts a string into a list of integers representing the positions of its
    characters in the "vocab"

    Arguments:
    string -- input string, e.g. 'Wed 10 Jul 2007'
    length -- the number of time steps you'd like; determines if the output will be padded or cut
    vocab -- vocabulary, dictionary used to index every character of your "string"

    Returns:
    rep -- list of integers (or '<unk>') (size = length) representing the positions of the string's characters in the vocabulary
    """

    # make lower to standardize
    string = string.lower()
    string = string.replace(',', '')

    if len(string) > length:
        string = string[:length]

    rep = list(map(lambda x: vocab.get(x, '<unk>'), string))

    if len(string) < length:
        rep += [vocab['<pad>']] * (length - len(string))

    return rep
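
# Illustrative behavior (indices depend on the generated vocab):
#   string_to_int('3 May 1979', 30, human_vocab)
#   -> 10 character indices followed by 20 copies of vocab['<pad>']
# Inputs longer than `length` are truncated; characters missing from vocab fall
# back to the literal string '<unk>' (not its index), as the docstring notes.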


def int_to_string(ints, inv_vocab):
    """
    Output a machine readable list of characters based on a list of indexes in the machine's vocabulary

    Arguments:
    ints -- list of integers representing indexes in the machine's vocabulary
    inv_vocab -- dictionary mapping machine readable indexes to machine readable characters

    Returns:
    l -- list of characters corresponding to the indexes of ints thanks to the inv_vocab mapping
    """

    l = [inv_vocab[i] for i in ints]
    return l


EXAMPLES = ['3 May 1979', '5 Apr 09', '20th February 2016', 'Wed 10 Jul 2007']

TIME_STEPS = 30  # Tx: input sequence length expected by the model (was referenced but left undefined)

def run_example(model, input_vocabulary, inv_output_vocabulary, text):
    encoded = string_to_int(text, TIME_STEPS, input_vocabulary)
    prediction = model.predict(np.array([encoded]))
    prediction = np.argmax(prediction[0], axis=-1)
    return int_to_string(prediction, inv_output_vocabulary)

def run_examples(model, input_vocabulary, inv_output_vocabulary, examples=EXAMPLES):
    predicted = []
    for example in examples:
        predicted.append(''.join(run_example(model, input_vocabulary, inv_output_vocabulary, example)))
        print('input:', example)
        print('output:', predicted[-1])
    return predicted


def softmax(x, axis=1):
    """Softmax activation function.
    # Arguments
        x: Tensor.
        axis: Integer, axis along which the softmax normalization is applied.
    # Returns
        Tensor, output of softmax transformation.
    # Raises
        ValueError: In case `dim(x) == 1`.
    """
    ndim = K.ndim(x)
    if ndim == 2:
        return K.softmax(x)
    elif ndim > 2:
        e = K.exp(x - K.max(x, axis=axis, keepdims=True))
        s = K.sum(e, axis=axis, keepdims=True)
        return e / s
    else:
        raise ValueError('Cannot apply softmax to a tensor that is 1D')
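
# For ndim > 2 this computes the numerically stable softmax along `axis`:
#   softmax(x)_i = exp(x_i - max(x)) / sum_j exp(x_j - max(x))
# Subtracting the max leaves the result unchanged but avoids overflow in exp.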


def plot_attention_map(model, input_vocabulary, inv_output_vocabulary, text, n_s=128, num=6, Tx=30, Ty=10):
    """
    Plot the attention map.
    """
    attention_map = np.zeros((10, 30))
    Ty, Tx = attention_map.shape

    s0 = np.zeros((1, n_s))
    c0 = np.zeros((1, n_s))
    layer = model.layers[num]

    encoded = np.array(string_to_int(text, Tx, input_vocabulary)).reshape((1, 30))
    encoded = np.array(list(map(lambda x: to_categorical(x, num_classes=len(input_vocabulary)), encoded)))

    f = K.function(model.inputs, [layer.get_output_at(t) for t in range(Ty)])
    r = f([encoded, s0, c0])

    for t in range(Ty):
        for t_prime in range(Tx):
            attention_map[t][t_prime] = r[t][0, t_prime, 0]

    # Normalize attention map
    # row_max = attention_map.max(axis=1)
    # attention_map = attention_map / row_max[:, None]

    prediction = model.predict([encoded, s0, c0])

    predicted_text = []
    for i in range(len(prediction)):
        predicted_text.append(int(np.argmax(prediction[i], axis=1)))

    predicted_text = list(predicted_text)
    predicted_text = int_to_string(predicted_text, inv_output_vocabulary)
    text_ = list(text)

    # get the lengths of the string
    input_length = len(text)
    output_length = Ty

    # Plot the attention_map
    plt.clf()
    f = plt.figure(figsize=(8, 8.5))
    ax = f.add_subplot(1, 1, 1)

    # add image
    i = ax.imshow(attention_map, interpolation='nearest', cmap='Blues')

    # add colorbar
    cbaxes = f.add_axes([0.2, 0, 0.6, 0.03])
    cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal')
    cbar.ax.set_xlabel('Alpha value (Probability output of the "softmax")', labelpad=2)

    # add labels
    ax.set_yticks(range(output_length))
    ax.set_yticklabels(predicted_text[:output_length])

    ax.set_xticks(range(input_length))
    ax.set_xticklabels(text_[:input_length], rotation=45)

    ax.set_xlabel('Input Sequence')
    ax.set_ylabel('Output Sequence')

    # add grid and legend
    ax.grid()

    # f.show()

    return attention_map
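
For reference, a minimal sketch of how these helpers are wired together, assuming the trained attention model from the assignment notebook (the name `model` and the values for m, Tx, Ty, n_s, epochs, and batch size below follow the notebook and are assumptions, not part of this file):

# run in the notebook, after: from nmt_utils import *
m = 10000
Tx, Ty = 30, 10   # input/output sequence lengths
n_s = 64          # post-attention LSTM state size (notebook value)

dataset, human_vocab, machine_vocab, inv_machine_vocab = load_dataset(m)
X, Y, Xoh, Yoh = preprocess_data(dataset, human_vocab, machine_vocab, Tx, Ty)

s0 = np.zeros((m, n_s))
c0 = np.zeros((m, n_s))
outputs = list(Yoh.swapaxes(0, 1))  # Ty arrays of shape (m, len(machine_vocab))

model.fit([Xoh, s0, c0], outputs, epochs=1, batch_size=100)

# translate one new date
example = 'Wed 10 Jul 2007'
source = to_categorical(string_to_int(example, Tx, human_vocab),
                        num_classes=len(human_vocab)).reshape((1, Tx, len(human_vocab)))
prediction = model.predict([source, np.zeros((1, n_s)), np.zeros((1, n_s))])
prediction = np.argmax(np.array(prediction), axis=-1).ravel()
print(''.join(int_to_string(prediction.tolist(), inv_machine_vocab)))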
Binary file not shown.

Course 5/Week 3/Trigger Word Detection/Trigger+word+detection+-+v1.ipynb

Lines changed: 1812 additions & 0 deletions
Large diffs are not rendered by default.

0 commit comments
