alifa2try
diff --git a/‎README.md
Lines changed: 1 addition & 0 deletions b/‎README.md
Lines changed: 1 addition & 0 deletions
diff --git a/‎machine-learning/nlp/text-generator/README.md
Lines changed: 32 additions & 0 deletions b/‎machine-learning/nlp/text-generator/README.md
Lines changed: 32 additions & 0 deletions
diff --git a/‎machine-learning/nlp/text-generator/data/wonderland-char2int.pickle
400 Bytes b/‎machine-learning/nlp/text-generator/data/wonderland-char2int.pickle
400 Bytes
diff --git a/‎machine-learning/nlp/text-generator/data/wonderland-int2char.pickle
400 Bytes b/‎machine-learning/nlp/text-generator/data/wonderland-int2char.pickle
400 Bytes
diff --git a/‎machine-learning/nlp/text-generator/data/wonderland.txt
Lines changed: 3735 additions & 0 deletions b/‎machine-learning/nlp/text-generator/data/wonderland.txt
Lines changed: 3735 additions & 0 deletions
diff --git a/‎machine-learning/nlp/text-generator/generate.py
Lines changed: 55 additions & 0 deletions b/‎machine-learning/nlp/text-generator/generate.py
Lines changed: 55 additions & 0 deletions
diff --git a/‎machine-learning/nlp/text-generator/requirements.txt
Lines changed: 4 additions & 0 deletions b/‎machine-learning/nlp/text-generator/requirements.txt
Lines changed: 4 additions & 0 deletions
diff --git a/‎machine-learning/nlp/text-generator/results/wonderland-v2-0.75.h5
9.63 MB b/‎machine-learning/nlp/text-generator/results/wonderland-v2-0.75.h5
9.63 MB
diff --git a/‎machine-learning/nlp/text-generator/train.py
Lines changed: 86 additions & 0 deletions b/‎machine-learning/nlp/text-generator/train.py
Lines changed: 86 additions & 0 deletions
@@ -19,6 +19,7 @@ This is a repository of all the tutorials of [The Python Code](https://www.thepy
 - ### [Machine Learning](https://www.thepythoncode.com/topic/machine-learning)
     - ### [Natural Language Processing](https://www.thepythoncode.com/topic/nlp)
         - [How to Build a Spam Classifier using Keras in Python](https://www.thepythoncode.com/article/build-spam-classifier-keras-python). ([code](machine-learning/nlp/spam-classifier))
+        - [How to Build a Text Generator using Keras in Python](https://www.thepythoncode.com/article/text-generation-keras-python). ([code](machine-learning/nlp/text-generator))
 
     - [How to Detect Human Faces in Python using OpenCV](https://www.thepythoncode.com/article/detect-faces-opencv-python). ([code](machine-learning/face_detection))
     - [Building a Speech Emotion Recognizer using Scikit-learn](https://www.thepythoncode.com/article/building-a-speech-emotion-recognizer-using-sklearn). ([code](machine-learning/speech-emotion-recognition))
 
@@ -0,0 +1,32 @@
+# [How to Build a Text Generator using Keras in Python](https://www.thepythoncode.com/article/text-generation-keras-python)
+To run this:
+- `pip3 install -r requirements.txt`
+- To use the pre-trained text generator model, which was trained on the text of the book Alice's Adventures in Wonderland:
+    ```
+    python generate.py --help
+    ```
+    **Output:**
+    ```
+    usage: generate.py [-h] [-n N_CHARS] seed
+
+    Text generator that was trained on Alice's Adventures in the Wonderland book.
+
+    positional arguments:
+    seed                  Seed text to start with, can be any english text, but
+                            it's preferable you take from the book itself.
+
+    optional arguments:
+    -h, --help            show this help message and exit
+    -n N_CHARS, --n-chars N_CHARS
+                            Number of characters to generate, default is 200.
+    ```
+    Generating 200 characters from a seed text taken from the book:
+    ```
+    python generate.py "down down down there was nothing else to do so alice soon began talking again " -n 200
+    ```
+    **Output:**
+    ```
+    Generating text: 100%|█████████████████████████████████████████████████████████████████████████████████████████████████| 200/200 [00:40<00:00,  5.02it/s]
+    Generated text:
+    the duchess asked to the dormouse she wanted about for the world all her life i dont know what to think that it was so much sort of mine for the world a little like a stalking and was going to the mou
+    ```
@@ -0,0 +1,55 @@
+import numpy as np
+import pickle
+import tqdm
+from keras.models import Sequential
+from keras.layers import Dense, LSTM, Dropout, Activation
+from keras.callbacks import ModelCheckpoint
+
+# seed = "do not try to"
+
+char2int = pickle.load(open("data/wonderland-char2int.pickle", "rb"))
+int2char = pickle.load(open("data/wonderland-int2char.pickle", "rb"))
+
+sequence_length = 100
+n_unique_chars = len(char2int)
+
+# building the model
+model = Sequential([
+    LSTM(256, input_shape=(sequence_length, n_unique_chars), return_sequences=True),
+    Dropout(0.3),
+    LSTM(256),
+    Dense(n_unique_chars, activation="softmax"),
+])
+
+model.load_weights("results/wonderland-v2-0.75.h5")
+
+if __name__ == "__main__":
+    import argparse
+    parser = argparse.ArgumentParser(description="Text generator that was trained on Alice's Adventures in the Wonderland book.")
+    parser.add_argument("seed", help="Seed text to start with, can be any english text, but it's preferable you take from the book itself.")
+    parser.add_argument("-n", "--n-chars", type=int, dest="n_chars", help="Number of characters to generate, default is 200.", default=200)
+    args = parser.parse_args()
+
+    n_chars = args.n_chars
+    seed = args.seed
+
+    # generate 400 characters
+    generated = ""
+    for i in tqdm.tqdm(range(n_chars), "Generating text"):
+        # make the input sequence
+        X = np.zeros((1, sequence_length, n_unique_chars))
+        for t, char in enumerate(seed):
+            X[0, (sequence_length - len(seed)) + t, char2int[char]] = 1
+        # predict the next character
+        predicted = model.predict(X, verbose=0)[0]
+        # converting the vector to an integer
+        next_index = np.argmax(predicted)
+        # converting the integer to a character
+        next_char = int2char[next_index]
+        # add the character to results
+        generated += next_char
+        # shift seed and the predicted character
+        seed = seed[1:] + next_char
+
+    print("Generated text:")
+    print(generated)
@@ -0,0 +1,4 @@
+numpy
+tensorflow==1.13.1
+keras
+requests
@@ -0,0 +1,86 @@
+import numpy as np
+import os
+import pickle
+from keras.models import Sequential
+from keras.layers import Dense, LSTM, Dropout
+from keras.callbacks import ModelCheckpoint
+from string import punctuation
+
+# commented because already downloaded
+# import requests
+# content = requests.get("http://www.gutenberg.org/cache/epub/11/pg11.txt").text
+# open("data/wonderland.txt", "w", encoding="utf-8").write(content)
+
+# read the data
+text = open("data/wonderland.txt", encoding="utf-8").read()
+# remove caps
+text = text.lower()
+# remove punctuation
+text = text.translate(str.maketrans("", "", punctuation))
+# print some stats
+n_chars = len(text)
+unique_chars = ''.join(sorted(set(text)))
+print("unique_chars:", unique_chars)
+n_unique_chars = len(unique_chars)
+print("Number of characters:", n_chars)
+print("Number of unique characters:", n_unique_chars)
+
+# dictionary that converts characters to integers
+char2int = {c: i for i, c in enumerate(unique_chars)}
+# dictionary that converts integers to characters
+int2char = {i: c for i, c in enumerate(unique_chars)}
+
+# save these dictionaries for later generation
+pickle.dump(char2int, open("wonderland-char2int.pickle", "wb"))
+pickle.dump(int2char, open("wonderland-int2char.pickle", "wb"))
+
+# hyper parameters
+sequence_length = 100
+step = 1
+batch_size = 128
+epochs = 40
+
+sentences = []
+y_train = []
+for i in range(0, len(text) - sequence_length, step):
+    sentences.append(text[i: i + sequence_length])
+    y_train.append(text[i+sequence_length])
+print("Number of sentences:", len(sentences))
+
+# vectorization
+X = np.zeros((len(sentences), sequence_length, n_unique_chars))
+y = np.zeros((len(sentences), n_unique_chars))
+
+for i, sentence in enumerate(sentences):
+    for t, char in enumerate(sentence):
+        X[i, t, char2int[char]] = 1
+        y[i, char2int[y_train[i]]] = 1
+
+print("X.shape:", X.shape)
+
+# building the model
+# model = Sequential([
+#     LSTM(128, input_shape=(sequence_length, n_unique_chars)),
+#     Dense(n_unique_chars, activation="softmax"),
+# ])
+
+# a better model (slower to train obviously)
+model = Sequential([
+    LSTM(256, input_shape=(sequence_length, n_unique_chars), return_sequences=True),
+    Dropout(0.3),
+    LSTM(256),
+    Dense(n_unique_chars, activation="softmax"),
+])
+
+# model.load_weights("results/wonderland-v2-2.48.h5")
+
+model.summary()
+model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])
+
+if not os.path.isdir("results"):
+    os.mkdir("results")
+
+checkpoint = ModelCheckpoint("results/wonderland-v2-{loss:.2f}.h5", verbose=1)
+
+# train the model
+model.fit(X, y, batch_size=batch_size, epochs=epochs, callbacks=[checkpoint])
-Original file line number
+Diff line change
@@ @@ -0,0 +1,4 @@ @@
 +numpy
 +tensorflow==1.13.1
 +keras
 +requests