|
1 |
| -# to use CPU uncomment below code |
2 |
| -# import os |
3 |
| -# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 |
4 |
| -# os.environ["CUDA_VISIBLE_DEVICES"] = "-1" |
5 |
| - |
6 |
| -# import tensorflow as tf |
7 |
| - |
8 |
| -# config = tf.ConfigProto(intra_op_parallelism_threads=5, |
9 |
| -# inter_op_parallelism_threads=5, |
10 |
| -# allow_soft_placement=True, |
11 |
| -# device_count = {'CPU' : 1, |
12 |
| -# 'GPU' : 0} |
13 |
| -# ) |
14 |
| - |
15 |
| - |
16 |
| -from keras.preprocessing.text import Tokenizer |
17 |
| -from keras.preprocessing.sequence import pad_sequences |
18 |
| -from keras.utils import to_categorical |
19 |
| -from keras.callbacks import ModelCheckpoint, TensorBoard |
| 1 | +import tensorflow as tf |
| 2 | +gpus = tf.config.experimental.list_physical_devices('GPU') |
| 3 | +if gpus: |
| 4 | + # allocate GPU memory only as it is needed, instead of reserving all of it up front |
| 5 | + tf.config.experimental.set_memory_growth(gpus[0], enable=True) |
| 6 | + |
| 7 | +from tensorflow.keras.preprocessing.text import Tokenizer |
| 8 | +from tensorflow.keras.preprocessing.sequence import pad_sequences |
| 9 | +from tensorflow.keras.utils import to_categorical |
| 10 | +from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard |
20 | 11 | from sklearn.model_selection import train_test_split
|
21 | 12 | import time
|
22 | 13 | import numpy as np
|
23 | 14 | import pickle
|
24 | 15 |
|
25 |
| -from utils import get_embedding_vectors, get_model, SEQUENCE_LENGTH, EMBEDDING_SIZE, TEST_SIZE |
26 |
| -from utils import BATCH_SIZE, EPOCHS, int2label, label2int |
| 16 | +from utils import get_model, SEQUENCE_LENGTH, TEST_SIZE |
| 17 | +from utils import BATCH_SIZE, EPOCHS, label2int |
27 | 18 |
|
28 | 19 |
|
29 | 20 | def load_data():
|
@@ -69,26 +60,25 @@ def load_data():
|
69 | 60 |
|
70 | 61 | y = [ label2int[label] for label in y ]
|
71 | 62 | y = to_categorical(y)
|
72 |
| - |
73 | 63 | print(y[0])
|
74 | 64 |
|
75 | 65 | # split and shuffle
|
76 | 66 | X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=7)
|
77 |
| - |
| 67 | +# print our data shapes |
| 68 | +print("X_train.shape:", X_train.shape) |
| 69 | +print("X_test.shape:", X_test.shape) |
| 70 | +print("y_train.shape:", y_train.shape) |
| 71 | +print("y_test.shape:", y_test.shape) |
78 | 72 | # construct the model with 128 LSTM units
|
79 | 73 | model = get_model(tokenizer=tokenizer, lstm_units=128)
|
80 | 74 |
|
81 | 75 | # initialize our ModelCheckpoint and TensorBoard callbacks
|
82 | 76 | # model checkpoint for saving best weights
|
83 |
| -model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}", save_best_only=True, |
| 77 | +model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}.h5", save_best_only=True, |
84 | 78 | verbose=1)
|
85 | 79 | # TensorBoard callback, writing logs to a timestamped directory for visualization
|
86 | 80 | tensorboard = TensorBoard(f"logs/spam_classifier_{time.time()}")
|
87 |
| -# print our data shapes |
88 |
| -print("X_train.shape:", X_train.shape) |
89 |
| -print("X_test.shape:", X_test.shape) |
90 |
| -print("y_train.shape:", y_train.shape) |
91 |
| -print("y_test.shape:", y_test.shape) |
| 81 | + |
92 | 82 | # train the model
|
93 | 83 | model.fit(X_train, y_train, validation_data=(X_test, y_test),
|
94 | 84 | batch_size=BATCH_SIZE, epochs=EPOCHS,
|
|
0 commit comments