Skip to content

Commit 96fd0fb

Browse files
committed
update spam classifier tutorial to tensorflow
1 parent 972b3dc commit 96fd0fb

File tree

11 files changed

+356
-54
lines changed

11 files changed

+356
-54
lines changed
Lines changed: 1 addition & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1,6 +1,4 @@
11
sklearn
2-
keras
32
tqdm
43
numpy
5-
keras_metrics
6-
tensorflow==1.15.4
4+
tensorflow
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.

machine-learning/nlp/spam-classifier/spam_classifier.py

Lines changed: 19 additions & 29 deletions
Original file line number | Diff line number | Diff line change
@@ -1,29 +1,20 @@
1-
# to use CPU uncomment below code
2-
# import os
3-
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
4-
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
5-
6-
# import tensorflow as tf
7-
8-
# config = tf.ConfigProto(intra_op_parallelism_threads=5,
9-
# inter_op_parallelism_threads=5,
10-
# allow_soft_placement=True,
11-
# device_count = {'CPU' : 1,
12-
# 'GPU' : 0}
13-
# )
14-
15-
16-
from keras.preprocessing.text import Tokenizer
17-
from keras.preprocessing.sequence import pad_sequences
18-
from keras.utils import to_categorical
19-
from keras.callbacks import ModelCheckpoint, TensorBoard
1+
import tensorflow as tf
2+
gpus = tf.config.experimental.list_physical_devices('GPU')
3+
if gpus:
4+
# allocate GPU memory only as needed instead of reserving all of it up front
5+
tf.config.experimental.set_memory_growth(gpus[0], enable=True)
6+
7+
from tensorflow.keras.preprocessing.text import Tokenizer
8+
from tensorflow.keras.preprocessing.sequence import pad_sequences
9+
from tensorflow.keras.utils import to_categorical
10+
from tensorflow.keras.callbacks import ModelCheckpoint, TensorBoard
2011
from sklearn.model_selection import train_test_split
2112
import time
2213
import numpy as np
2314
import pickle
2415

25-
from utils import get_embedding_vectors, get_model, SEQUENCE_LENGTH, EMBEDDING_SIZE, TEST_SIZE
26-
from utils import BATCH_SIZE, EPOCHS, int2label, label2int
16+
from utils import get_model, SEQUENCE_LENGTH, TEST_SIZE
17+
from utils import BATCH_SIZE, EPOCHS, label2int
2718

2819

2920
def load_data():
@@ -69,26 +60,25 @@ def load_data():
6960

7061
y = [ label2int[label] for label in y ]
7162
y = to_categorical(y)
72-
7363
print(y[0])
7464

7565
# split and shuffle
7666
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=TEST_SIZE, random_state=7)
77-
67+
# print our data shapes
68+
print("X_train.shape:", X_train.shape)
69+
print("X_test.shape:", X_test.shape)
70+
print("y_train.shape:", y_train.shape)
71+
print("y_test.shape:", y_test.shape)
7872
# constructs the model with 128 LSTM units
7973
model = get_model(tokenizer=tokenizer, lstm_units=128)
8074

8175
# initialize our ModelCheckpoint and TensorBoard callbacks
8276
# model checkpoint for saving best weights
83-
model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}", save_best_only=True,
77+
model_checkpoint = ModelCheckpoint("results/spam_classifier_{val_loss:.2f}.h5", save_best_only=True,
8478
verbose=1)
8579
# for better visualization
8680
tensorboard = TensorBoard(f"logs/spam_classifier_{time.time()}")
87-
# print our data shapes
88-
print("X_train.shape:", X_train.shape)
89-
print("X_test.shape:", X_test.shape)
90-
print("y_train.shape:", y_train.shape)
91-
print("y_test.shape:", y_test.shape)
81+
9282
# train the model
9383
model.fit(X_train, y_train, validation_data=(X_test, y_test),
9484
batch_size=BATCH_SIZE, epochs=EPOCHS,

machine-learning/nlp/spam-classifier/test.py

Lines changed: 9 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -1,17 +1,10 @@
1-
import os
2-
# os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152
3-
# os.environ["CUDA_VISIBLE_DEVICES"] = "-1"
4-
5-
# import tensorflow as tf
6-
7-
# config = tf.ConfigProto(intra_op_parallelism_threads=5,
8-
# inter_op_parallelism_threads=5,
9-
# allow_soft_placement=True,
10-
# device_count = {'CPU' : 1,
11-
# 'GPU' : 0}
12-
# )
13-
from utils import get_model, int2label, label2int
14-
from keras.preprocessing.sequence import pad_sequences
1+
import tensorflow as tf
2+
gpus = tf.config.experimental.list_physical_devices('GPU')
3+
if gpus:
4+
# allocate GPU memory only as needed instead of reserving all of it up front
5+
tf.config.experimental.set_memory_growth(gpus[0], enable=True)
6+
from utils import get_model, int2label
7+
from tensorflow.keras.preprocessing.sequence import pad_sequences
158

169
import pickle
1710
import numpy as np
@@ -22,7 +15,8 @@
2215
tokenizer = pickle.load(open("results/tokenizer.pickle", "rb"))
2316

2417
model = get_model(tokenizer, 128)
25-
model.load_weights("results/spam_classifier_0.05")
18+
# change to the model name in results folder
19+
model.load_weights("results/spam_classifier_0.06.h5")
2620

2721
def get_predictions(text):
2822
sequence = tokenizer.texts_to_sequences([text])

0 commit comments

Comments
 (0)