## Foundations of Python for AI
### Essential Python Libraries for AI
# Artificial Intelligence
import numpy as np
# Create a NumPy array
array = np.array([1, 2, 3, 4, 5])
print("Array:", array)
# Perform mathematical operations
print("Mean:", np.mean(array))
print("Standard Deviation:", np.std(array))
# Reshape an array
reshaped_array = array.reshape((1, 5))
print("Reshaped Array:", reshaped_array)
import pandas as pd
# Load a dataset (Iris dataset from sklearn)
from sklearn.datasets import load_iris
iris = load_iris()
iris_df = pd.DataFrame(data=iris.data, columns=iris.feature_names)
# Add a target column
iris_df['target'] = iris.target
iris_df.head()
# Filter rows
filtered_df = iris_df[iris_df['target'] == 1]
print("Filtered DataFrame:")
print(filtered_df.head(7))
iris.feature_names[1]
import matplotlib.pyplot as plt
# Plot a scatter plot
plt.scatter(iris_df[iris.feature_names[0]], iris_df[iris.feature_names[1]], c=iris_df['target'],
cmap='viridis')
plt.title('Scatter Plot of Iris Dataset')
plt.xlabel(iris.feature_names[0])
plt.ylabel(iris.feature_names[1])
plt.colorbar(label='Target')
plt.show()
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
# Split the data into training and testing sets
X = iris_df[iris.feature_names]
y = iris_df['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Train a logistic regression model
model = LogisticRegression(max_iter=200)
model.fit(X_train, y_train)
# Predict and evaluate
predictions = model.predict(X_test)
accuracy = accuracy_score(y_test, predictions)
print("Model Accuracy:", accuracy)
"""
X train and y train is for training the data
x test is for testing the model that already we train
y test is for compare the prediction (based on result from x test that we test)
"""
### Data Management Techniques
import pandas as pd
# Load the dataset
file_path = "2021-10-19-2024-11-01.csv"
# Use low_memory=True for large datasets
data = pd.read_csv(file_path, low_memory=True)
# Display basic information about the dataset
print("Dataset Info:")
data.info()
# Preview the first few rows
print("Dataset Head:")
print(data.head())
# Function to optimize memory usage
def optimize_memory(df):
    # Downcast numeric columns to the smallest dtype that can hold their values
    for col in df.select_dtypes(include=['float']).columns:
        df[col] = pd.to_numeric(df[col], downcast='float')
    for col in df.select_dtypes(include=['int']).columns:
        df[col] = pd.to_numeric(df[col], downcast='integer')
    # Convert low-cardinality text columns to the memory-efficient 'category' dtype
    for col in df.select_dtypes(include=['object']).columns:
        num_unique_values = df[col].nunique()
        num_total_values = len(df[col])
        if num_unique_values / num_total_values < 0.5:
            df[col] = df[col].astype('category')
    return df
# Record memory usage before optimization (optimize_memory modifies the DataFrame in place)
memory_before = data.memory_usage(deep=True).sum()
data_optimized = optimize_memory(data)
# Display memory usage before and after optimization
print("Memory Usage Before Optimization:", memory_before)
print("Memory Usage After Optimization:", data_optimized.memory_usage(deep=True).sum())
# Check for missing values
missing_values = data.isnull().sum()
print("Missing Values:")
print(missing_values[missing_values > 0])
# Drop rows with missing critical information
data.dropna(subset=['tags'], inplace=True)
# The original data has 984,776 rows; after dropping rows where 'tags' is null,
# only 491,979 rows remain.
# Be careful with this step - it discards roughly half of the data.
"""
In my opinion, I generally prefer CSV over JSON or Parquet.
"""
## Natural Language Processing (NLP)
### NLP Fundamentals
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer, WordNetLemmatizer
# Download required resources
nltk.download('punkt')
nltk.download('stopwords')
nltk.download('wordnet')
nltk.download('punkt_tab')
# Sample text
data = "Natural Language Processing is an exciting field of AI! NLP enables machines to
understand human language."
# Tokenization
tokens = word_tokenize(data)
print("Tokens:", tokens)
# Stop-word removal
stop_words = set(stopwords.words('english'))
filtered_tokens = [word for word in tokens if word.lower() not in stop_words]
print("Filtered Tokens:", filtered_tokens)
# Stemming
stemmer = PorterStemmer()
stemmed_words = [stemmer.stem(word) for word in filtered_tokens]
print("Stemmed Words:", stemmed_words)
# Lemmatization
lemmatizer = WordNetLemmatizer()
lemmatized_words = [lemmatizer.lemmatize(word) for word in filtered_tokens]
print("Lemmatized Words:", lemmatized_words)
# Bag-of-Words Representation
from sklearn.feature_extraction.text import CountVectorizer
# Sample corpus
corpus = [
"I love NLP and AI.",
"NLP is a subset of AI.",
"AI will revolutionize the world."
]
# Create the Bag-of-Words model
vectorizer = CountVectorizer()
BoW = vectorizer.fit_transform(corpus)
# Display the vocabulary and vectorized data
print("Vocabulary:", vectorizer.vocabulary_)
print("Bag-of-Words Representation:")
print(BoW.toarray())
from sklearn.feature_extraction.text import TfidfVectorizer
# Create the TF-IDF model
tfidf_vectorizer = TfidfVectorizer()
tfidf = tfidf_vectorizer.fit_transform(corpus)
# Display the vocabulary and TF-IDF weights
print("Vocabulary:", tfidf_vectorizer.vocabulary_)
print("TF-IDF Representation:")
print(tfidf.toarray())
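Once documents are represented as TF-IDF vectors, comparing them reduces to cosine similarity between rows. The sketch below scores every pair of sentences in the small corpus above.
from sklearn.metrics.pairwise import cosine_similarity
# Pairwise cosine similarity between the TF-IDF document vectors (3 x 3 matrix for the corpus above)
similarity_matrix = cosine_similarity(tfidf)
print("Pairwise document similarity:")
print(similarity_matrix.round(2))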
### Working with Word Embeddings
from gensim.models import KeyedVectors
import gensim.downloader as api
# Load pre-trained Word2Vec embeddings
word2vec = api.load("word2vec-google-news-300")
# Check word similarity
similarity = word2vec.similarity('king', 'queen')
print(f"Similarity between 'king' and 'queen': {similarity}")
# Find most similar words
similar_words = word2vec.most_similar('king', topn=5)
print("Most similar words to 'king':", similar_words)
# Get vector representation of a word
vector = word2vec['king']
print("Vector for 'king':", vector[:10]) # Display the first 10 elements
import numpy as np
# Download and extract GloVe embeddings
!wget http://nlp.stanford.edu/data/glove.6B.zip -O glove.6B.zip
!unzip glove.6B.zip -d glove
# Load the 50-dimensional GloVe vectors
glove_path = 'glove/glove.6B.50d.txt'
embeddings_index = {}
with open(glove_path, 'r', encoding='utf-8') as f:
    for line in f:
        values = line.split()
        word = values[0]
        vector = np.array(values[1:], dtype='float32')
        embeddings_index[word] = vector
# Check vector for a word
vector = embeddings_index.get('king')
print("Vector for 'king':", vector[:10]) # Display the first 10 elements
# Compute cosine similarity
from numpy.linalg import norm
similarity = np.dot(embeddings_index['king'], embeddings_index['queen']) / (
    norm(embeddings_index['king']) * norm(embeddings_index['queen']))
print(f"Similarity between 'king' and 'queen': {similarity}")
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report
# Sample dataset
sentences = [
    "I love machine learning.",
    "AI is the future of technology.",
    "Python is great for data science.",
    "I enjoy solving problems with AI.",
    "Machine learning is fascinating."
]
labels = [1, 1, 0, 1, 1]  # 1 for positive, 0 for neutral
# Create sentence embeddings by averaging word vectors
def sentence_to_vector(sentence, embeddings_index):
    # Lowercase so the tokens match the lowercase GloVe vocabulary
    words = sentence.lower().split()
    vectors = [embeddings_index[word] for word in words if word in embeddings_index]
    if vectors:
        return np.mean(vectors, axis=0)
    else:
        return np.zeros(50)  # Assuming 50-dimensional embeddings
# Convert sentences to embeddings
sentence_vectors = [sentence_to_vector(sentence, embeddings_index) for sentence in sentences]
# Train-test split
X_train, X_test, y_train, y_test = train_test_split(sentence_vectors, labels, test_size=0.2, random_state=42)
# Train a classifier
clf = RandomForestClassifier()
clf.fit(X_train, y_train)
# Evaluate the model
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
sentences_example = [
    "i like python",
    "i like data analyst",
    "learning is great for data science."
]
# Convert sentences to embeddings
sentence_vectors_example = [sentence_to_vector(sentence, embeddings_index) for sentence in sentences_example]
# Evaluate the model
y_pred = clf.predict(sentence_vectors_example)
y_pred
### NLP Applications
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report
# Sample dataset
sentences = [
    "I love this product!",
    "This is the worst service I've ever had.",
    "Absolutely fantastic experience.",
    "Not worth the money.",
    "Amazing quality and quick delivery!"
]
labels = [1, 0, 1, 0, 1]  # 1: Positive, 0: Negative
# Convert text to feature vectors
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(sentences)
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(X, labels, test_size=0.2, random_state=42)
# Train a Multinomial Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train, y_train)
# Predict and evaluate
y_pred = clf.predict(X_test)
print(classification_report(y_test, y_pred))
from transformers import pipeline
# Initialize a sentiment analysis pipeline
sentiment_pipeline = pipeline("sentiment-analysis")
# Sample sentences
sentences = [
    "I absolutely love this!",
    "This is terrible.",
    "Not bad, but could be better.",
    "Fantastic experience overall!"
]
# Perform sentiment analysis
results = sentiment_pipeline(sentences)
for sentence, result in zip(sentences, results):
    print(f"Sentence: {sentence}\nSentiment: {result['label']}, Score: {result['score']:.2f}\n")
import spacy
# Load the spaCy model
nlp = spacy.load("en_core_web_sm")
# Sample text
text = "Apple Inc. is looking at buying a startup in San Francisco for $1 billion."
# Process the text
doc = nlp(text)
# Extract entities
print("Named Entities:")
for ent in doc.ents:
    print(f"{ent.text} ({ent.label_})")
# Combined pipeline: Text classification followed by NER
text = "The new iPhone has amazing features! It is priced at $999."
# Sentiment Analysis
sentiment = sentiment_pipeline([text])[0]
print(f"Sentiment: {sentiment['label']} (Score: {sentiment['score']:.2f})")
# Named Entity Recognition
doc = nlp(text)
print("Named Entities:")
for ent in doc.ents:
    print(f"{ent.text} ({ent.label_})")
### Project - Build a Basic Text Classification Model
# Import libraries
from sklearn.feature_extraction.text import CountVectorizer, TfidfTransformer
from sklearn.model_selection import train_test_split
from sklearn.naive_bayes import MultinomialNB
from sklearn.metrics import classification_report, confusion_matrix, accuracy_score
import pandas as pd
# Sample dataset
data = {
    'Review': [
        "I love this product!",
        "This is the worst experience I've ever had.",
        "Absolutely fantastic service.",
        "Not worth the money.",
        "Great quality and quick delivery!",
        "Terrible customer service.",
        "Excellent value for money!",
        "Awful, will never buy again."
    ],
    'Sentiment': [1, 0, 1, 0, 1, 0, 1, 0]  # 1: Positive, 0: Negative
}
# Create a DataFrame
df = pd.DataFrame(data)
# Display the dataset
print(df.head())
# Convert text to feature vectors
vectorizer = CountVectorizer()
X = vectorizer.fit_transform(df['Review'])
# Target variable
y = df['Sentiment']
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
print("Training data shape:", X_train.shape)
print("Test data shape:", X_test.shape)
# Initialize and train the Multinomial Naive Bayes classifier
clf = MultinomialNB()
clf.fit(X_train, y_train)
# Print the model parameters
print("Model trained successfully!")
# Make predictions on the test set
y_pred = clf.predict(X_test)
# Evaluate the model
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))
print("Accuracy:", accuracy_score(y_test, y_pred))
import matplotlib.pyplot as plt
import seaborn as sns
# Plot confusion matrix
conf_matrix = confusion_matrix(y_test, y_pred)
sns.heatmap(conf_matrix, annot=True, fmt='d', cmap='Blues',
            xticklabels=['Negative', 'Positive'], yticklabels=['Negative', 'Positive'])
plt.xlabel('Predicted')
plt.ylabel('Actual')
plt.title('Confusion Matrix')
plt.show()
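TfidfTransformer is imported above but never used. As a hedged variant (not part of the original project), the sketch below shows how it could slot in after CountVectorizer inside a scikit-learn Pipeline, so the same preprocessing is applied at training and prediction time.
from sklearn.pipeline import Pipeline
# Optional variant: count vectorization followed by TF-IDF weighting and Naive Bayes
tfidf_clf = Pipeline([
    ('counts', CountVectorizer()),
    ('tfidf', TfidfTransformer()),
    ('nb', MultinomialNB())
])
X_text_train, X_text_test, y_text_train, y_text_test = train_test_split(
    df['Review'], df['Sentiment'], test_size=0.2, random_state=42)
tfidf_clf.fit(X_text_train, y_text_train)
print("TF-IDF pipeline accuracy:", accuracy_score(y_text_test, tfidf_clf.predict(X_text_test)))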
## Computer Vision
### Basics of Image Processing
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Display the first image in the dataset
plt.imshow(X_train[0], cmap='gray')
plt.title("Sample Image")
plt.show()
print("Shape of training data:", X_train.shape)
print("Shape of test data:", X_test.shape)
# Example of grayscale conversion
image = cv2.imread(cv2.samples.findFile("cat.png")) # Replace with your image path
image_gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# Display grayscale image
plt.imshow(image_gray, cmap='gray')
plt.title("Grayscale Image")
plt.axis('off')
plt.show()
# Apply edge detection to the first MNIST image
image_edges = cv2.Canny(X_train[0], threshold1=50, threshold2=150)
# Display edge-detected image
plt.imshow(image_edges, cmap='gray')
plt.title("Edge Detection")
plt.axis('off')
plt.show()
# Apply thresholding to the first MNIST image
_, image_thresholded = cv2.threshold(X_train[0], 127, 255, cv2.THRESH_BINARY)
# Display thresholded image
plt.imshow(image_thresholded, cmap='gray')
plt.title("Thresholded Image")
plt.axis('off')
plt.show()
from tensorflow.keras.preprocessing.image import ImageDataGenerator
# Define the data augmentation generator
data_gen = ImageDataGenerator(
    rotation_range=20,
    width_shift_range=0.2,
    height_shift_range=0.2,
    shear_range=0.2,
    zoom_range=0.2,
    horizontal_flip=False
)
# Expand dimensions of a single image for augmentation
sample_image = np.expand_dims(X_train[0], axis=(0, -1))
# Generate augmented images
augmented_images = data_gen.flow(sample_image, batch_size=1)
# Display some augmented images
plt.figure(figsize=(10, 5))
for i in range(5):
    augmented_image = next(augmented_images)[0].astype('uint8')
    plt.subplot(1, 5, i+1)
    plt.imshow(augmented_image.squeeze(), cmap='gray')
    plt.axis('off')
plt.suptitle("Augmented Images")
plt.show()
### Object Recognition and Feature Extraction
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Select a sample image
image = X_train[0]
# Display the image
plt.imshow(image, cmap='gray')
plt.title("Sample MNIST Image")
plt.axis('off')
plt.show()
# Resize the image to 128x128 for better feature extraction
image_resized = cv2.resize(image, (128, 128))
# Normalize the image
image_normalized = image_resized / 255.0
# Display the processed image
plt.imshow(image_normalized, cmap='gray')
plt.title("Resized and Normalized Image")
plt.axis('off')
plt.show()
# Convert image to uint8 format (required for SIFT)
image_uint8 = (image_resized).astype('uint8')
# Initialize SIFT detector
sift = cv2.SIFT_create()
# Detect keypoints and compute descriptors
keypoints, descriptors = sift.detectAndCompute(image_uint8, None)
# Draw keypoints on the image
image_with_keypoints = cv2.drawKeypoints(image_uint8, keypoints, None,
flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# Display the image with keypoints
plt.imshow(image_with_keypoints, cmap='gray')
plt.title("SIFT Keypoints")
plt.axis('off')
plt.show()
# Initialize ORB detector
orb = cv2.ORB_create()
# Detect keypoints and compute descriptors
keypoints_orb, descriptors_orb = orb.detectAndCompute(image_uint8, None)
# Draw keypoints on the image
image_with_orb_keypoints = cv2.drawKeypoints(image_uint8, keypoints_orb, None,
flags=cv2.DRAW_MATCHES_FLAGS_DRAW_RICH_KEYPOINTS)
# Display the image with ORB keypoints
plt.imshow(image_with_orb_keypoints, cmap='gray')
plt.title("ORB Keypoints")
plt.axis('off')
plt.show()
# Create a second image by rotating the original
image_rotated = cv2.rotate(image_uint8, cv2.ROTATE_90_CLOCKWISE)
# Detect features in the rotated image
keypoints2, descriptors2 = orb.detectAndCompute(image_rotated, None)
# Match features using BFMatcher
bf = cv2.BFMatcher(cv2.NORM_HAMMING, crossCheck=True)
matches = bf.match(descriptors_orb, descriptors2)
matches = sorted(matches, key=lambda x: x.distance)
# Draw matches
image_matches = cv2.drawMatches(image_uint8, keypoints_orb, image_rotated, keypoints2,
matches[:10], None, flags=cv2.DrawMatchesFlags_NOT_DRAW_SINGLE_POINTS)
# Display the matches
plt.imshow(image_matches)
plt.title("Feature Matching")
plt.axis('off')
plt.show()
### Applications in Vision
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from sklearn.cluster import KMeans
# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Select a sample image
image = X_train[0]
# Display the image
plt.imshow(image, cmap='gray')
plt.title("Sample MNIST Image")
plt.axis('off')
plt.show()
# Reshape image for K-Means
image_reshaped = image.reshape((-1, 1))
image_reshaped = np.float32(image_reshaped)
# Apply K-Means clustering
kmeans = KMeans(n_clusters=2, random_state=42)
kmeans.fit(image_reshaped)
# Replace pixel values with their cluster centers
segmented_image = kmeans.cluster_centers_[kmeans.labels_]
segmented_image = segmented_image.reshape(image.shape)
# Display segmented image
plt.imshow(segmented_image, cmap='gray')
plt.title("Segmented Image")
plt.axis('off')
plt.show()
# Load a larger image (replace with your dataset if available)
image_large = cv2.resize(image, (128, 128))
# Create a template (a smaller region of interest)
template = image_large[30:70, 30:70]
# Match the template
result = cv2.matchTemplate(image_large, template, cv2.TM_CCOEFF_NORMED)
# Find the location of the best match
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(result)
top_left = max_loc
bottom_right = (top_left[0] + template.shape[1], top_left[1] + template.shape[0])
# Draw a rectangle around the matched region
image_matched = image_large.copy()
cv2.rectangle(image_matched, top_left, bottom_right, 255, 2)
# Display the result
plt.imshow(image_matched, cmap='gray')
plt.title("Template Matching")
plt.axis('off')
plt.show()
# Load Haar cascade for face detection
face_cascade = cv2.CascadeClassifier(cv2.data.haarcascades +
'haarcascade_frontalface_default.xml')
# Load a sample image (replace with a face image from your dataset)
image_color = cv2.cvtColor(image_large, cv2.COLOR_GRAY2BGR)
# Detect faces
faces = face_cascade.detectMultiScale(image_color, scaleFactor=1.1, minNeighbors=5,
minSize=(30, 30))
# Draw rectangles around detected faces
for (x, y, w, h) in faces:
    cv2.rectangle(image_color, (x, y), (x+w, y+h), (255, 0, 0), 2)
# Display the result
plt.imshow(cv2.cvtColor(image_color, cv2.COLOR_BGR2RGB))
plt.title("Object Recognition: Face Detection")
plt.axis('off')
plt.show()
### Project - Create a Pipeline for Image Classification
import cv2
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.datasets import mnist
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, accuracy_score
# Load MNIST dataset
(X_train, y_train), (X_test, y_test) = mnist.load_data()
# Normalize the pixel values to the range [0, 1]
X_train = X_train / 255.0
X_test = X_test / 255.0
# Reshape the data to 2D arrays for feature extraction
X_train_flattened = X_train.reshape(X_train.shape[0], -1)
X_test_flattened = X_test.reshape(X_test.shape[0], -1)
# Split the training data into training and validation sets
X_train_split, X_val_split, y_train_split, y_val_split = train_test_split(
X_train_flattened, y_train, test_size=0.2, random_state=42)
print(f"Training data shape: {X_train_split.shape}")
print(f"Validation data shape: {X_val_split.shape}")
# Function to extract histogram features
def extract_features(images):
    features = []
    for img in images:
        # Ensure image is in the correct format for OpenCV (grayscale and uint8)
        img = (img * 255).astype(np.uint8)
        # Compute histogram for the image
        hist = cv2.calcHist([img], [0], None, [256], [0, 256])
        hist = cv2.normalize(hist, hist).flatten()
        features.append(hist)
    return np.array(features)
# Extract features from the dataset
X_train_features = extract_features(X_train_split.reshape(-1, 28, 28))
X_val_features = extract_features(X_val_split.reshape(-1, 28, 28))
X_test_features = extract_features(X_test.reshape(-1, 28, 28))
print(f"Feature shape: {X_train_features.shape}")
# Train a Random Forest classifier
rf_classifier = RandomForestClassifier(n_estimators=100, random_state=42)
rf_classifier.fit(X_train_features, y_train_split)
# Validate the model
y_val_pred = rf_classifier.predict(X_val_features)
print("Validation Accuracy:", accuracy_score(y_val_split, y_val_pred))
# Test the model
y_test_pred = rf_classifier.predict(X_test_features)
# Print evaluation metrics
print("Test Accuracy:", accuracy_score(y_test, y_test_pred))
print("Classification Report:")
print(classification_report(y_test, y_test_pred))
# Visualize some predictions
for i in range(10):
    plt.imshow(X_test[i], cmap='gray')
    plt.title(f"True: {y_test[i]}, Predicted: {y_test_pred[i]}")
    plt.axis('off')
    plt.show()
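Grayscale histograms discard all spatial information, which limits accuracy on digits. As a quick point of comparison (a sketch; the tree count is kept small here only to shorten the run time), the snippet below trains a Random Forest directly on the flattened pixel values prepared earlier.
# Baseline for comparison: train on raw flattened pixels instead of histogram features
rf_pixels = RandomForestClassifier(n_estimators=20, random_state=42)
rf_pixels.fit(X_train_split, y_train_split)
print("Raw-pixel Validation Accuracy:", accuracy_score(y_val_split, rf_pixels.predict(X_val_split)))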
## Search and Optimization in AI
### Fundamentals of Search
import numpy as np
from queue import Queue, LifoQueue, PriorityQueue
# For visualizing search paths
import networkx as nx
import matplotlib.pyplot as plt
# Create a sample graph for search algorithms
G = nx.Graph()
# Add nodes and edges
edges = [
('A', 'B', 1), ('A', 'C', 4),
('B', 'D', 2), ('B', 'E', 5),
('C', 'F', 3), ('D', 'G', 1),
('E', 'G', 1), ('F', 'G', 2)
]
G.add_weighted_edges_from(edges)
# Visualize the graph
pos = nx.spring_layout(G)
nx.draw(G, pos, with_labels=True, node_size=500, node_color='skyblue')
nx.draw_networkx_edge_labels(G, pos, edge_labels={(u, v): d for u, v, d in edges})
plt.title("Search Space Graph")
plt.show()
def bfs(graph, start, goal):
    visited = set()
    queue = Queue()
    queue.put((start, [start]))
    while not queue.empty():
        node, path = queue.get()
        if node in visited:
            continue
        visited.add(node)
        if node == goal:
            return path
        for neighbor in graph[node]:
            if neighbor not in visited:
                queue.put((neighbor, path + [neighbor]))
    return None
# Convert graph to adjacency list
adj_list = {node: list(G.neighbors(node)) for node in G.nodes()}
# Perform BFS
path_bfs = bfs(adj_list, 'A', 'G')
print("BFS Path:", path_bfs)
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.colors import ListedColormap
from collections import deque
def bfs(maze, start, goal):
    queue = deque([(start, [])])
    visited = set()
    while queue:
        current, path = queue.popleft()
        x, y = current
        if current == goal:
            return path + [current]
        if current in visited:
            continue
        visited.add(current)
        for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            nx, ny = x + dx, y + dy
            if 0 <= nx < len(maze) and 0 <= ny < len(maze[0]) and maze[nx][ny] != 1:
                queue.append(((nx, ny), path + [current]))
    return None
def visualize_maze(maze, start, goal, path=None):
    cmap = ListedColormap(['white', 'black', 'red', 'blue', 'green'])
    bounds = [0, 0.5, 1.5, 2.5, 3.5, 4.5]
    norm = plt.Normalize(bounds[0], bounds[-1])
    fig, ax = plt.subplots()
    ax.imshow(maze, cmap=cmap, norm=norm)
    ax.scatter(start[1], start[0], color='yellow', marker='o', label='Start')
    ax.scatter(goal[1], goal[0], color='purple', marker='o', label='Goal')
    if path:
        for node in path[1:-1]:
            ax.scatter(node[1], node[0], color='green', marker='o')
    ax.legend()
    plt.show()
# Example maze
maze = np.array([
[0, 0, 0, 0, 0],
[1, 1, 0, 1, 1],
[0, 0, 0, 0, 0],
[0, 1, 1, 1, 0],
[0, 0, 0, 0, 0]
])
start = (0, 0)
goal = (4, 4)
path = bfs(maze, start, goal)
visualize_maze(maze, start, goal, path)
def dfs(maze, start, goal):
    stack = [(start, [])]
    visited = set()
    while stack:
        current, path = stack.pop()
        x, y = current
        if current == goal:
            return path + [current]
        if current in visited:
            continue
        visited.add(current)
        for dx, dy in [(0, 1), (0, -1), (1, 0), (-1, 0)]:
            nx, ny = x + dx, y + dy
            if 0 <= nx < len(maze) and 0 <= ny < len(maze[0]) and maze[nx][ny] != 1:
                stack.append(((nx, ny), path + [current]))
    return None
# Example maze
maze = np.array([
[0, 0, 0, 0, 0],
[1, 1, 0, 1, 1],
[0, 0, 0, 0, 0],
[0, 1, 1, 1, 0],
[0, 0, 0, 0, 0]
])
start = (0, 0)
goal = (4, 4)
path = dfs(maze, start, goal)
visualize_maze(maze, start, goal, path)
def heuristic(node, goal):
    # Simple heuristic: estimated distance from each node to the goal
    h = {'A': 6, 'B': 4, 'C': 5, 'D': 2, 'E': 3, 'F': 4, 'G': 0}
    return h[node]
def a_star(graph, start, goal):
    visited = set()
    pq = PriorityQueue()
    # Queue entries are (f = g + h, g, node, path); g is the path cost so far
    pq.put((heuristic(start, goal), 0, start, [start]))
    while not pq.empty():
        f, g, node, path = pq.get()
        if node in visited:
            continue
        visited.add(node)
        if node == goal:
            return path
        for neighbor in graph[node]:
            if neighbor not in visited:
                edge_cost = G[node][neighbor]['weight']
                g_new = g + edge_cost
                pq.put((g_new + heuristic(neighbor, goal), g_new, neighbor, path + [neighbor]))
# Perform A* Search
path_a_star = a_star(adj_list, 'A', 'G')
print("A* Path:", path_a_star)
# Python program for A* Search Algorithm
import math
import heapq
# Define the Cell class
class Cell:
    def __init__(self):
        # Parent cell's row index
        self.parent_i = 0
        # Parent cell's column index
        self.parent_j = 0
        # Total cost of the cell (g + h)
        self.f = float('inf')
        # Cost from start to this cell
        self.g = float('inf')
        # Heuristic cost from this cell to destination
        self.h = 0
# Define the size of the grid
ROW = 9
COL = 10
# Check if a cell is valid (within the grid)
def is_valid(row, col):
    return (row >= 0) and (row < ROW) and (col >= 0) and (col < COL)
# Check if a cell is unblocked
def is_unblocked(grid, row, col):
    return grid[row][col] == 1
# Check if a cell is the destination
def is_destination(row, col, dest):
    return row == dest[0] and col == dest[1]
# Calculate the heuristic value of a cell (Euclidean distance to destination)
def calculate_h_value(row, col, dest):
    return ((row - dest[0]) ** 2 + (col - dest[1]) ** 2) ** 0.5
# Trace the path from source to destination
def trace_path(cell_details, dest):
    print("The Path is ")
    path = []
    row = dest[0]
    col = dest[1]
    # Trace the path from destination to source using parent cells
    while not (cell_details[row][col].parent_i == row and cell_details[row][col].parent_j == col):
        path.append((row, col))
        temp_row = cell_details[row][col].parent_i
        temp_col = cell_details[row][col].parent_j
        row = temp_row
        col = temp_col
    # Add the source cell to the path
    path.append((row, col))
    # Reverse the path to get the path from source to destination
    path.reverse()
    # Print the path
    for i in path:
        print("->", i, end=" ")
    print()
# Implement the A* search algorithm
def a_star_search(grid, src, dest):
    # Check if the source and destination are valid
    if not is_valid(src[0], src[1]) or not is_valid(dest[0], dest[1]):
        print("Source or destination is invalid")
        return
    # Check if the source and destination are unblocked
    if not is_unblocked(grid, src[0], src[1]) or not is_unblocked(grid, dest[0], dest[1]):
        print("Source or the destination is blocked")
        return
    # Check if we are already at the destination
    if is_destination(src[0], src[1], dest):
        print("We are already at the destination")
        return
    # Initialize the closed list (visited cells)
    closed_list = [[False for _ in range(COL)] for _ in range(ROW)]
    # Initialize the details of each cell
    cell_details = [[Cell() for _ in range(COL)] for _ in range(ROW)]
    # Initialize the start cell details
    i = src[0]
    j = src[1]
    cell_details[i][j].f = 0
    cell_details[i][j].g = 0
    cell_details[i][j].h = 0
    cell_details[i][j].parent_i = i
    cell_details[i][j].parent_j = j
    # Initialize the open list (cells to be visited) with the start cell
    open_list = []
    heapq.heappush(open_list, (0.0, i, j))
    # Initialize the flag for whether destination is found
    found_dest = False
    # Main loop of A* search algorithm
    while len(open_list) > 0:
        # Pop the cell with the smallest f value from the open list
        p = heapq.heappop(open_list)
        # Mark the cell as visited
        i = p[1]
        j = p[2]
        closed_list[i][j] = True
        # For each direction, check the successors
        directions = [(0, 1), (0, -1), (1, 0), (-1, 0),
                      (1, 1), (1, -1), (-1, 1), (-1, -1)]
        for dir in directions:
            new_i = i + dir[0]
            new_j = j + dir[1]
            # If the successor is valid, unblocked, and not visited
            if is_valid(new_i, new_j) and is_unblocked(grid, new_i, new_j) and not closed_list[new_i][new_j]:
                # If the successor is the destination
                if is_destination(new_i, new_j, dest):
                    # Set the parent of the destination cell
                    cell_details[new_i][new_j].parent_i = i
                    cell_details[new_i][new_j].parent_j = j
                    print("The destination cell is found")
                    # Trace and print the path from source to destination
                    trace_path(cell_details, dest)
                    found_dest = True
                    return
                else:
                    # Calculate the new f, g, and h values
                    g_new = cell_details[i][j].g + 1.0
                    h_new = calculate_h_value(new_i, new_j, dest)
                    f_new = g_new + h_new
                    # If the cell is not in the open list or the new f value is smaller
                    if cell_details[new_i][new_j].f == float('inf') or cell_details[new_i][new_j].f > f_new:
                        # Add the cell to the open list
                        heapq.heappush(open_list, (f_new, new_i, new_j))
                        # Update the cell details
                        cell_details[new_i][new_j].f = f_new
                        cell_details[new_i][new_j].g = g_new
                        cell_details[new_i][new_j].h = h_new
                        cell_details[new_i][new_j].parent_i = i
                        cell_details[new_i][new_j].parent_j = j
    # If the destination is not found after visiting all cells
    if not found_dest:
        print("Failed to find the destination cell")
# Driver Code
def main():
    # Define the grid (1 for unblocked, 0 for blocked)
    grid = [
        [1, 0, 1, 1, 1, 1, 0, 1, 1, 1],
        [1, 1, 1, 0, 1, 1, 1, 0, 1, 1],
        [1, 1, 1, 0, 1, 1, 0, 1, 0, 1],
        [0, 0, 1, 0, 1, 0, 0, 0, 0, 1],
        [1, 1, 1, 0, 1, 1, 1, 0, 1, 0],
        [1, 0, 1, 1, 1, 1, 0, 1, 0, 0],
        [1, 0, 0, 0, 0, 1, 0, 0, 0, 1],
        [1, 0, 1, 1, 1, 1, 0, 1, 1, 1],
        [1, 1, 1, 0, 0, 0, 1, 0, 0, 1]
    ]
    # Define the source and destination
    src = [8, 0]
    dest = [0, 0]
    # Run the A* search algorithm
    a_star_search(grid, src, dest)

if __name__ == "__main__":
    main()
print("BFS Path:", path_bfs)
print("A* Path:", path_a_star)
### Optimization Techniques
import numpy as np
# Define a simple quadratic function: f(x) = x^2
# Derivative of f(x): f'(x) = 2x
def gradient_descent(learning_rate=0.1, epochs=100):
    x = 10  # Starting point
    history = []
    for epoch in range(epochs):
        grad = 2 * x  # Derivative of the function
        x = x - learning_rate * grad
        history.append(x)
        print(f"Epoch {epoch+1}: x = {x:.4f}, f(x) = {x**2:.4f}")
    return history
# Run Gradient Descent
history = gradient_descent()
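The history list returned above makes it easy to see how quickly the iterates approach the minimum at x = 0. A short sketch that plots f(x) per epoch (matplotlib is imported again so the snippet stands on its own):
import matplotlib.pyplot as plt
# Plot the objective value f(x) = x^2 at every epoch to visualize convergence
plt.plot([x ** 2 for x in history])
plt.xlabel("Epoch")
plt.ylabel("f(x)")
plt.title("Gradient Descent Convergence")
plt.show()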
import numpy as np
import random
# Define a fitness function that returns positive values
def fitness(x):
    # Ensure fitness is positive by adding a constant offset
    return max(1e-6, -(x ** 2) + 100)
# Initialize population
def initialize_population(size, bounds):
    return [random.uniform(bounds[0], bounds[1]) for _ in range(size)]
# Selection
def select_parents(population):
    fitness_values = [fitness(x) for x in population]
    total_fitness = sum(fitness_values)
    if total_fitness <= 0:
        raise ValueError("Total of weights must be greater than zero")
    return random.choices(population, k=2, weights=fitness_values)
# Crossover
def crossover(parent1, parent2):
    return (parent1 + parent2) / 2
# Mutation
def mutate(child, mutation_rate=0.1):
    if random.random() < mutation_rate:
        return child + random.uniform(-1, 1)
    return child
# Genetic Algorithm
def genetic_algorithm(bounds, population_size=10, generations=20):
    population = initialize_population(population_size, bounds)
    for generation in range(generations):
        new_population = []
        for _ in range(population_size):
            parent1, parent2 = select_parents(population)
            child = crossover(parent1, parent2)
            child = mutate(child)
            new_population.append(child)
        population = new_population
        best_individual = max(population, key=fitness)
        print(f"Generation {generation+1}: Best fitness = {fitness(best_individual):.4f}, x = {best_individual:.4f}")
# Run Genetic Algorithm
genetic_algorithm(bounds=(-10, 10))
import math
import random
# Define the function to minimize: f(x) = x^2
def f(x):
    return x ** 2
# Simulated Annealing Algorithm
def simulated_annealing(initial_temp, cooling_rate, bounds):
    x = random.uniform(bounds[0], bounds[1])  # Initial solution
    current_temp = initial_temp
    while current_temp > 1:
        new_x = x + random.uniform(-1, 1)  # Neighbor solution
        new_x = max(min(new_x, bounds[1]), bounds[0])
        delta_e = f(new_x) - f(x)
        if delta_e < 0 or random.random() < math.exp(-delta_e / current_temp):
            x = new_x
        current_temp *= cooling_rate
    return x
# Run Simulated Annealing
best_solution = simulated_annealing(initial_temp=1000, cooling_rate=0.9, bounds=(-10, 10))
print(f"Best solution found: x = {best_solution:.4f}, f(x) = {f(best_solution):.4f}")
### Applications of Search in AI
!pip install --upgrade --user ortools
from ortools.sat.python import cp_model
# Create the model
model = cp_model.CpModel()
# Define tasks and durations
tasks = ['Task A', 'Task B', 'Task C']
durations = [3, 2, 4] # in hours
# Create start and end time variables
start_times = [model.NewIntVar(0, sum(durations), f'start_{task}') for task in tasks]
end_times = [model.NewIntVar(0, sum(durations), f'end_{task}') for task in tasks]
# Add constraints for task durations
for i in range(len(tasks)):
    model.Add(end_times[i] == start_times[i] + durations[i])
# Add no-overlap constraints using interval variables
intervals = [model.NewIntervalVar(start_times[i], durations[i], end_times[i], f'interval_{i}')
             for i in range(len(tasks))]
model.AddNoOverlap(intervals)
# Objective: Minimize the makespan
makespan = model.NewIntVar(0, sum(durations), 'makespan')
model.AddMaxEquality(makespan, end_times)
model.Minimize(makespan)
# Solve the model
solver = cp_model.CpSolver()
status = solver.Solve(model)
if status == cp_model.OPTIMAL:
    print("Optimal Schedule Found")
    for i, task in enumerate(tasks):
        print(f"{task}: Start at {solver.Value(start_times[i])}, End at {solver.Value(end_times[i])}")
else:
    print("No Optimal Solution Found")
import matplotlib.pyplot as plt
def plot_schedule(tasks, start_times, durations):
    plt.figure(figsize=(10, 5))
    for i, task in enumerate(tasks):
        plt.barh(i, durations[i], left=start_times[i])
        plt.text(start_times[i] + durations[i] / 2, i, task, ha='center', va='center')
    plt.xlabel("Time")
    plt.ylabel("Tasks")
    plt.yticks(range(len(tasks)), tasks)
    plt.title("Task Schedule")
    plt.show()
# Example usage
plot_schedule(['Task A', 'Task B', 'Task C'], [0, 3, 5], [3, 2, 4])
import numpy as np
import heapq
# Define the grid and heuristic
def create_grid():
    return np.array([
        [0, 0, 0, 1, 0],
        [1, 1, 0, 1, 0],
        [0, 0, 0, 0, 0],
        [0, 1, 1, 1, 0],
        [0, 0, 0, 1, 0]
    ])

def heuristic(a, b):
    return abs(a[0] - b[0]) + abs(a[1] - b[1])
# A* Algorithm
def astar(grid, start, goal):
    rows, cols = grid.shape
    open_list = []
    heapq.heappush(open_list, (0 + heuristic(start, goal), 0, start, []))
    visited = set()
    while open_list:
        _, cost, current, path = heapq.heappop(open_list)
        if current in visited:
            continue
        path = path + [current]
        if current == goal:
            return path
        visited.add(current)
        for dx, dy in [(-1, 0), (1, 0), (0, -1), (0, 1)]:
            neighbor = (current[0] + dx, current[1] + dy)
            if 0 <= neighbor[0] < rows and 0 <= neighbor[1] < cols and grid[neighbor] == 0:
                heapq.heappush(open_list, (cost + heuristic(neighbor, goal), cost + 1, neighbor, path))
    return None
# Run A*
grid = create_grid()
start = (0, 0)
goal = (4, 4)
path = astar(grid, start, goal)
print("Path:", path)
import matplotlib.pyplot as plt
def plot_grid(grid, path):
    plt.imshow(grid, cmap="Greys", origin="upper")
    for node in path:
        plt.plot(node[1], node[0], "bo")
    plt.show()
plot_grid(grid, path)
from scipy.optimize import linprog
# Define costs, constraints, and bounds
c = [1, 2, 3] # Cost coefficients
A = [[1, 1, 1], [2, 1, 2]] # Constraints
b = [7, 10] # Bounds for constraints
x_bounds = [(0, None), (0, None), (0, None)]
# Solve linear programming problem
result = linprog(c, A_ub=A, b_ub=b, bounds=x_bounds, method='highs')
print("Optimal Resource Allocation:", result.x)
## Reinforcement Learning
### Introduction to Reinforcement Learning
import gym
import matplotlib.pyplot as plt
# Create the CartPole environment
env = gym.make("CartPole-v1", render_mode="rgb_array")
# Initialize the environment
state = env.reset()
print("Initial State:", state)
# Render the environment (capture as image)
frame = env.render()
frame = frame[0] # Extract the 3D image array
plt.imshow(frame)
plt.axis('off')
plt.show()
# Close the environment
env.close()
import time
from IPython.display import clear_output, display
# Run a random policy
def random_policy(env, episodes=1):
    for episode in range(episodes):
        state = env.reset()
        done = False
        while not done:
            action = env.action_space.sample()  # Random action
            state, reward, done, info = env.step(action)
            frame = env.render()[0]  # Extract the 3D image array
            plt.imshow(frame)
            plt.axis('off')
            display(plt.gcf())
            clear_output(wait=True)
            time.sleep(0.05)  # Slow down the visualization
    env.close()
random_policy(env)
import matplotlib.pyplot as plt
# Simulate 100 steps and record rewards
rewards = []
state = env.reset()
for step in range(100):
    action = env.action_space.sample()
    _, reward, done, _ = env.step(action)
    rewards.append(reward)
    if done:
        break
# Plot rewards
plt.plot(rewards)
plt.title("Rewards over Steps")
plt.xlabel("Steps")
plt.ylabel("Reward")
plt.show()
### Q-Learning Basics
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
# Define grid-world dimensions
grid_size = 5
reward_matrix = np.zeros((grid_size, grid_size))
# Define rewards
reward_matrix[4, 4] = 10 # Goal state
reward_matrix[2, 2] = -10 # Obstacle
# Visualization function
def plot_grid(reward_matrix, agent_pos):
    grid = reward_matrix.copy()
    grid[agent_pos] = 5  # Represent agent position
    sns.heatmap(grid, annot=True, fmt=".1f", cmap="coolwarm")
    plt.show()
# Initialize agent position
agent_pos = (0, 0)
plot_grid(reward_matrix, agent_pos)
# Define actions
actions = ["up", "down", "left", "right"]
q_table = np.zeros((grid_size, grid_size, len(actions)))
# Hyperparameters
alpha = 0.1 # Learning rate
gamma = 0.9 # Discount factor
epsilon = 0.1 # Exploration rate
def choose_action(state, epsilon):
    if np.random.rand() < epsilon:
        return np.random.choice(actions)
    else:
        return actions[np.argmax(q_table[state[0], state[1]])]

def take_action(state, action):
    x, y = state
    if action == "up":
        x = max(0, x - 1)
    elif action == "down":
        x = min(grid_size - 1, x + 1)
    elif action == "left":
        y = max(0, y - 1)
    elif action == "right":
        y = min(grid_size - 1, y + 1)
    return (x, y)

def update_q_table(state, action, reward, next_state):
    action_idx = actions.index(action)
    best_next_action = np.max(q_table[next_state[0], next_state[1]])
    q_table[state[0], state[1], action_idx] += alpha * (
        reward + gamma * best_next_action - q_table[state[0], state[1], action_idx])
# Training loop
num_episodes = 5
for episode in range(num_episodes):
    state = (0, 0)  # Start state
    while state != (4, 4):
        action = choose_action(state, epsilon)
        next_state = take_action(state, action)
        reward = reward_matrix[next_state[0], next_state[1]]
        update_q_table(state, action, reward, next_state)
        state = next_state
# Visualize the policy
def visualize_policy():
    policy_grid = np.zeros((grid_size, grid_size), dtype=str)
    action_symbols = ['↑', '↓', '←', '→']
    for x in range(grid_size):
        for y in range(grid_size):
            if (x, y) == (4, 4):
                policy_grid[x, y] = 'G'  # Goal state
            else:
                best_action = np.argmax(q_table[x, y])
                policy_grid[x, y] = action_symbols[best_action]
    print("Optimal Policy:")
    for row in policy_grid:
        print(' '.join(row))
visualize_policy()
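Beyond printing arrows, the learned Q-table can be rolled out greedily from the start state to see the route the agent would actually take. A small sketch (the step cap is just a guard against loops from an under-trained table):
# Greedy rollout of the learned policy from the start state
state = (0, 0)
greedy_path = [state]
for _ in range(25):  # cap the number of steps in case the policy loops
    if state == (4, 4):
        break
    action = actions[np.argmax(q_table[state[0], state[1]])]
    state = take_action(state, action)
    greedy_path.append(state)
print("Greedy path:", greedy_path)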
#### Trying Frozen Lake
import numpy as np
import gym
import random
env = gym.make("FrozenLake-v1")
action_size = env.action_space.n
state_size = env.observation_space.n
qtable = np.zeros((state_size, action_size))
print(qtable)
total_episodes = 10 # Total episodes
learning_rate = 0.8 # Learning rate
max_steps = 99 # Max steps per episode
gamma = 0.95 # Discounting rate
# Exploration parameters
epsilon = 1.0 # Exploration rate
max_epsilon = 1.0 # Exploration probability at start
min_epsilon = 0.01 # Minimum exploration probability
decay_rate = 0.005 # Exponential decay rate for exploration prob
# List of rewards
rewards = []
# 2 For life or until learning is stopped
for episode in range(total_episodes):
    # Reset the environment
    state = env.reset()
    step = 0
    done = False
    total_rewards = 0
    for step in range(max_steps):
        # 3. Choose an action a in the current world state (s)
        ## First we randomize a number
        exp_exp_tradeoff = random.uniform(0, 1)
        ## If this number > epsilon --> exploitation (take the biggest Q value for this state)
        if exp_exp_tradeoff > epsilon:
            action = np.argmax(qtable[state, :])
        # Else do a random choice --> exploration
        else:
            action = env.action_space.sample()
        # Take the action (a) and observe the outcome state (s') and reward (r)
        new_state, reward, done, info = env.step(action)
        # Update Q(s,a) := Q(s,a) + lr * [R(s,a) + gamma * max Q(s',a') - Q(s,a)]
        # qtable[new_state, :] : all the actions we can take from the new state
        qtable[state, action] = qtable[state, action] + learning_rate * (
            reward + gamma * np.max(qtable[new_state, :]) - qtable[state, action])
        total_rewards += reward
        # Our new state is state
        state = new_state
        # If done (if we're dead): finish the episode
        if done == True:
            break
    # Reduce epsilon (because we need less and less exploration)
    epsilon = min_epsilon + (max_epsilon - min_epsilon) * np.exp(-decay_rate * episode)
    rewards.append(total_rewards)
print ("Score over time: " + str(sum(rewards)/total_episodes))
print(qtable)
env.reset()
for episode in range(5):
    state = env.reset()
    step = 0
    done = False
    print("****************************************************")
    print("EPISODE ", episode)
    for step in range(max_steps):
        # Take the action (index) that has the maximum expected future reward given that state
        action = np.argmax(qtable[state, :])
        new_state, reward, done, info = env.step(action)
        if done:
            # Only print the last state (to see whether the agent reached the goal or fell into a hole)
            env.render()
            # Print the number of steps it took
            print("Number of steps", step)
            break
        state = new_state
env.close()
## Generative AI
### Introduction to Generative AI
import numpy as np
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import layers
# Generate synthetic data
def generate_data(n_samples=1000):
    x = np.linspace(-1, 1, n_samples)
    y = x**2 + np.random.normal(scale=0.1, size=n_samples)
    return x, y
x, y = generate_data()
# Visualize the dataset
plt.scatter(x, y, s=5, label="Data")
plt.xlabel("x")
plt.ylabel("y")
plt.title("Synthetic Dataset")
plt.legend()
plt.show()
# Define the generative model
def build_generator():
    model = tf.keras.Sequential([
        layers.Dense(64, activation='relu', input_dim=1),
        layers.Dense(64, activation='relu'),
        layers.Dense(1)
    ])
    return model
# Instantiate the generator
generator = build_generator()
generator.summary()
# Compile the model
generator.compile(optimizer=tf.keras.optimizers.Adam(learning_rate=0.01), loss='mse')
# Train the model
x_train = x.reshape(-1, 1)
generator.fit(x_train, y, epochs=50, batch_size=32, verbose=1)
# Generate data
x_new = np.linspace(-1, 1, 100).reshape(-1, 1)
y_new = generator.predict(x_new)
# Visualize the results
plt.scatter(x, y, s=5, label="Original Data")
plt.plot(x_new, y_new, color='red', label="Generated Data")
plt.xlabel("x")
plt.ylabel("y")
plt.title("Generated Data Using the Generative Model")
plt.legend()
plt.show()
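Strictly speaking, the network above learns the conditional mean of y given x; to generate new points rather than a smooth curve, one simple option (a sketch, not a full generative model) is to add noise whose scale matches the residuals of the training data.
# Sample new y values by adding noise scaled to the training residuals (sketch)
residual_std = np.std(y - generator.predict(x_train, verbose=0).flatten())
y_sampled = y_new.flatten() + np.random.normal(scale=residual_std, size=len(y_new))
plt.scatter(x, y, s=5, label="Original Data")
plt.scatter(x_new, y_sampled, s=10, color='red', label="Sampled Data")
plt.legend()
plt.show()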
### Autoencoders
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.datasets import mnist
# Load the MNIST dataset
(x_train, _), (x_test, _) = mnist.load_data()
# Normalize the data to range [0, 1]
x_train = x_train.astype('float32') / 255.
x_test = x_test.astype('float32') / 255.
# Flatten the images for training
x_train = x_train.reshape((len(x_train), np.prod(x_train.shape[1:])))
x_test = x_test.reshape((len(x_test), np.prod(x_test.shape[1:])))
print("Training data shape:", x_train.shape)
print("Test data shape:", x_test.shape)
# Define the encoding dimension
encoding_dim = 32 # Size of the latent space
# Input placeholder
input_img = Input(shape=(784,))
# Encoder layers
encoded = Dense(128, activation='relu')(input_img)
encoded = Dense(encoding_dim, activation='relu')(encoded)
# Decoder layers
decoded = Dense(128, activation='relu')(encoded)
decoded = Dense(784, activation='sigmoid')(decoded)
# Autoencoder model
autoencoder = Model(input_img, decoded)
# Encoder model (for visualization)
encoder = Model(input_img, encoded)
# Compile the autoencoder
autoencoder.compile(optimizer='adam', loss='binary_crossentropy')
autoencoder.summary()
# Train the autoencoder
autoencoder.fit(x_train, x_train,
epochs=50,
batch_size=256,
shuffle=True,
validation_data=(x_test, x_test))
# Encode and decode some images
encoded_imgs = encoder.predict(x_test)
decoded_imgs = autoencoder.predict(x_test)
# Visualize original and reconstructed images
n = 10 # Number of images to display
plt.figure(figsize=(20, 4))
for i in range(n):
    # Display original
    ax = plt.subplot(2, n, i + 1)
    plt.imshow(x_test[i].reshape(28, 28), cmap='gray')
    plt.title("Original")
    plt.axis('off')
    # Display reconstruction
    ax = plt.subplot(2, n, i + 1 + n)
    plt.imshow(decoded_imgs[i].reshape(28, 28), cmap='gray')
    plt.title("Reconstructed")
    plt.axis('off')
plt.show()
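The 32-dimensional codes produced by the encoder can be projected to 2-D for a quick look at how the digits cluster in latent space. The sketch below uses PCA and colors points by digit label; the labels are reloaded here because they were discarded when the data was first loaded.
from sklearn.decomposition import PCA
# Project the 32-D latent codes to 2-D and color by digit label (sketch)
(_, _), (_, y_test_labels) = mnist.load_data()
latent_2d = PCA(n_components=2).fit_transform(encoded_imgs)
plt.figure(figsize=(8, 6))
plt.scatter(latent_2d[:, 0], latent_2d[:, 1], c=y_test_labels, cmap='tab10', s=3)
plt.colorbar(label='Digit')
plt.title("Latent Space Projected with PCA")
plt.show()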
### Applications of Generative AI
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import make_classification
from imblearn.over_sampling import SMOTE
# Generate synthetic classification data
X, y = make_classification(n_samples=1000,
n_features=2,
n_informative=2,
n_redundant=0,
n_clusters_per_class=1,
random_state=42)
# Visualize the data
plt.scatter(X[:, 0], X[:, 1], c=y, cmap='viridis', s=20)
plt.title("Synthetic Data using make_classification")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()
# Introduce imbalance in the dataset
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, stratify=y, random_state=42)
# Make the dataset imbalanced by keeping only a small subset of class 1
minority_idx = np.where(y_train == 1)[0][:25]
majority_idx = np.where(y_train == 0)[0]
keep_idx = np.concatenate([majority_idx, minority_idx])
X_train_imbalanced, y_train_imbalanced = X_train[keep_idx], y_train[keep_idx]
# Apply SMOTE to balance the dataset
smote = SMOTE(random_state=42)
X_resampled, y_resampled = smote.fit_resample(X_train_imbalanced, y_train_imbalanced)
# Visualize the resampled data
plt.scatter(X_resampled[:, 0], X_resampled[:, 1], c=y_resampled, cmap='viridis', s=20)
plt.title("Balanced Data using SMOTE")
plt.xlabel("Feature 1")
plt.ylabel("Feature 2")
plt.show()
# Generate synthetic time-series data
import pandas as pd
import numpy as np
time = pd.date_range(start='2023-01-01', periods=100, freq='D')
synthetic_series = np.sin(np.linspace(0, 20, 100)) + np.random.normal(scale=0.1, size=100)
# Plot the time-series data
plt.figure(figsize=(10, 5))
plt.plot(time, synthetic_series, label='Synthetic Time-Series')
plt.title("Synthetic Time-Series Data")
plt.xlabel("Time")
plt.ylabel("Value")
plt.legend()
plt.show()
### Project - Generate Realistic Text Using an AI-based Language Model
# Install required libraries
!pip install transformers
# Import necessary modules
from transformers import AutoTokenizer, AutoModelForCausalLM
# Load a pre-trained model (GPT-2 in this case)
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForCausalLM.from_pretrained(model_name)
# Input text for the model
input_text = "Once upon a time in a distant land"
# Tokenize the input text
tokens = tokenizer.encode(input_text, return_tensors="pt")
# Generate text using the model (do_sample=True is needed for top_k/top_p/temperature to take effect)
generated_tokens = model.generate(tokens,
                                  max_length=50,
                                  num_return_sequences=1,
                                  no_repeat_ngram_size=2,
                                  do_sample=True,
                                  top_k=50,
                                  top_p=0.95,
                                  temperature=0.7,
                                  pad_token_id=tokenizer.eos_token_id)
# Decode the generated tokens to text
generated_text = tokenizer.decode(generated_tokens[0], skip_special_tokens=True)
print("Generated Text:\n", generated_text)