H2_AndresAlcivar


ANDRES ALCIVAR

SOONGSIL UNIVERSITY
1. GRADIENT_1D

import numpy as np
import matplotlib.pylab as plt


# Define the numerical differentiation function
def numerical_diff(f, x):
    h = 1e-4  # Small number to approximate the derivative
    return (f(x + h) - f(x - h)) / (2 * h)  # Central difference formula


# Define the function for which we want to calculate the derivative
def function_1(x):
    return 0.01 * x ** 2 + 0.1 * x


# Function to create a tangent line at a given point x
def tangent_line(f, x):
    d = numerical_diff(f, x)  # Calculate the derivative at x
    print(d)  # Print the derivative for verification
    y = f(x) - d * x  # Calculate the y-intercept of the tangent line
    return lambda t: d * t + y  # Return the linear function representing the tangent line


# Create an array of x values from 0 to 20 with an increment of 0.1
x = np.arange(0.0, 20.0, 0.1)
y = function_1(x)  # Calculate y values for the function

# Labels for the plot
plt.xlabel("x")
plt.ylabel("f(x)")

# Get the tangent line function at x = 5
tf = tangent_line(function_1, 5)
y2 = tf(x)  # Calculate y values for the tangent line

# Plot the original function and its tangent line at x = 5
plt.plot(x, y)
plt.plot(x, y2)
plt.show()  # Display the plot
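A quick sanity check, not part of the listing above: the exact derivative of function_1 is 0.02*x + 0.1, so the central-difference result should agree with it almost exactly. A minimal sketch, assuming numerical_diff and function_1 defined above are in scope (x0, approx, and exact are names introduced here):

# Sanity check: compare the central-difference result with the exact derivative
for x0 in (5.0, 10.0):
    approx = numerical_diff(function_1, x0)
    exact = 0.02 * x0 + 0.1  # exact derivative of 0.01*x**2 + 0.1*x
    print(x0, approx, exact, abs(approx - exact))  # difference should be tiny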

2. GRADIENT_2D
import numpy as np
import matplotlib.pylab as plt
from mpl_toolkits.mplot3d import Axes3D


# Function to compute the numerical gradient of a scalar function at a single point
def _numerical_gradient_no_batch(f, x):
    h = 1e-4  # Small number for finite difference approximation
    grad = np.zeros_like(x)  # Initialize gradient array with the same shape as x

    # Iterate over all dimensions of x to calculate partial derivatives
    for idx in range(x.size):
        tmp_val = x[idx]  # Store the original value
        x[idx] = float(tmp_val) + h
        fxh1 = f(x)  # Evaluate f at x+h

        x[idx] = tmp_val - h
        fxh2 = f(x)  # Evaluate f at x-h
        grad[idx] = (fxh1 - fxh2) / (2 * h)  # Compute the partial derivative

        x[idx] = tmp_val  # Restore the original value of x[idx]

    return grad  # Return the gradient vector


# Function to compute the numerical gradient of a function at one or more points
def numerical_gradient(f, X):
    if X.ndim == 1:
        return _numerical_gradient_no_batch(f, X)  # Handle single point case
    else:
        grad = np.zeros_like(X)  # Initialize gradient matrix

        # Compute gradient for each point in X
        for idx, x in enumerate(X):
            grad[idx] = _numerical_gradient_no_batch(f, x)

        return grad  # Return the gradient matrix


# Function to be minimized (in this case, a simple sum of squares function)
def function_2(x):
    if x.ndim == 1:
        return np.sum(x**2)
    else:
        return np.sum(x**2, axis=1)


# Main script execution starts here
if __name__ == '__main__':
    # Generate grid points for plotting
    x0 = np.arange(-2, 2.5, 0.25)
    x1 = np.arange(-2, 2.5, 0.25)
    X, Y = np.meshgrid(x0, x1)  # Create a meshgrid for the 2D plot

    X = X.flatten()  # Flatten the grid arrays for processing
    Y = Y.flatten()

    # Compute the gradient of function_2 at each grid point
    grad = numerical_gradient(function_2, np.array([X, Y]).T).T

    # Plotting
    plt.figure()
    plt.quiver(X, Y, -grad[0], -grad[1], angles="xy",
               color="#666666")  # Draw arrows showing gradient direction
    plt.xlim([-2, 2])
    plt.ylim([-2, 2])
    plt.xlabel('x0')  # Label for the x-axis
    plt.ylabel('x1')  # Label for the y-axis
    plt.grid()  # Add a grid to the plot
    plt.draw()  # Render the plot
    plt.show()  # Display the plot
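A quick check, not part of the assignment code: since function_2 is a sum of squares, its exact gradient at a point (x0, x1) is (2*x0, 2*x1). A minimal sketch, assuming numerical_gradient and function_2 defined above are available (pts, num_grad, and ana_grad are names introduced here):

pts = np.array([[3.0, 4.0], [-2.0, 1.0], [0.5, -0.5]])  # a few arbitrary test points
num_grad = numerical_gradient(function_2, pts)  # finite-difference gradient, one row per point
ana_grad = 2 * pts  # exact gradient of the sum-of-squares function
print(np.max(np.abs(num_grad - ana_grad)))  # expected to be extremely small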

3. GRADIENT_METHOD

import numpy as np
import matplotlib.pylab as plt
from gradient_2d import numerical_gradient  # Import the numerical gradient function


# Function to perform gradient descent on a given function f
def gradient_descent(f, init_x, lr=0.01, step_num=100):
    x = init_x  # Initialize x with the starting point
    x_history = []  # List to store the history of x values during the descent

    # Perform the gradient descent for a given number of steps
    for i in range(step_num):
        x_history.append(x.copy())  # Add the current x value to the history

        grad = numerical_gradient(f, x)  # Compute the gradient at the current x
        x -= lr * grad  # Update x by moving against the gradient

    return x, np.array(x_history)  # Return the final position and the history of x values


# Function to be optimized, in this case, a simple quadratic function
def function_2(x):
    return x[0]**2 + x[1]**2  # Return the sum of squares of x


# Initial point from which the gradient descent starts
init_x = np.array([-3.0, 4.0])

# Set the learning rate and number of steps for the gradient descent
lr = 0.1
step_num = 20
x, x_history = gradient_descent(function_2, init_x, lr=lr, step_num=step_num)

# Plotting setup
plt.plot([-5, 5], [0, 0], '--b')  # Draw x-axis
plt.plot([0, 0], [-5, 5], '--b')  # Draw y-axis
plt.plot(x_history[:, 0], x_history[:, 1], 'o')  # Plot the history of x positions

# Set plot limits and labels
plt.xlim(-3.5, 3.5)
plt.ylim(-4.5, 4.5)
plt.xlabel("X0")
plt.ylabel("X1")
plt.show()  # Display the plot
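A common follow-up experiment, not part of the listing above: gradient descent only behaves well for a reasonable learning rate; a rate that is far too large makes the iterates blow up, while one that is far too small leaves the starting point essentially unchanged. A minimal sketch, assuming gradient_descent and function_2 defined above (lr_test and x_out are names introduced here):

# Learning-rate experiment: too large diverges, too small barely moves
for lr_test in (10.0, 1e-10):
    x_out, _ = gradient_descent(function_2, np.array([-3.0, 4.0]), lr=lr_test, step_num=100)
    print(lr_test, x_out)
# With lr=10.0 the result is expected to be huge; with lr=1e-10 it stays near (-3.0, 4.0)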
4. GRADIENT_SIMPLENET
import sys, os
# Append the parent directory to sys.path to import modules from there
sys.path.append(os.pardir)
import numpy as np  # Import numpy for numerical operations
from common.functions import softmax, cross_entropy_error  # Import common functions used in neural networks
from common.gradient import numerical_gradient  # Import the gradient computation function


# Define a simple neural network class
class simpleNet:
    def __init__(self):
        # Initialize the weights randomly using a Gaussian distribution
        self.W = np.random.randn(2, 3)

    # Define the prediction function using matrix multiplication
    def predict(self, x):
        return np.dot(x, self.W)

    # Define the loss function, which calculates the loss for a given input and true output
    def loss(self, x, t):
        z = self.predict(x)  # Get the score vector by predicting
        y = softmax(z)  # Apply softmax to get a probability distribution
        loss = cross_entropy_error(y, t)  # Calculate the cross-entropy loss

        return loss


# Create an instance of the neural network
net = simpleNet()

# Test data
x = np.array([0.6, 0.9])  # Input data
t = np.array([0, 0, 1])  # True label (one-hot encoded)

# Define the function to compute the loss
f = lambda w: net.loss(x, t)  # Lambda function for loss computation
dW = numerical_gradient(f, net.W)  # Calculate the gradient of the loss with respect to the weights

print(dW)  # Print the gradient of the weights
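For this softmax-plus-cross-entropy setup the gradient of the loss with respect to W also has the closed form outer(x, y - t), which gives an independent check on the numerically computed dW. A minimal sketch, not part of the assignment code, assuming net, x, t, dW, and softmax from the listing above (y_prob and dW_analytic are names introduced here):

y_prob = softmax(net.predict(x))  # predicted probability distribution for the input x
dW_analytic = np.outer(x, y_prob - t)  # closed-form gradient of cross-entropy after softmax
print(np.max(np.abs(dW - dW_analytic)))  # expected to agree with the numerical gradient to high precision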


5. TWO_LAYER_NET
import sys, os
# Append the parent directory to the system path to allow importing modules from there
sys.path.append(os.pardir)
from common.functions import *  # Import common neural network functions like sigmoid and softmax
from common.gradient import numerical_gradient  # Import the function to compute numerical gradients
import numpy as np  # Import numpy for matrix and vector computations


# Define a class for a two-layer neural network
class TwoLayerNet:
    # Constructor to initialize the network architecture and parameters
    def __init__(self, input_size, hidden_size, output_size, weight_init_std=0.01):
        # Initialize weights and biases
        self.params = {}
        self.params['W1'] = weight_init_std * np.random.randn(input_size, hidden_size)
        self.params['b1'] = np.zeros(hidden_size)
        self.params['W2'] = weight_init_std * np.random.randn(hidden_size, output_size)
        self.params['b2'] = np.zeros(output_size)

    # Method to compute the output of the network for a given input
    def predict(self, x):
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']

        # Forward pass through the network
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        return y  # Return the network output

    # Compute the loss function for a given set of input and true output
    def loss(self, x, t):
        y = self.predict(x)
        return cross_entropy_error(y, t)  # Return the cross-entropy loss

    # Calculate the accuracy of the network on a given set of data
    def accuracy(self, x, t):
        y = self.predict(x)
        y = np.argmax(y, axis=1)
        t = np.argmax(t, axis=1)

        accuracy = np.sum(y == t) / float(x.shape[0])  # Compute the accuracy
        return accuracy

    # Compute numerical gradients of the loss with respect to the parameters
    def numerical_gradient(self, x, t):
        loss_W = lambda W: self.loss(x, t)  # Define the loss function

        # Calculate gradients for each parameter
        grads = {}
        grads['W1'] = numerical_gradient(loss_W, self.params['W1'])
        grads['b1'] = numerical_gradient(loss_W, self.params['b1'])
        grads['W2'] = numerical_gradient(loss_W, self.params['W2'])
        grads['b2'] = numerical_gradient(loss_W, self.params['b2'])

        return grads  # Return the computed gradients

    # Compute analytical gradients of the loss with respect to the parameters
    def gradient(self, x, t):
        # Unpack parameters
        W1, W2 = self.params['W1'], self.params['W2']
        b1, b2 = self.params['b1'], self.params['b2']
        grads = {}

        batch_num = x.shape[0]

        # Forward pass: compute activations
        a1 = np.dot(x, W1) + b1
        z1 = sigmoid(a1)
        a2 = np.dot(z1, W2) + b2
        y = softmax(a2)

        # Backward pass: compute gradients
        dy = (y - t) / batch_num
        grads['W2'] = np.dot(z1.T, dy)
        grads['b2'] = np.sum(dy, axis=0)

        dz1 = np.dot(dy, W2.T)
        da1 = sigmoid_grad(a1) * dz1
        grads['W1'] = np.dot(x.T, da1)
        grads['b1'] = np.sum(da1, axis=0)

        return grads  # Return the computed gradients
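Because the class exposes both numerical_gradient and the backpropagation-style gradient, the two can be compared directly on a small random batch as a gradient check. A minimal sketch, not part of the assignment code, assuming the TwoLayerNet class above; the tiny layer sizes and random data are chosen here only to keep the check fast:

net = TwoLayerNet(input_size=4, hidden_size=3, output_size=2)  # deliberately tiny network
x = np.random.rand(5, 4)  # 5 random input samples
t = np.eye(2)[np.random.randint(0, 2, 5)]  # random one-hot labels
grads_num = net.numerical_gradient(x, t)  # finite-difference gradients
grads_bp = net.gradient(x, t)  # analytic (backpropagation) gradients
for key in ('W1', 'b1', 'W2', 'b2'):
    print(key, np.max(np.abs(grads_num[key] - grads_bp[key])))  # differences should be very small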


6. TRAINING_NEURALNET
import sys, os
# Append the parent directory to the system path to allow importing from there
sys.path.append(os.pardir)
import numpy as np  # Numerical computing library
import matplotlib.pyplot as plt  # Plotting library
from dataset.mnist import load_mnist  # Function to load the MNIST dataset
from two_layer_net import TwoLayerNet  # Class representing a two-layer neural network

# Load the MNIST dataset with normalization and one-hot encoding of the labels
(x_train, t_train), (x_test, t_test) = load_mnist(normalize=True, one_hot_label=True)

# Initialize the neural network with the specified architecture
network = TwoLayerNet(input_size=784, hidden_size=50, output_size=10)

# Define training parameters
iters_num = 10000  # Number of training iterations
train_size = x_train.shape[0]  # Total number of training samples
batch_size = 100  # Number of samples in each mini-batch
learning_rate = 0.1  # Learning rate for weight updates

# Lists to store the loss and accuracy values for plotting
train_loss_list = []
train_acc_list = []
test_acc_list = []

# Calculate the number of iterations per epoch (full pass through the training data)
iter_per_epoch = max(train_size / batch_size, 1)

# Training loop
for i in range(iters_num):
    # Randomly select a mini-batch of samples
    batch_mask = np.random.choice(train_size, batch_size)
    x_batch = x_train[batch_mask]
    t_batch = t_train[batch_mask]

    # Compute the gradient of the loss function
    grad = network.gradient(x_batch, t_batch)

    # Update the network parameters based on the gradient
    for key in ('W1', 'b1', 'W2', 'b2'):
        network.params[key] -= learning_rate * grad[key]

    # Calculate and record the loss for the current mini-batch
    loss = network.loss(x_batch, t_batch)
    train_loss_list.append(loss)

    # Once per epoch, evaluate and print the training and test accuracy
    if i % iter_per_epoch == 0:
        train_acc = network.accuracy(x_train, t_train)
        test_acc = network.accuracy(x_test, t_test)
        train_acc_list.append(train_acc)
        test_acc_list.append(test_acc)
        print("train acc, test acc | " + str(train_acc) + ", " + str(test_acc))

# Plotting the training and test accuracies
markers = {'train': 'o', 'test': 's'}  # Markers for the plot
x = np.arange(len(train_acc_list))  # Epochs on the x-axis
plt.plot(x, train_acc_list, label='train acc')  # Training accuracy plot
plt.plot(x, test_acc_list, label='test acc', linestyle='--')  # Test accuracy plot
plt.xlabel("epochs")  # X-axis label
plt.ylabel("accuracy")  # Y-axis label
plt.ylim(0, 1.0)  # Set y-axis limits
plt.legend(loc='lower right')  # Add a legend
plt.show()  # Display the plot
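The script records the mini-batch loss in train_loss_list but only plots accuracy; the loss curve can be drawn the same way. A minimal sketch, not part of the assignment code, assuming train_loss_list and the imports from the script above:

plt.plot(np.arange(len(train_loss_list)), train_loss_list)  # loss recorded at each training iteration
plt.xlabel("iteration")
plt.ylabel("loss")
plt.show()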
