PROGRAM-1
OBJECTIVE- To get the input from the user and perform numerical operations
(MAX, MIN, AVG, SUM, SQRT, ROUND) in R.
Program-
# Console transcript: lines starting with ">" are commands typed at the R
# prompt; lines starting with "[1]" are the values R printed in response.
# Create the numeric vector used by the MAX/MIN/SUM/AVG examples
> data=c(23,4,56,21,34,56,73)
> # MAX: largest element of the vector
> print(max(data))
[1] 73
> # MIN: smallest element of the vector
> print(min(data))
[1] 4
> # SUM: total of all elements (auto-printed at the prompt, no print() call)
> sum(data)
[1] 267
> # AVG: arithmetic mean of the elements
> print(mean(data))
[1] 38.14286
> # SQRT: square root of a single number
> a=5
> print(sqrt(a))
[1] 2.236068
> a=5.2
> # ROUND: round the value just assigned to `a` (5.2) to the nearest whole number
> print(round(a))
[1] 5
PROGRAMMED BY- SAURABH MISHRA
Sample Output-
MAX & MIN-
SUM & AVG-
SQRT & ROUND-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-2
OBJECTIVE- To perform data import/export (.CSV, .XLS, .TXT) operations using
data frames in R.
Program-
# .CSV
# Read a comma-separated file into a data frame and display it.
# The path is assembled with paste0() only to keep the source lines short; a
# path literal must never contain a line break.
# NOTE(review): the original text wrapped between "College" and "Work" and
# after "quarter-"; a space and no separator are assumed there - confirm
# against the real file location.
csv_path <- paste0(
  "C:/Users/saurabh/OneDrive/Desktop/College Work/5th-Semester/",
  "I.D.A.V Lab/business-financial-data-june-2023-quarter-csv.csv"
)
read.data <- read.csv(csv_path)
print(read.data)
# .XLS
# readxl provides read_excel() for .xls/.xlsx workbooks.
install.packages("readxl")
library(readxl)
excel_data <- read_excel("C:/Users/saurabh/Downloads/file_example_XLS_10.xls")
print(excel_data)
# .TXT
# Tab-separated text file whose first line holds the column names.
txt_data <- read.table(
  "C:/Users/saurabh/OneDrive/Desktop/R_Language.txt",
  header = TRUE,
  sep = "\t"
)
# Print the table that was just read (previously this re-printed excel_data).
print(txt_data)
Sample Output-
CSV FILE-
PROGRAMMED BY- SAURABH MISHRA
XLS FILE-
TXT FILE-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-3
OBJECTIVE- To get the input matrix from the user and perform matrix addition,
subtraction, multiplication, inverse, transpose and division operations using
the vector concept in R.
Program-
# MATRIX CREATION
# Ask for the shape shared by both matrices. The dimensions are stored in
# n_rows / n_cols rather than `r` / `c` so the base function c() is not shadowed.
n_rows <- as.integer(readline("Enter the number of rows:"))
n_cols <- as.integer(readline("Enter the number of columns:"))
stopifnot(!is.na(n_rows), !is.na(n_cols), n_rows > 0L, n_cols > 0L)
# MATRIX VALUES:
# Read at most n_rows * n_cols numbers per matrix and insist on exactly that
# many, so matrix() never silently recycles or truncates the input.
n_values <- n_rows * n_cols
A <- scan(n = n_values)
B <- scan(n = n_values)
stopifnot(length(A) == n_values, length(B) == n_values)
# byrow = TRUE: the numbers typed by the user fill each row left to right.
M1 <- matrix(A, nrow = n_rows, ncol = n_cols, byrow = TRUE)
M2 <- matrix(B, nrow = n_rows, ncol = n_cols, byrow = TRUE)
# MATRIX M1:
print(M1)
# MATRIX M2:
print(M2)
# 1. ADDITION-
print(M1 + M2)
# 2. SUBTRACTION-
print(M1 - M2)
# 3. MULTIPLICATION-
# `*` multiplies element by element. `%*%` is the algebraic matrix product,
# which for two matrices of identical shape is only defined when they are square.
print(M1 * M2)
if (n_rows == n_cols) {
  print(M1 %*% M2)
}
# 4. TRANSPOSE-
print(t(M1))
print(t(M2))
# 5. DIVISION- (element by element)
print(M1 / M2)
# 6. INVERSE-
# Only square matrices can be inverted; solve() stops with an error if the
# matrix is singular.
if (n_rows == n_cols) {
  print(solve(M1))
  print(solve(M2))
}
PROGRAMMED BY- SAURABH MISHRA
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-4
OBJECTIVE- To perform statistical operations (Mean, Median, Mode and
Standard deviation) using R.
Program-
# Sample observations used by every statistic below
a <- c(23, 84, 16, 95, 23, 6, 41, 29, 6, 4, 6)
# 1. Mean
print(mean(a))
# 2. Median
print(median(a))
# 3. Mode
# Returns the most frequent value of `a`. When several values share the highest
# count, the one that appears first in `a` is returned.
getmode <- function(a) {
  distinct_values <- unique(a)
  # Position of each observation within distinct_values, then a count per position
  occurrences <- tabulate(match(a, distinct_values))
  distinct_values[which.max(occurrences)]
}
print(getmode(a))
# 4. Standard deviation
print(sd(a))
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-5
OBJECTIVE- To perform data pre-processing operations: (1) Handle missing data (2)
Min-Max normalization
Program-
# Handle Missing Data-
# is.na() flags both NA and NaN; is.nan() flags only NaN (such as 0 / 0).
x <- c(NA, 3, 4, NA, NA, NA)
is.na(x)
x <- c(NA, 3, 4, NA, NA, 0 / 0, 0 / 0)
is.nan(x)
# Min-Max Normalization-
install.packages("caret")
library(caret)
# NOTE(review): three Var3 entries were garbled in the original text ("0-15",
# "0-12", "0·85"); they are read here as 0.15, 0.12 and 0.85 - confirm against
# the intended data.
data <- data.frame(
  Var1 = c(120, 345, 145, 522, 596, 285, 21),
  Var2 = c(10, 15, 45, 22, 53, 28, 12),
  Var3 = c(-34, 0.05, 0.15, 0.12, -6, 0.85, 0.11)
)
# Creating Function To Implement Min-Max Scaling-
# Rescales a numeric vector so its minimum maps to 0 and its maximum to 1.
# x: numeric vector. Returns a numeric vector of the same length.
MinMax <- function(x) {
  (x - min(x)) / (max(x) - min(x))
}
# Normalize Data Using Custom Function-
# lapply() walks the data frame column by column, so every variable is scaled
# against its own min and max (apply(data, 1, ...) scaled across each row,
# mixing the three variables together).
Normalized_My_Data <- as.data.frame(lapply(data, MinMax))
head(Normalized_My_Data)
# Checking Summary After Normalization-
summary(Normalized_My_Data)
PROGRAMMED BY- SAURABH MISHRA
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-6
OBJECTIVE- To perform dimensionality reduction operation using PCA for
houses data set in R.
Program-
# Load the built-in USArrests data and drop any incomplete rows.
data("USArrests")
rawdf <- na.omit(USArrests)
# USArrests stores its columns in the order Murder, Assault, UrbanPop, Rape.
# The labels are kept in that order so no variable is shown under another
# variable's name.
names(rawdf) <- c("Murder", "Assault", "UrbanPop", "Rape")
head(rawdf)
# Standardize every variable, then run PCA on the cleaned data frame.
arrests.pca <- prcomp(scale(rawdf), center = TRUE)
# Checking output of pca. prcomp function returns standard deviation
# (sdev), rotation and loadings-
names(arrests.pca)
print(arrests.pca)
summary(arrests.pca)
# pcaCharts() is not defined in the code shown here and is not part of base R,
# so the variance charts are drawn directly: proportion of variance explained
# per component, then the cumulative proportion.
pve <- arrests.pca$sdev^2 / sum(arrests.pca$sdev^2)
plot(pve, type = "b", ylim = c(0, 1),
     xlab = "Principal Component",
     ylab = "Proportion of Variance Explained")
plot(cumsum(pve), type = "b", ylim = c(0, 1),
     xlab = "Principal Component",
     ylab = "Cumulative Proportion of Variance Explained")
biplot(arrests.pca, scale = 0, cex = .7)
# The sign of a principal component is arbitrary; flipping both the loadings
# and the scores gives a mirrored but equivalent biplot.
pca.out <- arrests.pca
pca.out$rotation <- -pca.out$rotation
pca.out$x <- -pca.out$x
biplot(pca.out, scale = 0, cex = .7)
# Loadings of the first two principal components
pca.out$rotation[, 1:2]
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-7
OBJECTIVE- To perform Simple Linear Regression with R.
Program-
# Observations: x is the explanatory variable, y the response
x <- c(1, 2, 3, 4, 5)
y <- c(2, 4, 5, 4, 5)
# Least-squares fit of y on x
model <- lm(formula = y ~ x)
# Scatter plot of the raw observations (pch = 16 draws filled circles)
plot(
  x = x,
  y = y,
  col = "blue",
  pch = 16,
  main = "Simple Linear Regression",
  xlab = "Independent Variable",
  ylab = "Dependent Variable"
)
# Overlay the fitted regression line
abline(reg = model, col = "red")
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-8
OBJECTIVE- To perform single logistic regression with R.
Program-
# Install (only when missing) and load the necessary packages.
# requireNamespace() checks availability without attaching the package;
# library() then fails loudly if the install did not succeed.
if (!requireNamespace("caTools", quietly = TRUE)) install.packages("caTools")
if (!requireNamespace("ggplot2", quietly = TRUE)) install.packages("ggplot2")
library(caTools)
library(ggplot2)
data <- read.csv("/gender_submission.csv")
# target_variable is the 0/1 outcome being modelled; predictor_variable is the
# single explanatory column.
# NOTE(review): assumes the file has the columns "PassengerId" and "Survived"
# (the original 'passenger' name was reported as not existing) - change these
# two names to match the actual data.
target_variable <- "Survived"
predictor_variable <- "PassengerId"
stopifnot(all(c(target_variable, predictor_variable) %in% names(data)))
# Optional: if the data had a "Sex" column it could be recoded to 0/1 like this:
# data$Sex <- ifelse(tolower(trimws(data$Sex)) %in% c("female", "male"),
#                    ifelse(tolower(trimws(data$Sex)) == "female", 1, 0), NA)
# or removed entirely:
# data$Sex <- NULL
# Split the data into training and testing sets
set.seed(123) # For reproducibility
# Splitting on the outcome keeps the class proportions similar in both sets.
split <- sample.split(data[[target_variable]], SplitRatio = 0.7)
training_set <- subset(data, split == TRUE)
testing_set <- subset(data, split == FALSE)
# Fit outcome ~ predictor. (The formula previously used the same column on
# both sides, which regresses the outcome on itself.)
model <- glm(
  reformulate(predictor_variable, response = target_variable),
  family = binomial,
  data = training_set
)
summary(model)
# PROGRAMMED BY- SAURABH MISHRA
# Make predictions on the testing set
predictions <- predict(model, newdata = testing_set, type = "response")
# Classify as 1 when the predicted probability exceeds 0.5
predicted_classes <- ifelse(predictions > 0.5, 1, 0)
# Evaluate the model: rows are the actual outcomes, columns the predictions
confusion_matrix <- table(
  Actual = testing_set[[target_variable]],
  Predicted = predicted_classes
)
print(confusion_matrix)
# Plot the observed outcomes against the predictor with the fitted logistic
# curve on top. .data[[name]] looks a column up by its string name and
# replaces the deprecated aes_string().
ggplot(
  training_set,
  aes(x = .data[[predictor_variable]], y = .data[[target_variable]])
) +
  geom_point(aes(color = factor(.data[[target_variable]])), alpha = 0.5) +
  stat_smooth(
    method = "glm",
    method.args = list(family = "binomial"),
    se = FALSE,
    color = "blue"
  ) +
  labs(
    title = "Logistic Regression Curve",
    x = "Predictor Variable",
    y = "Probability of Target Variable"
  ) +
  theme_minimal()
PROGRAMMED BY- SAURABH MISHRA
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAM – 10
OBJECTIVE – To perform association analysis on a given dataset and evaluate
its accuracy.
Program –
!pip install mlxtend --upgrade # Upgrade mlxtend to the latest version
import numpy as np
import pandas as pd
import seaborn as sns
from matplotlib import pyplot as plt
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder
from sklearn.metrics import classification_report, accuracy_score
from mlxtend.frequent_patterns import apriori, association_rules
# Load dataset
data = pd.read_csv('/content/Groceries_dataset.csv')
# Remove duplicate rows
data.drop_duplicates(inplace=True)
# Convert 'Date' column to datetime; values that do not match day-month-year
# become NaT because of errors='coerce'
data['Date'] = pd.to_datetime(data['Date'], format='%d-%m-%Y', errors='coerce')
# Check for missing values
data.isnull().sum()
# Extract date features (if Date column exists).
# The four assignments form the body of the `if`, so they must be indented.
if 'Date' in data.columns:
    data['year'] = data['Date'].dt.year
    data['month'] = data['Date'].dt.month
    data['day'] = data['Date'].dt.day
    data['day_name'] = data['Date'].dt.day_name()
# Create item count per transaction: one row per (member, item, date) with the
# number of times that combination occurs
item_count = data.groupby(['Member_number', 'itemDescription',
                           'Date'])['itemDescription'].count().reset_index(name='Count')
PROGRAMMED BY- SAURABH MISHRA
# Add a 'Transaction' column: every distinct (member, date) pair is treated as
# one transaction and numbered from 1
item_count['Transaction'] = item_count.groupby(['Member_number', 'Date']).ngroup() + 1
# Pivot the data: one row per transaction, one column per item, cell = count
item_count_pivot = item_count.pivot_table(index='Transaction', columns='itemDescription',
                                          values='Count', aggfunc='sum').fillna(0)
# Convert to integer
item_count_pivot = item_count_pivot.astype('int32')
# Apriori Algorithm
# Collect the items of each transaction, then one-hot encode them.
basket = item_count.groupby('Transaction')['itemDescription'].apply(list).reset_index()
# get_dummies() yields 0/1 integers; apriori works on a boolean frame, so the
# encoding is cast to bool before it is passed on.
basket_encoded = basket['itemDescription'].str.join('|').str.get_dummies('|').astype(bool)
# Generate frequent itemsets (present in at least 1% of transactions)
frequent_itemsets = apriori(basket_encoded, min_support=0.01, use_colnames=True)
# Sort frequent itemsets
frequent_itemsets.sort_values('support', ascending=False).head(10)
# Generate association rules
rules = association_rules(frequent_itemsets, metric="lift", min_threshold=0.5, support_only=False,
                          num_itemsets=len(frequent_itemsets))
# Sort rules and display top 10
rules.sort_values('confidence', ascending=False)[['antecedents', 'consequents', 'support', 'lift',
                                                  'confidence']].head(10)
# Include 'confidence' in the column selection after sorting.
PROGRAMMED BY- SAURABH MISHRA
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAMMED BY- SAURABH MISHRA
PROGRAM-9
OBJECTIVE- To perform K-means clustering operation and visualize for iris data
set.
Program-
# "stats" ships with R itself and cannot be installed from a repository, so
# only the add-on packages are installed. Package names must be wrapped in
# plain ASCII quotes.
install.packages("cluster")
install.packages("ClusterR")
library(stats)
library(cluster)
library(ClusterR)
# Removing initial label of
# Species from original dataset
iris_1 <- iris[, -5]
# Fitting K-Means clustering Model
# to training dataset
set.seed(240) # Setting seed so the random starts are reproducible
# nstart = 20: try 20 random initialisations and keep the best one
kmeans.re <- kmeans(iris_1, centers = 3, nstart = 20)
kmeans.re
# Cluster identification for
# each observation
kmeans.re$cluster
# Confusion Matrix: true species (rows) against assigned cluster (columns)
cm <- table(iris$Species, kmeans.re$cluster)
cm
# Model Evaluation and visualization
plot(iris_1[c("Sepal.Length", "Sepal.Width")])
plot(iris_1[c("Sepal.Length", "Sepal.Width")],
     col = kmeans.re$cluster)
PROGRAMMED BY- SAURABH MISHRA
# Sepal scatter plot coloured by assigned cluster, this time with a title
plot(
  iris_1[c("Sepal.Length", "Sepal.Width")],
  main = "K-means with 3 clusters",
  col = kmeans.re$cluster
)
# Plotting cluster centers: all features first, then the two plotted ones
kmeans.re$centers
kmeans.re$centers[, c("Sepal.Length", "Sepal.Width")]
# pch selects the plotting symbol, cex scales its size
points(
  kmeans.re$centers[, c("Sepal.Length", "Sepal.Width")],
  pch = 8,
  cex = 3,
  col = 1:3
)
# Visualizing clusters
y_kmeans <- kmeans.re$cluster
clusplot(
  iris_1[, c("Sepal.Length", "Sepal.Width")],
  y_kmeans,
  main = "Cluster iris",
  xlab = "Sepal.Length",
  ylab = "Sepal.Width",
  lines = 0,
  shade = TRUE,
  color = TRUE,
  labels = 2,
  plotchar = FALSE,
  span = TRUE
)
Sample Output-
PROGRAMMED BY- SAURABH MISHRA
PROGRAMMED BY- SAURABH MISHRA