IRIS Commands Practice

https://r4ds.had.co.nz/transform.html ---link for R commands
install.packages("dplyr");
install.packages("plyr")
install.packages("readr")
install.packages("FSelector");
library(dplyr);
library(readr)
library(FSelector)
library(plyr)
library(dplyr);
library(FSelector);
> setwd("C:\\Users\\aqiba\\OneDrive\\Desktop\\Data Analytics\\IRIS dataset");

> mydata <- read_csv("iris_new.csv");
-- Column specification --------------------------------------------------------

cols(
sepal.length = col_double(),
sepal.width = col_double(),
petal.length = col_double(),
petal.width = col_double(),
variety = col_character()
)
> mydata
> filter(mydata, variety =="Setosa" & sepal.length > 4);
> summary(mydata)
sepal.length sepal.width petal.length petal.width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
variety
Length:150
Class :character
Mode :character
> transform(mydata, variety =="Setose");

> show(mydata)
> View(mydata)
> select(mydata, variety, sepal.length);
> mutate(mydata, total = sepal.width + petal.width);

> select(mydata, total);
> View(mydata)
> transform(mydata, sepal.width = -sepal.width);
> View(mydata)
> v_change_sepal <- transform(mydata, sepal.width = -sepal.width);
> v_change_sepal
rename(iris_date_new, c("variety" = "species")); ------to rename any column of the data set
sum(is.na(iris_null_data)); ---for finding null values in dataset

[1] 5
OR
any(is.na(data)) -----for checking null values if any in entire dataframe
> for (i in which(sapply(iris_null_data, is.numeric))) {

+ iris_null_data[is.na(iris_null_data[, i]), i] <- mean(iris_null_data[, i],
na.rm = TRUE) + } -----for replacing null values
> newdata <- na.omit(iris_null_data)
------------------------links ------------------------------
https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781785286544/1/ch01lvl1sec11/data-preprocessing-techniques ---- preprocessing steps
http://dataanalyticsedge.com/2018/05/02/data-cleaning-using-r/ ------for functions that check for missing values
-----------------------------------
regression----------------------------------------------------------------
https://www.datacamp.com/community/tutorials/linear-regression-R
?USJudgeRatings
head(USJudgeRatings)
USJUDGE_DATE <- USJudgeRatings
X <- as.matrix([-10]);
X <- as.matrix(USJUDGE_DATE[-10]);
X
X
X <- as.matrix(USJUDGE_DATE[1]);
X
X
V_REG <- lm(RTEN ~ CONT + INTG, data = USJUDGE_DATE);
V_REG
plot(V_REG);
plot(V_REG);
abline(V_REG);
V_REG <- lm(RTEN ~ CONT data = USJUDGE_DATE);
V_REG <- lm(RTEN ~ CONT, data = USJUDGE_DATE);
abline(V_REG);
v_plot <- abline(V_REG);
v_plot
abline(V_REG);
plot(V_REG, pch = 16, col = "blue");
---------------------------------
CORRELATION--------------------------------------------------------------
head(iris_new)
> x <- iris_new[1:1];

> y <- iris_new[2:2];
> y
# A tibble: 150 x 1
sepal.width
<dbl>
1 3.5
2 3
3 3.2
4 3.1
5 3.6
6 3.9
7 3.4
8 3.4
9 2.9
10 3.1
# ... with 140 more rows
> x
# A tibble: 150 x 1
sepal.length
<dbl>
1 5.1
2 4.9
3 4.7
4 4.6
5 5
6 5.4
7 4.6
8 5
9 4.4
10 4.9
# ... with 140 more rows
> v_get_correlation <- cor(x,y);
> v_get_correlation
sepal.width
sepal.length -0.1175698
> v_get_correlation <- cor(x,y, method = "spearman");
> v_get_correlation
sepal.width
sepal.length -0.1667777
> install.packages("ggpubr");
Error in install.packages : Updating loaded packages
> install.packages("ggpubr")
WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/aqiba/OneDrive/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
>
>
>
> library(ggpubr);
Attaching package: ‘ggpubr’
The following object is masked from ‘package:plyr’:
mutate
> plot(v_get_correlation);
> ggqqplot(iris_new$sepal.length,iris_new$petal.width);
Error in data[, x] : incorrect number of dimensions
> ggqqplot(iris_new$sepal.length, ylab = "sepal_length");
> ggqqplot(iris_new$petal.width, ylab = "petal_width");
------------------------------------------------------------------
install.packages("rpart.plot");
WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/aqiba/OneDrive/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/rpart.plot_3.0.9.zip'
Content type 'application/zip' length 1034182 bytes (1009 KB)
downloaded 1009 KB
package ‘rpart.plot’ successfully unpacked and MD5 sums checked
The downloaded binary packages are in

C:\Users\aqiba\AppData\Local\Temp\RtmpcVOHBJ\downloaded_packages
> install.packages("data.tree");
> install.packages("party");
> library(dplyr);
> library(readr);
> library(plyr);
-------------------------------------------------------------------------------
You have loaded plyr after dplyr - this is likely to cause problems.
If you need functions from both plyr and dplyr, please load plyr first, then dplyr:
library(plyr); library(dplyr)
-------------------------------------------------------------------------------
> setwd("C:\\Users\\aqiba\\OneDrive\\Desktop\\Data Analytics\\IRIS dataset");
> iris_data <- read.csv("iris_new.csv");
> iris_data;
> library(rpart);
> library(rpart.plot);
> create_tree <- rpart(variety ~ sepal.length, data = iris_data);
> create_tree
n= 150
node), split, n, loss, yval, (yprob)

* denotes terminal node
1) root 150 100 Setosa (0.33333333 0.33333333 0.33333333)

2) sepal.length< 5.45 52 7 Setosa (0.86538462 0.11538462 0.01923077) *
3) sepal.length>=5.45 98 49 Virginica (0.05102041 0.44897959 0.50000000)
6) sepal.length< 6.15 43 15 Versicolor (0.11627907 0.65116279 0.23255814) *
7) sepal.length>=6.15 55 16 Virginica (0.00000000 0.29090909 0.70909091) *
> rpart.plot(create_tree, extra = 7);
Warning message:
extra=7 but the response has 3 levels (only the 2nd level is displayed)
> View(iris_data)
> create_tree <- rpart(variety ~ sepal.length + sepal.width, data = iris_data);
> create_tree
n= 150
node), split, n, loss, yval, (yprob)

1) root 150 100 Setosa (0.33333333 0.33333333 0.33333333)

2) sepal.length< 5.45 52 7 Setosa (0.86538462 0.11538462 0.01923077)
4) sepal.width>=2.8 45 1 Setosa (0.97777778 0.02222222 0.00000000) *
5) sepal.width< 2.8 7 2 Versicolor (0.14285714 0.71428571 0.14285714) *
3) sepal.length>=5.45 98 49 Virginica (0.05102041 0.44897959 0.50000000)
6) sepal.length< 6.15 43 15 Versicolor (0.11627907 0.65116279 0.23255814)
12) sepal.width>=3.1 7 2 Setosa (0.71428571 0.28571429 0.00000000) *
13) sepal.width< 3.1 36 10 Versicolor (0.00000000 0.72222222 0.27777778) *
7) sepal.length>=6.15 55 16 Virginica (0.00000000 0.29090909 0.70909091) *
Warning message:
extra=7 but the response has 3 levels (only the 2nd level is displayed)
-----------------------another method for building a tree---------------------------------------------
create_tree <- ctree(sepal.width ~ sepal.length, data = iris_data);

> create_tree
Model formula:
sepal.width ~ sepal.length
Fitted party:
[1] root: 3.057 (n = 150, err = 28.3)
Number of inner nodes: 0

Number of terminal nodes: 1
> plot(create_tree);
> create_tree <- ctree(petal.length ~ sepal.length, data = iris_data);
> create_tree
Model formula:
petal.length ~ sepal.length
Fitted party:
[1] root
| [2] sepal.length <= 5.5
| | [3] sepal.length <= 5.4: 1.769 (n = 52, err = 34.1)
| | [4] sepal.length > 5.4: 3.229 (n = 7, err = 10.2)
| [5] sepal.length > 5.5
| | [6] sepal.length <= 6.2
| | | [7] sepal.length <= 5.8: 3.924 (n = 21, err = 25.8)
| | | [8] sepal.length > 5.8: 4.711 (n = 19, err = 3.3)
| | [9] sepal.length > 6.2
| | | [10] sepal.length <= 7: 5.169 (n = 39, err = 8.9)
| | | [11] sepal.length > 7: 6.300 (n = 12, err = 1.4)
Number of inner nodes: 5

Number of terminal nodes: 6
> create_tree <- rpart(petal.length ~ sepal.length, data = iris_data);
The 'extra' argument:

0 No extra information
1 Number of observations in the node
2 Class models: Classification rate (ncorrect/nobservations)
Poisson and exp models: number of events
3 Class models: Misclassification rate
4 Class models: Probability per class
5 Class models: Like 4 but don't display the fitted class
6 Class models: Probability of second class only
8 Class models: Probability of the fitted class
9 Class models: Probability relative to all observations
10 Class models: like 9 but display the probability of the second class only
Add 100 to also display the percentage of observations in the node
Error: extra=3 is legal only for "class" models (you have an "anova" model)
> rpart.plot(create_tree);
The 'extra' argument:

0 No extra information
1 Number of observations in the node
2 Class models: Classification rate (ncorrect/nobservations)
Poisson and exp models: number of events
3 Class models: Misclassification rate
4 Class models: Probability per class
6 Class models: Probability of second class only
8 Class models: Probability of the fitted class
9 Class models: Probability relative to all observations
10 Class models: like 9 but display the probability of the second class only
Add 100 to also display the percentage of observations in the node
Error: extra=2 is legal only for "class", "poisson" and "exp" models (you have an
"anova" model)
> create_tree
n= 150
node), split, n, deviance, yval

1) root 150 464.325400 3.758000

2) sepal.length< 5.55 59 57.404070 1.942373
4) sepal.length< 5.45 52 34.090770 1.769231 *
5) sepal.length>=5.45 7 10.174290 3.228571 *
3) sepal.length>=5.55 91 86.327470 4.935165
6) sepal.length< 6.25 40 35.249750 4.297500
12) sepal.length< 5.85 21 25.778100 3.923810 *
13) sepal.length>=5.85 19 3.297895 4.710526 *
7) sepal.length>=6.25 51 22.056470 5.435294
14) sepal.length< 7.05 39 8.923077 5.169231 *
15) sepal.length>=7.05 12 1.400000 6.300000 *
----------------------------------------------------------------------------neural
network-----------------
v_neural <- read.csv("neural_net.csv");

> v_neural
TCK CSS placed
1 20 90 1
2 10 20 0
3 30 40 0
4 20 50 0
5 80 50 1
6 30 80 1
> nn=neuralnet(placed~TCK+CSS,data=v_neural, hidden=3,act.fct = "logistic",

+ linear.output = FALSE);
> plot(nn)
--------------------------------------------------------------------
clustering---------------------------------------------
>iris_data
> Iris_cluster = iris_data;
> Iris_cluster
> Iris_cluster$variety = NULL;
> Iris_cluster
> create_cluster <- kmeans(Iris_cluster,3);
> create_cluster
K-means clustering with 3 clusters of sizes 38, 50, 62
Cluster means:
1 6.850000 3.073684 5.742105 2.071053
2 5.006000 3.428000 1.462000 0.246000
3 5.901613 2.748387 4.393548 1.433871
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[39] 2 2 2 2 2 2 2 2 2 2 2 2 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[77] 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 1 1 1 3 1 1 1 1 1 1 3
[115] 3 1 1 1 1 3 1 3 1 3 1 1 3 3 1 1 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 1 3
Within cluster sum of squares by cluster:

[1] 23.87947 15.15100 39.82097
(between_SS / total_SS = 88.4 %)
Available components:
[1] "cluster" "centers" "totss" "withinss" "tot.withinss"

[6] "betweenss" "size" "iter" "ifault"
> create_cluster
K-means clustering with 2 clusters of sizes 97, 53
Cluster means:
1 6.301031 2.886598 4.958763 1.695876
2 5.005660 3.369811 1.560377 0.290566
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[39] 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[115] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1

[1] 123.79588 28.55208

> create_cluster
K-means clustering with 3 clusters of sizes 38, 62, 50
Cluster means:
1 6.850000 3.073684 5.742105 2.071053
2 5.901613 2.748387 4.393548 1.433871
3 5.006000 3.428000 1.462000 0.246000
Clustering vector:
[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[39] 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[77] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 1 2 1 1 1 1 1 1 2
[115] 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2

[1] 23.87947 39.82097 15.15100

> create_cluster$size
[1] 38 62 50
>
>
>
>
> table(iris_data$variety, create_cluster);
Error in table(iris_data$variety, create_cluster) :
all arguments must have the same length
> table(iris_data$variety, create_cluster$cluster);
1 2 3
Setosa 0 0 50
Versicolor 2 48 0
Virginica 36 14 0
> plot(iris_data[c("petal.length", "petal.width")], col = create_cluster$cluster);
> plot(iris_data[c("sepal.length", "sepal.width")], col = create_cluster$cluster);
>
---------------------------to find whether there are any null values in the entire data set----------------------------------
any(is.na(v_iris_data));
---------------------------to remove every row that contains a null or N/A value----------------------------
v_get_new_iris_data <- na.omit(v_iris_data);
> v_get_new_iris_data;
-----------------------------------principal component analysis---------------------------------
----https://www.datacamp.com/community/tutorials/pca-analysis-r
------https://aaronschlegel.me/principal-component-analysis-r-example.html
v_get_pca <- prcomp(v_get_new_iris_data[,1:3]);

> v_get_pca
>install.packages("ggfortify");
pca.plot <- autoplot(v_get_pca, data = v_get_new_iris_data, colour = 'Group');
Error in autoplot(v_get_pca, data = v_get_new_iris_data, colour = "Group") :
could not find function "autoplot"
> library(ggfortify);
> pca.plot <- autoplot(v_get_pca, data = v_get_new_iris_data, colour = 'Group');
> pca.plot
Error: Unknown colour name: Group
> pca.plot <- autoplot(v_get_pca, data = v_get_new_iris_data, colour = 'Red');
> pca.plot
-----------------------------------------create histogram------------------------------
> hist(v_get_new_iris_data$petal.length);
-----------------------------str command, similar to summary---------------------------
str(v_get_new_iris_data);
----------------------------------
correlation-------------------------------------------
x <- v_get_new_iris_data[1:1];
> y <- v_get_new_iris_data[3:3];
> v_find_correlation <- cor(x,y);
> v_find_correlation;
petal.length
sepal.length 0.8679478
> v_find_correlation <- cor(x,y, method = "spearman");
petal.length
> v_find_correlation <- cor(x,y, method = "kendal");
> v_find_correlation <- cor(x,y, method = "kendal");
petal.length
> library(ggplot2)
> plot(v_find_correlation);
?mtcar
No documentation for ‘mtcar’ in specified packages and libraries:
you could try ‘??mtcar’
> mtcars
> v_find_correlation <- cor(mtcars);
> v_find_correlation
> library(corrplot)
> plot(mtcars, method="circle");
> plot(mtcars, method="pie");
> install.packages("PerformanceAnalytics");
> library("PerformanceAnalytics");
> my_data <- mtcars[, c(1,3,4,5,6,7)]
> chart.Correlation(my_data, histogram=TRUE, pch=19);
------------------------------------regression
line---------------------------------
abline(lm(mpg~wt), col="red") # regression line (y~x)
lines(lowess(wt,mpg), col="blue") # lowess line (x,y)
> install.packages("ggcorrplot");
> v_linear_regression <- lm(iris$Petal.Length ~ iris$Sepal.Length +
iris$Sepal.Width, data = iris);
> v_linear_regression;
Call:
lm(formula = iris$Petal.Length ~ iris$Sepal.Length + iris$Sepal.Width,
data = iris)
Coefficients:
(Intercept) iris$Sepal.Length iris$Sepal.Width
-2.525 1.776 -1.339
> library(ggplot2);
> plot(v_linear_regression);

IRIS Commands Practice

Uploaded by

Copyright:

Available Formats

IRIS Commands Practice

Uploaded by

Document Information

Copyright

Available Formats

Share this document

Share or Embed Document

Sharing Options

Did you find this document useful?

Is this content inappropriate?

Copyright:

Available Formats

IRIS Commands Practice

Uploaded by

Copyright:

Available Formats

https://r4ds.had.co.nz/transform.html ---link for R commands

> setwd("C:\\Users\\aqiba\\OneDrive\\Desktop\\Data Analytics\\IRIS dataset");

-- Column specification --------------------------------------------------------

> transform(mydata, variety =="Setose");

> mutate(mydata, total = sepal.width + petal.width);

sum(is.na(iris_null_data)); ---for finding null values in dataset

any(is.na(data)) -----for checking null values if any in entire dataframe

> for (i in which(sapply(iris_null_data, is.numeric))) {

> x <- iris_new[1:1];

Attaching package: ‘ggpubr’

The following object is masked from ‘package:plyr’:

package ‘rpart.plot’ successfully unpacked and MD5 sums checked

The downloaded binary packages are in

node), split, n, loss, yval, (yprob)

1) root 150 100 Setosa (0.33333333 0.33333333 0.33333333)

node), split, n, loss, yval, (yprob)

1) root 150 100 Setosa (0.33333333 0.33333333 0.33333333)

> rpart.plot(create_tree, extra = 3);

-----------------------other method for

create_tree <- ctree(sepal.width ~ sepal.length, data = iris_data);

Number of inner nodes: 0

Number of inner nodes: 5

The 'extra' argument:

Add 100 to also display the percentage of observations in the node

The 'extra' argument:

Add 100 to also display the percentage of observations in the node

node), split, n, deviance, yval

1) root 150 464.325400 3.758000

v_neural <- read.csv("neural_net.csv");

> nn=neuralnet(placed~TCK+CSS,data=v_neural, hidden=3,act.fct = "logistic",

Within cluster sum of squares by cluster:

[1] "cluster" "centers" "totss" "withinss" "tot.withinss"

Within cluster sum of squares by cluster:

[1] "cluster" "centers" "totss" "withinss" "tot.withinss"

Within cluster sum of squares by cluster:

[1] "cluster" "centers" "totss" "withinss" "tot.withinss"

---------------------------to find if there is any null values in entire data

---------------------------to remove all rows having null values or N/A value in

v_get_pca <- prcomp(v_get_new_iris_data[,1:3]);

-----------------------------str command as similar to

You might also like