IRIS Commands Practice
IRIS Commands Practice
IRIS Commands Practice
# Package setup ----
# Install only the packages that are not yet available, so re-running these
# notes does not re-download every package each time.
required_packages <- c("plyr", "dplyr", "readr", "FSelector")
is_installed <- vapply(
  required_packages, requireNamespace, logical(1), quietly = TRUE
)
if (!all(is_installed)) {
  install.packages(required_packages[!is_installed])
}

# Attach plyr BEFORE dplyr. Attaching plyr after dplyr masks dplyr verbs such
# as mutate(), summarise(), rename() and arrange() with plyr's versions.
# Each package is attached once; repeated library() calls are no-ops.
library(plyr)
library(dplyr)
library(readr)
library(FSelector)
> mydata
> filter(mydata, variety =="Setosa" & sepal.length > 4);
> summary(mydata)
sepal.length sepal.width petal.length petal.width
Min. :4.300 Min. :2.000 Min. :1.000 Min. :0.100
1st Qu.:5.100 1st Qu.:2.800 1st Qu.:1.600 1st Qu.:0.300
Median :5.800 Median :3.000 Median :4.350 Median :1.300
Mean :5.843 Mean :3.057 Mean :3.758 Mean :1.199
3rd Qu.:6.400 3rd Qu.:3.300 3rd Qu.:5.100 3rd Qu.:1.800
Max. :7.900 Max. :4.400 Max. :6.900 Max. :2.500
variety
Length:150
Class :character
Mode :character
------------------------links ------------------------------
https://subscription.packtpub.com/book/big_data_and_business_intelligence/9781785286544/1/ch01lvl1sec11/data-preprocessing-techniques ---- preprocessing steps
http://dataanalyticsedge.com/2018/05/02/data-cleaning-using-r/ ------ for functions that check for missing values
----------------------------------- regression ----------------------------------------------------------------
https://www.datacamp.com/community/tutorials/linear-regression-R
# Linear regression on the built-in USJudgeRatings data set ----

?USJudgeRatings # open the help page for the data set
head(USJudgeRatings)
USJUDGE_DATE <- USJudgeRatings

# Column-subsetting experiments: a negative index drops that column, a
# positive index keeps only that column. Each result is coerced to a matrix.
# X is overwritten every time, so only the last assignment survives.
X <- as.matrix(USJUDGE_DATE[-10])
X
X <- as.matrix(USJUDGE_DATE[-7])
X
X <- as.matrix(USJUDGE_DATE[1])
X
X <- as.matrix(USJUDGE_DATE[-8])
X

# Multiple regression: RTEN explained by CONT and INTG.
V_REG <- lm(RTEN ~ CONT + INTG, data = USJUDGE_DATE)
V_REG
# plot() on an lm object draws the model's diagnostic plots.
# abline() is deliberately not used here: with two predictors it would only
# use the first two coefficients (and warn), so the line would be misleading.
plot(V_REG)

# Simple regression: RTEN explained by CONT only. With a single predictor the
# fitted model is a straight line, so it can be drawn with abline().
V_REG <- lm(RTEN ~ CONT, data = USJUDGE_DATE)

# Draw the scatter plot first so that abline() has the right axes to draw on.
plot(RTEN ~ CONT, data = USJUDGE_DATE, pch = 16, col = "blue")
# abline() is called for its side effect (adding the line) and returns NULL,
# so v_plot does not hold a plot object; it is kept only because the original
# notes defined it.
v_plot <- abline(V_REG)
v_plot

# Diagnostic plots for the simple model.
plot(V_REG, pch = 16, col = "blue")
--------------------------------- CORRELATION --------------------------------------------------------------
head(iris_new)
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/aqiba/OneDrive/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
>
>
>
> library(ggpubr);
mutate
> plot(v_get_correlation);
> ggqqplot(iris_new$sepal.length,iris_new$petal.width);
Error in data[, x] : incorrect number of dimensions
> ggqqplot(iris_new$sepal.length, ylab = "sepal_length");
> ggqqplot(iris_new$petal.width, ylab = "petal_width");
------------------------------------------------------------------
install.packages("rpart.plot");
WARNING: Rtools is required to build R packages but is not currently installed.
Please download and install the appropriate version of Rtools before proceeding:
https://cran.rstudio.com/bin/windows/Rtools/
Installing package into ‘C:/Users/aqiba/OneDrive/Documents/R/win-library/4.0’
(as ‘lib’ is unspecified)
trying URL 'https://cran.rstudio.com/bin/windows/contrib/4.0/rpart.plot_3.0.9.zip'
Content type 'application/zip' length 1034182 bytes (1009 KB)
downloaded 1009 KB
Model formula:
sepal.width ~ sepal.length
Fitted party:
[1] root: 3.057 (n = 150, err = 28.3)
Model formula:
petal.length ~ sepal.length
Fitted party:
[1] root
| [2] sepal.length <= 5.5
| | [3] sepal.length <= 5.4: 1.769 (n = 52, err = 34.1)
| | [4] sepal.length > 5.4: 3.229 (n = 7, err = 10.2)
| [5] sepal.length > 5.5
| | [6] sepal.length <= 6.2
| | | [7] sepal.length <= 5.8: 3.924 (n = 21, err = 25.8)
| | | [8] sepal.length > 5.8: 4.711 (n = 19, err = 3.3)
| | [9] sepal.length > 6.2
| | | [10] sepal.length <= 7: 5.169 (n = 39, err = 8.9)
| | | [11] sepal.length > 7: 6.300 (n = 12, err = 1.4)
Error: extra=3 is legal only for "class" models (you have an "anova" model)
> rpart.plot(create_tree);
> rpart.plot(create_tree, extra = 2);
Error: extra=2 is legal only for "class", "poisson" and "exp" models (you have an
"anova" model)
> create_tree
n= 150
--------------------------------------------------------------------
clustering---------------------------------------------
>iris_data
> Iris_cluster = iris_data;
> Iris_cluster
> Iris_cluster$variety = NULL;
> Iris_cluster
> create_cluster <- kmeans(Iris_cluster,3);
> create_cluster
K-means clustering with 3 clusters of sizes 38, 50, 62
Cluster means:
sepal.length sepal.width petal.length petal.width
1 6.850000 3.073684 5.742105 2.071053
2 5.006000 3.428000 1.462000 0.246000
3 5.901613 2.748387 4.393548 1.433871
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[39] 2 2 2 2 2 2 2 2 2 2 2 2 3 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[77] 3 1 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 1 3 1 1 1 1 3 1 1 1 1 1 1 3
[115] 3 1 1 1 1 3 1 3 1 3 1 1 3 3 1 1 1 1 1 3 1 1 1 1 3 1 1 1 3 1 1 1 3 1 1 3
Available components:
Cluster means:
sepal.length sepal.width petal.length petal.width
1 6.301031 2.886598 4.958763 1.695876
2 5.005660 3.369811 1.560377 0.290566
Clustering vector:
[1] 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[39] 2 2 2 2 2 2 2 2 2 2 2 2 1 1 1 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[77] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 2 1 1 1 1 2 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
[115] 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1
Cluster means:
sepal.length sepal.width petal.length petal.width
1 6.850000 3.073684 5.742105 2.071053
2 5.901613 2.748387 4.393548 1.433871
3 5.006000 3.428000 1.462000 0.246000
Clustering vector:
[1] 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3 3
[39] 3 3 3 3 3 3 3 3 3 3 3 3 2 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2
[77] 2 1 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 2 1 2 1 1 1 1 2 1 1 1 1 1 1 2
[115] 2 1 1 1 1 2 1 2 1 2 1 1 2 2 1 1 1 1 1 2 1 1 1 1 2 1 1 1 2 1 1 1 2 1 1 2
Available components:
1 2 3
Setosa 0 0 50
Versicolor 2 48 0
Virginica 36 14 0
> plot(iris_data[c("petal.length", "petal.width")], col = create_cluster$cluster);
> plot(iris_data[c("sepal.length", "sepal.width")], col = create_cluster$cluster);
>
----------------------------------------- create histogram ------------------------------
> hist(v_get_new_iris_data$petal.length);
x <- v_get_new_iris_data[1:1];
> y <- v_get_new_iris_data[3:3];
> v_find_correlation <- cor(x,y);
> v_find_correlation;
petal.length
sepal.length 0.8679478
> v_find_correlation <- cor(x,y, method = "spearman");
> v_find_correlation;
petal.length
sepal.length 0.8800297
> v_find_correlation <- cor(x,y, method = "kendal");
> v_find_correlation <- cor(x,y, method = "kendal");
> v_find_correlation;
petal.length
sepal.length 0.7157654
> library(ggplot2)
> plot(v_find_correlation);
?mtcar
No documentation for ‘mtcar’ in specified packages and libraries:
you could try ‘??mtcar’
> mtcars
> v_find_correlation <- cor(mtcars);
> v_find_correlation
> library(corrplot)
> plot(mtcars, method="circle");
> plot(mtcars, method="pie");
> install.packages("PerformanceAnalytics");
> library("PerformanceAnalytics");
> my_data <- mtcars[, c(1,3,4,5,6,7)]
> chart.Correlation(my_data, histogram=TRUE, pch=19);
------------------------------------ regression line ---------------------------------
# Scatter plot of mpg against wt (mtcars) with two fitted lines on top.
# The columns are looked up in mtcars explicitly; bare `mpg` / `wt` only
# resolve if mtcars has been attach()ed, which these notes never do.
plot(mpg ~ wt, data = mtcars) # abline()/lines() need an existing plot
abline(lm(mpg ~ wt, data = mtcars), col = "red") # regression line (y~x)
lines(lowess(mtcars$wt, mtcars$mpg), col = "blue") # lowess line (x,y)
> install.packages("ggcorrplot");
> v_linear_regression <- lm(iris$Petal.Length ~ iris$Sepal.Length +
iris$Sepal.Width, data = iris);
> v_linear_regression;
Call:
lm(formula = iris$Petal.Length ~ iris$Sepal.Length + iris$Sepal.Width,
data = iris)
Coefficients:
(Intercept) iris$Sepal.Length iris$Sepal.Width
-2.525 1.776 -1.339
> library(ggplot2);
> plot(v_linear_regression);