Load Iris dataset
examples/data-frames/load_iris.R
iris = read.table(file="data/iris.txt", sep=" ", header=T, stringsAsFactors=T) head(iris) # hist(iris$Sepal.Length) hist(iris$Petal.Width) mean_petal_width = mean(iris$Petal.Width) hist( x=iris$Petal.Width, breaks=10, col="light blue", main="Distribution of Petal Width", xlab="Width of Petal (cm)", #ylab="", sub=paste("Mean", mean_petal_width), # paste concatentes values ) #help(hist) #View(iris) #tail(iris) # iris #class(iris) #dim(iris) # rows, columns # iris[2, 4] # iris[1, ] # iris[, 1] #iris$Sepal.Length #iris['Sepal.Length'] # numeric vector #iris[c('Sepal.Length', 'Petal.Length')] # data.frame #iris[,c('Sepal.Length', 'Petal.Length')] # data.frame #summary(iris) colnames(iris) plot(iris$Petal.Length, iris$Sepal.Length) help(plot) # or ?plot plot( x=iris$Petal.Length, y=iris$Sepal.Length, #type="l", # or p for points #pch=20, # change how the points look like # col (short for color) #col="purple red", col=iris$Species, #pch=19 xlab="X Title", ylab="Y Title", main="Main heading", sub="Sub heading", ) # In the iris dataset when we draw a histogram how does it pick colors when we only have names? # Because it is a factor (beacise of the stringsAsFactors) class(iris$Species) # "factor" # Each "level" in the factor has a numerical value (1, 2, 3) and in R each color also a number (1 = black 2 = red, etc) plot( x=iris$Petal.Length, y=iris$Sepal.Length, pch=as.numeric(iris$Species), ) # TODO: how to pick the specific colors? # how to color the point accorind to some other condition? e.g. iris$Sepal.width > 2 plot( x=iris$Petal.Length, y=iris$Sepal.Length, col=as.numeric(iris$Sepal.Width > 3)+1 ) # iris$Sepal.Width > 2 is a boolean vector # as.numeric(iris$Sepal.Width > 2) is a numerical vector of 0 and 1 values # careful: color 0 is white so we won't see it, that's why we add 1 so instead of 0 and 1 we will get 1 and 2 values. levels(iris$Species) # "setosa" "versicolor" "virginica" help(data.frame) pairs(iris[,1:4]) # pairwise relation graphs pairs(iris[,1:4], col=as.numeric(iris$Species)) pairs(iris[,1:4], col=as.numeric(iris$Species), upper.panel=NULL)