#### R refresher
## The command line
# R syntax (like most languages) is case sensitive!
# Generally speaking:
# (1) everything in R is an object
# (2) everything that happens is a function call
# Data types and data structures
# Data structures are extremely important to understand
#### Vectors
# Probably the most important/common data structure is vectors.
# A vector is an ordered collection of elements that all share one type, most commonly character, logical, integer or numeric.
# A numeric vector of length one
x <- 5
# Add x to itself
x + x
# A character vector holding a single string
myName <- "John"
myName
# Count the characters in each element of myName
nchar(myName)
# A logical vector
skyIsBlue <- TRUE
skyIsBlue
isTRUE(skyIsBlue)
# Numeric vectors hold numbers (e.g. 5, 19.33, 17.2).
# The colon operator builds a sequence of consecutive integers:
countToTen <- 1:10
countToTen
# Number of elements in the vector
length(countToTen)
# Extract the fifth element
countToTen[[5]]
# Extract the first three elements
countToTen[seq_len(3)]
# Inspect the storage type and the structure of the vector
typeof(countToTen)
str(countToTen)
# Inspect the structure of myName
str(myName)
# A vector created with c() holds a single type. When types are mixed, every
# element is coerced to the most flexible type present,
# e.g. numbers combined with strings all become 'character'
mixedVector <- c(1, "a", 2, "b")
str(mixedVector)
#### Matrices
# A matrix is an atomic vector with two dimensions (rows and columns),
# so all of its elements share one type.
# Values are filled in column by column, for example:
myMatrix <- matrix(seq_len(6), nrow = 2, ncol = 3)
myMatrix
# cbind() binds vectors together as the columns of a matrix, for example:
vector1 <- seq_len(5)
vector2 <- 6:10
myMatrix2 <- cbind(vector1, vector2)
# The column names are taken from the names of the bound objects:
colnames(myMatrix2)
# Element in row 1, column 2
myMatrix2[1, 2]
# Rows 1 to 3 of column 2
myMatrix2[seq_len(3), 2]
# The whole of column 2 (an empty row index selects every row)
myMatrix2[, 2]
#### Lists
# A list is a general-purpose container: unlike the vectors above,
# each of its elements may have a different type, e.g.
myList <- list("Hello", 1, TRUE, "Goodbye")
myList
# Single brackets return a sub-list; double brackets extract the element itself:
myList[[1]]
# What is the class of the first list element?
class(myList[[1]])
# ... and of the second list element?
class(myList[[2]])
#### Factors
# A factor stores categorical data as a fixed set of levels
# (here "no" and "yes").
myFactor <- factor(
  c("yes", "no", "no", "yes", "yes")
)
myFactor
# Positions of the elements whose value is "yes"
which(myFactor == "yes")
#### Data frames
# Data frames are central to R, and we will use them a lot in this course.
# Every column has the same number of rows, but each column may have its
# own data type, for example:
df <- data.frame(
  names = c("John", "Jane", "Sally"),
  testScores = c(99, 84, 30),
  failingGrade = c(FALSE, FALSE, TRUE)
)
df
# Two ways to pull out the third column: by position ...
df[, 3]
# ... or by name
df$failingGrade
# How many rows does the data frame have?
nrow(df)
# Show the type and first values of every column
str(df)
#### Naming objects
# Attach a name to each element of a vector; the names are printed
# above the values.
x <- setNames(1:3, c("Charlie", "Echo", "Foxtrot"))
x
#### Missing values
# Missing values are written NA; NaN ("not a number") marks the result of an
# undefined mathematical operation
x <- c(1, 2, NA, 4, 5)
x
# TRUE at every position that holds a missing value
is.na(x)
#### Functions in R
# Functions are objects that take some input and then return some output.
#
# myFunction: add two values.
#   x, y    - the values to add together
#   returns - the sum x + y
myFunction <- function(x, y) {
  # The value of the last expression is what the function returns. Ending on
  # a bare expression (rather than an assignment) returns it visibly, so
  # calling myFunction(4, 5) at the console prints the result.
  x + y
}
# Call the function with literal values and keep the result
output <- myFunction(4, 5)
output
# Arguments can also be existing objects or the result of a calculation
someNumber <- 5
anotherNumber <- pi * 10^2
output <- myFunction(someNumber, anotherNumber)
output
#### Reading and writing files from R
# Build a small data frame, write it to a CSV file in the working directory,
# then read it back in.
df <- data.frame(
  names = c("John", "Jane", "Sally"),
  testScores = c(99, 84, 30),
  failingGrade = c(FALSE, FALSE, TRUE)
)
# row.names = FALSE: by default write.csv() also writes the row names as an
# extra unnamed first column, which read.csv() would read back as a column
# called "X".
write.csv(df, "dataframe.csv", row.names = FALSE)
# read.csv() works out each column's type again from the text in the file
dataIn <- read.csv("dataframe.csv")
dataIn