Assignment 5

Abigail Griffin

2023-02-03

Daily Assignment 5

2: Create a list with the following named elements:

my_matrix, which is a 4 x 4 matrix filled with random uniform values; my_logical, which is a 100-element vector of TRUE or FALSE values (you can do this efficiently by setting up a vector of random values and then applying an inequality to it); and my_letters, which is a 26-element vector of all the lower-case letters in random order.

# Draw 16 random uniform values on [0, 20] to populate the matrix
my_data <- runif(n = 16, min = 0, max = 20)
# Arrange those 16 values into a 4 x 4 matrix
my_matrix <- matrix(data = my_data, nrow = 4, ncol = 4)
print(my_matrix)
##           [,1]      [,2]      [,3]      [,4]
## [1,]  9.311300 16.421334  7.106850 12.668899
## [2,] 10.150067  8.781915 19.418150  7.749743
## [3,]  7.320184 19.550999 10.916902  9.017582
## [4,] 19.979705  7.602535  3.641672 17.076048
# Draw 100 random uniform values on [0, 100]
random_numbers <- runif(n = 100, min = 0, max = 100)
# The comparison is vectorized: TRUE where a value exceeds 50, FALSE otherwise
my_logical <- random_numbers > 50

# Shuffle the 26 lower-case letters into random order (a character vector)
my_letters <- sample(letters)
typeof(my_letters)
## [1] "character"
# The exercise asks for *named* elements, so name each component explicitly;
# the list can then be indexed by name as well as by position
assignment5_list <- list(
  my_matrix = my_matrix,
  my_logical = my_logical,
  my_letters = my_letters
)
print(assignment5_list)
## $my_matrix
##           [,1]      [,2]      [,3]      [,4]
## [1,]  9.311300 16.421334  7.106850 12.668899
## [2,] 10.150067  8.781915 19.418150  7.749743
## [3,]  7.320184 19.550999 10.916902  9.017582
## [4,] 19.979705  7.602535  3.641672 17.076048
## 
## $my_logical
##   [1] FALSE FALSE  TRUE  TRUE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE  TRUE
##  [13]  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE  TRUE  TRUE FALSE FALSE  TRUE
##  [25]  TRUE FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE FALSE FALSE  TRUE
##  [37] FALSE FALSE  TRUE FALSE FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE  TRUE
##  [49]  TRUE FALSE  TRUE FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE
##  [61]  TRUE  TRUE  TRUE  TRUE  TRUE FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE
##  [73]  TRUE  TRUE FALSE FALSE  TRUE  TRUE FALSE  TRUE FALSE  TRUE  TRUE FALSE
##  [85] FALSE FALSE  TRUE FALSE  TRUE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE
##  [97]  TRUE FALSE  TRUE  TRUE
## 
## $my_letters
##  [1] "o" "f" "r" "k" "i" "h" "c" "m" "s" "u" "b" "g" "a" "l" "y" "n" "v" "z" "t"
## [20] "j" "x" "p" "d" "e" "w" "q"

Then, complete the following steps:

a: Create a new list that contains element [2,2] of the matrix, the second element of the logical vector, and the second element of the letters vector.

# Build a new list from one value of each component: matrix cell [2, 2],
# the second logical value, and the second letter
new_list <- list(
  assignment5_list[[1]][2, 2],
  assignment5_list[[2]][2],
  assignment5_list[[3]][2]
)
print(new_list)
## [[1]]
## [1] 8.781915
## 
## [[2]]
## [1] FALSE
## 
## [[3]]
## [1] "f"

b: use the typeof() function to confirm the underlying data types of each component in this list

# Step b refers to the list built in step a, so inspect new_list's components
typeof(new_list[[1]]) # double (the matrix element)
## [1] "double"
typeof(new_list[[2]]) # logical
## [1] "logical"
typeof(new_list[[3]]) # character
## [1] "character"

c: i) combine the underlying elements from the new list into a single atomic vector with the c() function. ii) What is the data type of this vector?

# i) flatten the three list components into one atomic vector with c()
single_vector <- c(
  new_list[[1]],
  new_list[[2]],
  new_list[[3]]
)
print(single_vector)
## [1] "8.78191475290805" "FALSE"            "f"
# ii) inspect the type of the combined vector
typeof(single_vector)
## [1] "character"
# The result is character: c() coerces mixed inputs to the most general type
# present, following the hierarchy logical -> integer -> double -> character

3: Create an empty data frame with two variables (= columns) and 26 observations (= rows) below:

  • call the first column/variable my_unifs and fill it with 26 random uniform values from 0 to 10
  • call the second variable my_letters and fill it with 26 capital letters in random order.
# Draw a numeric vector of 26 random uniform values on [0, 10]
my_unifs <- runif(n = 26, min = 0, max = 10)
print(my_unifs)
##  [1] 5.2370834 3.4730155 6.7812351 4.4853004 7.8295271 2.7533252 3.6025203
##  [8] 2.8562958 8.5754531 9.4978003 8.1594107 4.2486706 0.7512194 9.0793182
## [15] 8.4409602 3.6029800 5.7503393 9.1163914 5.7144436 6.6285327 6.5645899
## [22] 0.6624495 8.7508541 8.9456088 7.7865269 9.1752125
# Shuffle the 26 lower-case letters into random order
my_letters <- sample(letters)
# Upper-case the shuffled letters and keep them under a separate name
myupper_letters <- toupper(my_letters)
print(myupper_letters)
##  [1] "Q" "G" "R" "Z" "T" "O" "U" "A" "N" "V" "E" "L" "I" "M" "W" "H" "S" "D" "C"
## [20] "B" "J" "X" "F" "P" "K" "Y"
  • for the first variable, use a single line of code in R to select 4 random rows and replace the numerical values in those rows with NA.
# Bind the two 26-element vectors into a data frame, keeping the letters as
# character strings rather than factors
q3df <- data.frame(
  my_unifs = my_unifs,
  myupper_letters = myupper_letters,
  stringsAsFactors = FALSE
)
print(q3df)
##     my_unifs myupper_letters
## 1  5.2370834               Q
## 2  3.4730155               G
## 3  6.7812351               R
## 4  4.4853004               Z
## 5  7.8295271               T
## 6  2.7533252               O
## 7  3.6025203               U
## 8  2.8562958               A
## 9  8.5754531               N
## 10 9.4978003               V
## 11 8.1594107               E
## 12 4.2486706               L
## 13 0.7512194               I
## 14 9.0793182               M
## 15 8.4409602               W
## 16 3.6029800               H
## 17 5.7503393               S
## 18 9.1163914               D
## 19 5.7144436               C
## 20 6.6285327               B
## 21 6.5645899               J
## 22 0.6624495               X
## 23 8.7508541               F
## 24 8.9456088               P
## 25 7.7865269               K
## 26 9.1752125               Y
# Fix the RNG seed so the same four rows are picked on every run
set.seed(1)
# Replace my_unifs with NA in four randomly sampled rows
q3df$my_unifs[sample(nrow(q3df), 4)] <- NA
print(q3df)
##     my_unifs myupper_letters
## 1         NA               Q
## 2  3.4730155               G
## 3  6.7812351               R
## 4         NA               Z
## 5  7.8295271               T
## 6  2.7533252               O
## 7         NA               U
## 8  2.8562958               A
## 9  8.5754531               N
## 10 9.4978003               V
## 11 8.1594107               E
## 12 4.2486706               L
## 13 0.7512194               I
## 14 9.0793182               M
## 15 8.4409602               W
## 16 3.6029800               H
## 17 5.7503393               S
## 18 9.1163914               D
## 19 5.7144436               C
## 20 6.6285327               B
## 21 6.5645899               J
## 22 0.6624495               X
## 23 8.7508541               F
## 24 8.9456088               P
## 25        NA               K
## 26 9.1752125               Y
  • for the first variable, write a single line of R code to identify which rows have the missing values.
# Show the rows whose my_unifs value is missing; the printed row names
# identify which rows they are
q3df[is.na(q3df[["my_unifs"]]), ]
##    my_unifs myupper_letters
## 1        NA               Q
## 4        NA               Z
## 7        NA               U
## 25       NA               K
  • re-order the entire data frame to arrange the second variable in alphabetical order.
# Reorder all rows of the data frame so the letters column is alphabetical;
# order(..., decreasing = TRUE) would sort it in reverse instead
abc_q3df <- q3df[order(q3df[["myupper_letters"]]), ]
print(abc_q3df)
##     my_unifs myupper_letters
## 8  2.8562958               A
## 20 6.6285327               B
## 19 5.7144436               C
## 18 9.1163914               D
## 11 8.1594107               E
## 23 8.7508541               F
## 2  3.4730155               G
## 16 3.6029800               H
## 13 0.7512194               I
## 21 6.5645899               J
## 25        NA               K
## 12 4.2486706               L
## 14 9.0793182               M
## 9  8.5754531               N
## 6  2.7533252               O
## 24 8.9456088               P
## 1         NA               Q
## 3  6.7812351               R
## 17 5.7503393               S
## 5  7.8295271               T
## 7         NA               U
## 10 9.4978003               V
## 15 8.4409602               W
## 22 0.6624495               X
## 26 9.1752125               Y
## 4         NA               Z
  • calculate the column mean for the first variable.
# Mean of the first variable; na.rm = TRUE drops the NA values before averaging
mean(abc_q3df[["my_unifs"]], na.rm = TRUE)
## [1] 6.243529
  • rename the second column ‘my_LETTERS’
# Rename the letters column by matching on its current name, so the rename
# does not depend on the column's position; use `<-` for assignment
colnames(abc_q3df)[colnames(abc_q3df) == "myupper_letters"] <- "my_LETTERS"
print(abc_q3df)
##     my_unifs my_LETTERS
## 8  2.8562958          A
## 20 6.6285327          B
## 19 5.7144436          C
## 18 9.1163914          D
## 11 8.1594107          E
## 23 8.7508541          F
## 2  3.4730155          G
## 16 3.6029800          H
## 13 0.7512194          I
## 21 6.5645899          J
## 25        NA          K
## 12 4.2486706          L
## 14 9.0793182          M
## 9  8.5754531          N
## 6  2.7533252          O
## 24 8.9456088          P
## 1         NA          Q
## 3  6.7812351          R
## 17 5.7503393          S
## 5  7.8295271          T
## 7         NA          U
## 10 9.4978003          V
## 15 8.4409602          W
## 22 0.6624495          X
## 26 9.1752125          Y
## 4         NA          Z