Loading in Data
January 26 2023
AGG
1. Create and save a dataset:
# Write the data frame `varName` to a comma-separated file with a header row.
# write.table() has no `header` argument (that one belongs to read.table());
# the header row is controlled by `col.names`. `row.names = FALSE` keeps the
# header aligned with the data columns instead of adding an unnamed
# row-name column.
write.table(x = varName, file = "outputFileName.csv",
            row.names = FALSE, col.names = TRUE, sep = ",")
2. These functions read in a data set
read.table() and read.csv() are the two main functions for reading in a data set.
read.table(file="path/to/data.csv", header=TRUE, sep = ",") - usually better
read.csv(file="data.csv", header = TRUE, sep = ",")
- csv is best format for writing data into R
Use an RDS object only when working in R: saveRDS()
- RDS creates an R object that is loaded in R which you can send to people. It is helpful with large datasets - creates much smaller files to send
saveRDS(my_data, file="FileName.RDS"): uses a variable you’ve created and named in R and saves it as its own object
readRDS("FileName.RDS")
p<-readRDS("FileName.RDS")
Long v. Wide data formats
- Long: more rows than columns. Contains values that DO repeat in ID column
- Wide: more columns than rows. Contains values that do not repeat in the ID column
- e.g.
library(tidyverse)## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2 ──
## ✔ ggplot2 3.4.0 ✔ purrr 1.0.1
## ✔ tibble 3.1.8 ✔ dplyr 1.0.10
## ✔ tidyr 1.2.1 ✔ stringr 1.5.0
## ✔ readr 2.1.3 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
library(ggthemes)
head(billboard)## # A tibble: 6 × 79
## artist track date.ent…¹ wk1 wk2 wk3 wk4 wk5 wk6 wk7 wk8 wk9
## <chr> <chr> <date> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 2 Pac Baby… 2000-02-26 87 82 72 77 87 94 99 NA NA
## 2 2Ge+her The … 2000-09-02 91 87 92 NA NA NA NA NA NA
## 3 3 Door… Kryp… 2000-04-08 81 70 68 67 66 57 54 53 51
## 4 3 Door… Loser 2000-10-21 76 76 72 69 67 65 55 59 62
## 5 504 Bo… Wobb… 2000-04-15 57 34 25 17 17 31 36 49 53
## 6 98^0 Give… 2000-08-19 51 39 34 26 26 19 2 2 3
## # … with 67 more variables: wk10 <dbl>, wk11 <dbl>, wk12 <dbl>, wk13 <dbl>,
## # wk14 <dbl>, wk15 <dbl>, wk16 <dbl>, wk17 <dbl>, wk18 <dbl>, wk19 <dbl>,
## # wk20 <dbl>, wk21 <dbl>, wk22 <dbl>, wk23 <dbl>, wk24 <dbl>, wk25 <dbl>,
## # wk26 <dbl>, wk27 <dbl>, wk28 <dbl>, wk29 <dbl>, wk30 <dbl>, wk31 <dbl>,
## # wk32 <dbl>, wk33 <dbl>, wk34 <dbl>, wk35 <dbl>, wk36 <dbl>, wk37 <dbl>,
## # wk38 <dbl>, wk39 <dbl>, wk40 <dbl>, wk41 <dbl>, wk42 <dbl>, wk43 <dbl>,
## # wk44 <dbl>, wk45 <dbl>, wk46 <dbl>, wk47 <dbl>, wk48 <dbl>, wk49 <dbl>, …
# this is wide format because there are so many columns
glimpse(billboard)## Rows: 317
## Columns: 79
## $ artist <chr> "2 Pac", "2Ge+her", "3 Doors Down", "3 Doors Down", "504 …
## $ track <chr> "Baby Don't Cry (Keep...", "The Hardest Part Of ...", "Kr…
## $ date.entered <date> 2000-02-26, 2000-09-02, 2000-04-08, 2000-10-21, 2000-04-…
## $ wk1 <dbl> 87, 91, 81, 76, 57, 51, 97, 84, 59, 76, 84, 57, 50, 71, 7…
## $ wk2 <dbl> 82, 87, 70, 76, 34, 39, 97, 62, 53, 76, 84, 47, 39, 51, 6…
## $ wk3 <dbl> 72, 92, 68, 72, 25, 34, 96, 51, 38, 74, 75, 45, 30, 28, 5…
## $ wk4 <dbl> 77, NA, 67, 69, 17, 26, 95, 41, 28, 69, 73, 29, 28, 18, 4…
## $ wk5 <dbl> 87, NA, 66, 67, 17, 26, 100, 38, 21, 68, 73, 23, 21, 13, …
## $ wk6 <dbl> 94, NA, 57, 65, 31, 19, NA, 35, 18, 67, 69, 18, 19, 13, 3…
## $ wk7 <dbl> 99, NA, 54, 55, 36, 2, NA, 35, 16, 61, 68, 11, 20, 11, 34…
## $ wk8 <dbl> NA, NA, 53, 59, 49, 2, NA, 38, 14, 58, 65, 9, 17, 1, 29, …
## $ wk9 <dbl> NA, NA, 51, 62, 53, 3, NA, 38, 12, 57, 73, 9, 17, 1, 27, …
## $ wk10 <dbl> NA, NA, 51, 61, 57, 6, NA, 36, 10, 59, 83, 11, 17, 2, 30,…
## $ wk11 <dbl> NA, NA, 51, 61, 64, 7, NA, 37, 9, 66, 92, 1, 17, 2, 36, N…
## $ wk12 <dbl> NA, NA, 51, 59, 70, 22, NA, 37, 8, 68, NA, 1, 3, 3, 37, N…
## $ wk13 <dbl> NA, NA, 47, 61, 75, 29, NA, 38, 6, 61, NA, 1, 3, 3, 39, N…
## $ wk14 <dbl> NA, NA, 44, 66, 76, 36, NA, 49, 1, 67, NA, 1, 7, 4, 49, N…
## $ wk15 <dbl> NA, NA, 38, 72, 78, 47, NA, 61, 2, 59, NA, 4, 10, 12, 57,…
## $ wk16 <dbl> NA, NA, 28, 76, 85, 67, NA, 63, 2, 63, NA, 8, 17, 11, 63,…
## $ wk17 <dbl> NA, NA, 22, 75, 92, 66, NA, 62, 2, 67, NA, 12, 25, 13, 65…
## $ wk18 <dbl> NA, NA, 18, 67, 96, 84, NA, 67, 2, 71, NA, 22, 29, 15, 68…
## $ wk19 <dbl> NA, NA, 18, 73, NA, 93, NA, 83, 3, 79, NA, 23, 29, 18, 79…
## $ wk20 <dbl> NA, NA, 14, 70, NA, 94, NA, 86, 4, 89, NA, 43, 40, 20, 86…
## $ wk21 <dbl> NA, NA, 12, NA, NA, NA, NA, NA, 5, NA, NA, 44, 43, 30, NA…
## $ wk22 <dbl> NA, NA, 7, NA, NA, NA, NA, NA, 5, NA, NA, NA, 50, 40, NA,…
## $ wk23 <dbl> NA, NA, 6, NA, NA, NA, NA, NA, 6, NA, NA, NA, NA, 39, NA,…
## $ wk24 <dbl> NA, NA, 6, NA, NA, NA, NA, NA, 9, NA, NA, NA, NA, 44, NA,…
## $ wk25 <dbl> NA, NA, 6, NA, NA, NA, NA, NA, 13, NA, NA, NA, NA, NA, NA…
## $ wk26 <dbl> NA, NA, 5, NA, NA, NA, NA, NA, 14, NA, NA, NA, NA, NA, NA…
## $ wk27 <dbl> NA, NA, 5, NA, NA, NA, NA, NA, 16, NA, NA, NA, NA, NA, NA…
## $ wk28 <dbl> NA, NA, 4, NA, NA, NA, NA, NA, 23, NA, NA, NA, NA, NA, NA…
## $ wk29 <dbl> NA, NA, 4, NA, NA, NA, NA, NA, 22, NA, NA, NA, NA, NA, NA…
## $ wk30 <dbl> NA, NA, 4, NA, NA, NA, NA, NA, 33, NA, NA, NA, NA, NA, NA…
## $ wk31 <dbl> NA, NA, 4, NA, NA, NA, NA, NA, 36, NA, NA, NA, NA, NA, NA…
## $ wk32 <dbl> NA, NA, 3, NA, NA, NA, NA, NA, 43, NA, NA, NA, NA, NA, NA…
## $ wk33 <dbl> NA, NA, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk34 <dbl> NA, NA, 3, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk35 <dbl> NA, NA, 4, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk36 <dbl> NA, NA, 5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk37 <dbl> NA, NA, 5, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk38 <dbl> NA, NA, 9, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk39 <dbl> NA, NA, 9, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA…
## $ wk40 <dbl> NA, NA, 15, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk41 <dbl> NA, NA, 14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk42 <dbl> NA, NA, 13, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk43 <dbl> NA, NA, 14, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk44 <dbl> NA, NA, 16, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk45 <dbl> NA, NA, 17, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk46 <dbl> NA, NA, 21, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk47 <dbl> NA, NA, 22, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk48 <dbl> NA, NA, 24, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk49 <dbl> NA, NA, 28, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk50 <dbl> NA, NA, 33, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk51 <dbl> NA, NA, 42, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk52 <dbl> NA, NA, 42, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk53 <dbl> NA, NA, 49, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk54 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk55 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk56 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk57 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk58 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk59 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk60 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk61 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk62 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk63 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk64 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk65 <dbl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk66 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk67 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk68 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk69 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk70 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk71 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk72 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk73 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk74 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk75 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
## $ wk76 <lgl> NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, N…
# Reshape billboard from wide to long: every non-missing weekly value
# becomes its own row.
b1 <- billboard %>%
  pivot_longer(
    # columns to stack into rows: every column whose name starts with "wk"
    cols = starts_with("wk"),
    # name of the new column that holds the former column headers
    names_to = "Week",
    # name of the new column that holds the cell values
    values_to = "Rank",
    # drop any resulting row whose value is NA
    values_drop_na = TRUE
  )
## only use view(b1) in an R script or in the console. R Markdown does not like this. Remove if knitting
- pivot_wider: best for making occupancy matrix
glimpse(fish_encounters)## Rows: 114
## Columns: 3
## $ fish <fct> 4842, 4842, 4842, 4842, 4842, 4842, 4842, 4842, 4842, 4842, 48…
## $ station <fct> Release, I80_1, Lisbon, Rstr, Base_TD, BCE, BCW, BCE2, BCW2, M…
## $ seen <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,…
view(fish_encounters)
fish_encounters %>%
pivot_wider(names_from = station, values_from=seen, values_fill=0) # names_from= which column you want to turn into multiple columns # values_from= which column contains the values for the new column cells## # A tibble: 19 × 12
## fish Release I80_1 Lisbon Rstr Base_TD BCE BCW BCE2 BCW2 MAE MAW
## <fct> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int> <int>
## 1 4842 1 1 1 1 1 1 1 1 1 1 1
## 2 4843 1 1 1 1 1 1 1 1 1 1 1
## 3 4844 1 1 1 1 1 1 1 1 1 1 1
## 4 4845 1 1 1 1 1 0 0 0 0 0 0
## 5 4847 1 1 1 0 0 0 0 0 0 0 0
## 6 4848 1 1 1 1 0 0 0 0 0 0 0
## 7 4849 1 1 0 0 0 0 0 0 0 0 0
## 8 4850 1 1 0 1 1 1 1 0 0 0 0
## 9 4851 1 1 0 0 0 0 0 0 0 0 0
## 10 4854 1 1 0 0 0 0 0 0 0 0 0
## 11 4855 1 1 1 1 1 0 0 0 0 0 0
## 12 4857 1 1 1 1 1 1 1 1 1 0 0
## 13 4858 1 1 1 1 1 1 1 1 1 1 1
## 14 4859 1 1 1 1 1 0 0 0 0 0 0
## 15 4861 1 1 1 1 1 1 1 1 1 1 1
## 16 4862 1 1 1 1 1 1 1 1 1 0 0
## 17 4863 1 1 0 0 0 0 0 0 0 0 0
## 18 4864 1 1 0 0 0 0 0 0 0 0 0
## 19 4865 1 1 1 0 0 0 0 0 0 0 0
# now each fish is represented only once, as a row, and each station is a column. Without values_fill, a fish never seen at a station would get NA there, so add values_fill=0 to record 0 instead
# this is the appropriate format if you want to create an occupancy matrix
Publicly available data through Dryad
- a curated resource that makes research data discoverable, freely reusable, and citable.
- Explore data sets here.
- Open dryad data using
read.table()
# Read the Dryad amphibian data set from the local data/ folder.
dryadData <- read.table(
  file = "data/veysey-babbitt_data_amphibians.csv",
  header = TRUE, # header=TRUE because the data do have a header
  sep = ","      # sep = "," because it's a csv file - separated by commas
)
glimpse(dryadData)## Rows: 132
## Columns: 13
## $ wetland <int> 7, 7, 7, 7, 7, 7, 19, 19, 19, 19, 19, 19, 20, 20, 2…
## $ year <int> 2004, 2005, 2006, 2007, 2008, 2009, 2004, 2005, 200…
## $ treatment <chr> "30m", "30m", "30m", "30m", "30m", "30m", "30m", "3…
## $ species <chr> "RASY", "RASY", "RASY", "RASY", "RASY", "RASY", "RA…
## $ count.total.adults <int> 91, 47, 128, 60, 108, NA, 74, 75, 162, 120, 14, 44,…
## $ No.males <int> 61, 15, 58, 46, 89, NA, 58, 53, 130, 63, 8, 24, 113…
## $ No.females <int> 30, 32, 70, 14, 19, NA, 16, 22, 32, 57, 6, 20, 33, …
## $ No.recap <int> NA, 3, 18, 10, 6, NA, NA, 5, 29, 13, 0, 0, NA, 13, …
## $ No.newcap <int> 82, 44, 110, 50, 102, NA, 70, 70, 133, 107, 14, 44,…
## $ mean.hydro <dbl> 101.00, 101.00, 101.00, 101.00, 101.00, 101.00, 107…
## $ sd.hydro <dbl> 34.58, 34.58, 34.58, 34.58, 34.58, 34.58, 38.01, 38…
## $ dv.cutornot.yrs <int> 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, …
## $ dv.30m.yrs <int> 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, 6, 1, 2, 3, 4, 5, …
head(dryadData)## wetland year treatment species count.total.adults No.males No.females
## 1 7 2004 30m RASY 91 61 30
## 2 7 2005 30m RASY 47 15 32
## 3 7 2006 30m RASY 128 58 70
## 4 7 2007 30m RASY 60 46 14
## 5 7 2008 30m RASY 108 89 19
## 6 7 2009 30m RASY NA NA NA
## No.recap No.newcap mean.hydro sd.hydro dv.cutornot.yrs dv.30m.yrs
## 1 NA 82 101 34.58 1 1
## 2 3 44 101 34.58 2 2
## 3 18 110 101 34.58 3 3
## 4 10 50 101 34.58 4 4
## 5 6 102 101 34.58 5 5
## 6 NA NA 101 34.58 6 6
table(dryadData$species) # allows you to see different groups of character column##
## AMMA RASY
## 66 66
summary(dryadData$mean.hydro)## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 44.83 99.50 116.50 125.96 183.17 197.00
str(dryadData$species)## chr [1:132] "RASY" "RASY" "RASY" "RASY" "RASY" "RASY" "RASY" "RASY" "RASY" ...
# Replace the species codes with readable labels to use for the plot.
# factor() orders the codes alphabetically (AMMA, RASY), so the labels
# are listed in that same order.
dryadData$species <- factor(
  dryadData$species,
  labels = c("Spotted Salamander", "Wood Frog")
)
class(dryadData$treatment)## [1] "character"
# Convert treatment to a factor with an explicit level order:
# Reference goes first, then 100m, then 30m.
dryadData$treatment <- factor(
  dryadData$treatment,
  levels = c("Reference", "100m", "30m")
)
# Dodged bar chart of the number of breeding adults for each
# wetland-by-treatment combination, filled by year, with one facet row
# per species.
p <- ggplot(
  data = dryadData,
  aes(
    x = interaction(wetland, treatment), # group treatment and wetland
    y = count.total.adults,
    fill = factor(year)
  )
) +
  geom_bar(position = "dodge", stat = "identity", color = "black") +
  ylab("Number of breeding adults") +
  xlab("") +
  scale_y_continuous(breaks = seq(0, 500, by = 100)) +
  scale_x_discrete(
    # labels for x axis, one per wetland-by-treatment group
    labels = c(
      "30 (Ref)", "124 (Ref)", "141 (Ref)",
      "25 (100m)", "39 (100m)", "55 (100m)", "129 (100m)",
      "7 (30m)", "19 (30m)", "20 (30m)", "59 (30m)"
    )
  ) +
  facet_wrap(~species, nrow = 2, strip.position = "right") +
  theme_few() +
  scale_fill_grey() +
  theme(
    panel.background = element_rect(fill = "gray94", color = "black"),
    legend.position = "top",
    legend.title = element_blank(),
    axis.title.y = element_text(size = 12, face = "bold", colour = "black"),
    strip.text.y = element_text(size = 10, face = "bold", colour = "black")
  ) +
  guides(fill = guide_legend(nrow = 1, byrow = TRUE))
p## Warning: Removed 2 rows containing missing values (`geom_bar()`).
# will not print out any text