This report is automatically generated with the R package knitr (version 1.40).
source("R Functions/functions_QA data.R") ## LOAD MERGED FISH DATA ## FishMaster <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/1a_FishMaster_merged.xlsx', sheet=1, guess_max = 30000) ## PREP DATA FOR GIS SCOPING ## FishMaster <- FishMaster %>% # Fix CoordSystem nomenclature mutate(CoordSystem = recode(CoordSystem, "NAD 83" = 'NAD83', "NAD 1927" = 'NAD27', "NR" = 'NAD83', "nr" = 'NAD83', "OTHER" = 'NAD83', "UNKWN" = 'NAD83', .missing = 'NAD83'), TargetLatitude = ifelse(grepl('[a-zA-Z]', TargetLatitude), NA_character_, TargetLatitude), TargetLatitude = gsub('[[:space:]]','',TargetLatitude), #need to remove spaces in text so NAs aren't introduced by coercion TargetLatitude = as.numeric(TargetLatitude), TargetLongitude = ifelse(grepl('[a-zA-Z]',TargetLongitude), NA_character_, TargetLongitude), TargetLongitude = gsub('[[:space:]]','',TargetLongitude), #need to remove spaces in text so NAs aren't introduced by coercion TargetLongitude = as.numeric(TargetLongitude), TargetLongitude = ifelse(TargetLongitude > 0, -TargetLongitude, TargetLongitude)) #fix longitudes that were incorrectly entered as a positive value unique(FishMaster$CoordSystem)
## [1] "NAD83" "NAD27" "WGS84"
missingLatLongPairs <- FishMaster %>%
  filter( (!is.na(TargetLatitude) & is.na(TargetLongitude)) |
          (is.na(TargetLatitude) & !is.na(TargetLongitude)) )

nrow(missingLatLongPairs) # should be 0
## [1] 0
# Identify Pond & Lake sites to keep in the linkage - Ideally this should have been done in the individual scripts but easiest to do here now PondLakeSites <- FishMaster %>% distinct(StationName, .keep_all=T) %>% filter( grepl('pond|lake', StationName, ignore.case=T) | WBT %in% c('Pond','Lake/Reservoir') ) %>% #mutate(ResultID = gsub('[0-9]*$', '', ResultID)) %>% #removes row number from ResultID column select(SourceID, StationName, WBT, TargetLatitude, TargetLongitude, CitationCode) writexl::write_xlsx(PondLakeSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2b.1_PondLakeSites_pulledfromFishMaster.xlsx') # Then exclude StationNames identified in '2b.2_PondLakeSites_SitesToKeep_ManuallyFilled.xlsx' as not part of the linkage PondLakeSites <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2b.2_PondLakeSites_SitesToKeep_ManuallyFilled.xlsx', sheet=1, guess_max = 30000) excludedSites <- PondLakeSites %>% filter(grepl('no', Keep, ignore.case=T)) %>% pull(StationName) all(excludedSites %in% FishMaster$StationName) #all should be TRUE - if all are TRUE then this statement result will be TRUE
## [1] TRUE
FishMaster <- FishMaster %>% filter(StationName %are not% excludedSites) all(!excludedSites %in% FishMaster$StationName) #should all be FALSE - if all are FALSE then this statement result will be TRUE
## [1] TRUE
# Look at blank Lat/Longs and see if any can be filled in
hasLatLong <- FishMaster %>%
  filter( !is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)

nrow(hasLatLong)
## [1] 1066
noLatLong <- FishMaster %>%
  filter( is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode)

nrow(noLatLong)
## [1] 105
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)

availLatLong <- mergeNoandHas[duplicated(mergeNoandHas$StationName) |
                                duplicated(mergeNoandHas$StationName, fromLast=TRUE), ]

nrow(availLatLong)
## [1] 24
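As a side note, the duplicated() expression above flags every occurrence of a repeated StationName, not just the later ones; here is a minimal sketch on made-up station names (not from the dataset).

# Toy example (hypothetical station names) of flagging all duplicated StationNames
x <- c("Dry Creek", "Putah Creek", "Dry Creek", "Bear River")
duplicated(x)                                   # FALSE FALSE  TRUE FALSE - first occurrence is not flagged
duplicated(x) | duplicated(x, fromLast = TRUE)  #  TRUE FALSE  TRUE FALSE - every occurrence of a repeated name is flagged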
# View(availLatLong %>% arrange(StationName))
# Make sure it is appropriate for the sites with blank Lat/Longs to get coords from the corresponding sites that have Lat/Longs.
# Not appropriate to add Lat/Long for "Cosumnes River" since we do not know if the sampling locations on the river are the same.
hasLatLong <- hasLatLong %>%
  filter(StationName != "Cosumnes River")

# Fill in blank TargetLatitude and TargetLongitude using temporary Lat/Long columns, then delete the temporary columns
hasLatLong <- hasLatLong %>%
  rename(tempLat = TargetLatitude, tempLong = TargetLongitude) %>%
  select(-CitationCode)

FishMaster <- FishMaster %>%
  left_join(., hasLatLong, by='StationName') %>%                                           # add Lat/Long values so that all sites with the same name get the same coords
  mutate(TargetLatitude = ifelse(is.na(TargetLatitude), tempLat, TargetLatitude),          # fill in blank TargetLatitude
         TargetLongitude = ifelse(is.na(TargetLongitude), tempLong, TargetLongitude)) %>%  # fill in blank TargetLongitude
  select(-tempLat, -tempLong)

# Check which sites still lack Lat/Longs
hasLatLong <- FishMaster %>%
  filter( !is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)

nrow(hasLatLong)
## [1] 1066
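The ifelse() fill-in above can also be written with dplyr::coalesce(); the following is an equivalent sketch on made-up values, not the code used in this script.

library(dplyr)

# Toy example (hypothetical values): keep TargetLatitude where present, otherwise fall back to the joined tempLat
toy <- tibble(TargetLatitude = c(38.2, NA, NA),
              tempLat        = c(38.2, 39.1, NA))

toy %>% mutate(TargetLatitude = coalesce(TargetLatitude, tempLat))
# Rows missing TargetLatitude pick up tempLat; rows missing both stay NA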
noLatLong <- FishMaster %>%
  filter( is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode)

nrow(noLatLong)
## [1] 94
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)

availLatLong <- mergeNoandHas[duplicated(mergeNoandHas$StationName) |
                                duplicated(mergeNoandHas$StationName, fromLast=TRUE), ]

nrow(availLatLong) # the 2 remaining results are for "Cosumnes River", which we did not want to match Lat/Longs to
## [1] 2
#Since noLatLong is >0 and availLatLong is zero, we need to save noLatLong as Excel file and add Lat Long by hand - then merge Lat Longs with FishMaster writexl::write_xlsx(noLatLong %>% arrange(SourceID, StationName), path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2c.1_Fish_NeedLatLong.xlsx') #Load woorkbook with Lat Longs that were found by hand SitesWithLatLongByHand <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2c.2_Fish_LatLong_ManuallyFilled.xlsx') %>% mutate(TargetLatitude = if_else(!is.na(`Coords from Google`), gsub(",.*$", "", `Coords from Google`), TargetLatitude), #subsitutes comma & everything after comma with "", then moves everything before comma to TargetLatitude TargetLongitude = if_else(!is.na(`Coords from Google`), gsub("^.*,\\s", "", `Coords from Google`), TargetLongitude)) %>% #subsitute space, comma, & everything before comma with "", then moves everything after space to TargetLongitude filter(!is.na(TargetLatitude)) %>% #filter out missing Lat/Longs mutate(tempLat = as.numeric(TargetLatitude), # rename Lat/Long column for left_join, these columns will be removed after join tempLong = as.numeric(TargetLongitude)) %>% select(-SourceID, -CitationCode, -`Coords from Google`, -Notes, -TargetLatitude, -TargetLongitude) # remove these columns so Lat Long can be merged with FishMaster #Join Lat Longs (found by hand) to MasterFish FishMaster <- FishMaster %>% left_join(., SitesWithLatLongByHand, by='StationName') %>% #add hand entered tempLat & tempLong colmns mutate(TargetLatitude = ifelse(is.na(TargetLatitude), tempLat, TargetLatitude), #fill in blank TargetLat TargetLongitude = ifelse(is.na(TargetLongitude), tempLong, TargetLongitude)) %>% #fill in blank TargetLong select(-tempLat, -tempLong) %>% # remove tempLat and tempLong columns filter(!is.na(TargetLatitude)) # remove sites that still have a blank Lat/Long nrow(FishMaster)
## [1] 22382
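Here is a minimal sketch of the `Coords from Google` parsing above, using a made-up coordinate string rather than a value from the workbook.

# Toy example (hypothetical coordinate string) of splitting "lat, long" text
coords <- "38.4405, -121.4238"
as.numeric(gsub(",.*$", "", coords))     # 38.4405   - drop the comma and everything after it (latitude)
as.numeric(gsub("^.*,\\s", "", coords))  # -121.4238 - drop everything up through the comma and space (longitude)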
ScopeDaSites <- FishMaster %>%
  mutate(uniqName = paste(StationName, round(TargetLatitude,4), round(TargetLongitude,4))) %>% # must match the code used in the 3rd script when creating the variable "AqMaster" in mutate(uniqName = ...)
  distinct(uniqName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CoordSystem, uniqName) %>%
  arrange(StationName)

nrow(ScopeDaSites)
## [1] 1217
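A minimal sketch (made-up station and coordinates, not from the dataset) of why uniqName rounds to four decimal places (roughly 11 m of latitude): records of the same site reported at different precision collapse to one row.

library(dplyr)

# Toy example (hypothetical station and coordinates) of the uniqName de-duplication
toy <- tibble(StationName     = c("Putah Creek", "Putah Creek"),
              TargetLatitude  = c(38.528301, 38.52830),
              TargetLongitude = c(-121.780702, -121.78070))

toy %>%
  mutate(uniqName = paste(StationName, round(TargetLatitude, 4), round(TargetLongitude, 4))) %>%
  distinct(uniqName, .keep_all = TRUE)
# Both rows round to "Putah Creek 38.5283 -121.7807", so distinct() keeps one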
## SAVE MERGED CEDEN FORMATTED DATA & SITES FOR GIS ##
writexl::write_xlsx(FishMaster, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2a_FishMaster_GISprepped.xlsx')
writexl::write_xlsx(ScopeDaSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2a_FishSitesToDemarcInGIS.xlsx')
# write.csv(ScopeDaSites, file='2a_FishSitesToDemarcInGIS.csv', row.names=FALSE)
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C
## [5] LC_TIME=English_United States.utf8
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0
## [17] MASS_7.3-58.1
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14
## [17] data.table_1.14.2   Matrix_1.5-1        splines_4.2.2       googledrive_2.0.0
## [21] htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1         compiler_4.2.2
## [25] modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3     htmltools_0.5.3
## [29] tidyselect_1.1.2    viridisLite_0.4.1   fansi_1.0.3         crayon_1.5.1
## [33] tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0         grid_4.2.2
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       colorspace_2.0-3
## [57] gargle_1.2.0        rvest_1.0.3         knitr_1.40          haven_2.5.1
Sys.time()
## [1] "2024-01-05 07:56:28 PST"