This report is automatically generated with the R package knitr (version 1.40).

source("R Functions/functions_QA data.R")

  ## LOAD MERGED FISH DATA ##
FishMaster <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/1a_FishMaster_merged.xlsx', sheet=1, guess_max = 30000)


  ## PREP DATA FOR GIS SCOPING ##
FishMaster <- FishMaster %>%
  # Fix CoordSystem nomenclature
  mutate(CoordSystem = recode(CoordSystem,
                              "NAD 83" = 'NAD83',
                              "NAD 1927" = 'NAD27',
                              "NR" = 'NAD83',
                              "nr" = 'NAD83',
                              "OTHER" = 'NAD83',
                              "UNKWN" = 'NAD83',
                              .missing = 'NAD83'),

         TargetLatitude = ifelse(grepl('[a-zA-Z]', TargetLatitude), NA_character_, TargetLatitude),
         TargetLatitude = gsub('[[:space:]]','',TargetLatitude), #need to remove spaces in text so NAs aren't introduced by coercion
         TargetLatitude = as.numeric(TargetLatitude),

         TargetLongitude = ifelse(grepl('[a-zA-Z]',TargetLongitude), NA_character_, TargetLongitude),
         TargetLongitude = gsub('[[:space:]]','',TargetLongitude), #need to remove spaces in text so NAs aren't introduced by coercion
         TargetLongitude = as.numeric(TargetLongitude),
         TargetLongitude = ifelse(TargetLongitude > 0, -TargetLongitude, TargetLongitude)) #fix longitudes that were incorrectly entered as a positive value
unique(FishMaster$CoordSystem)
## [1] "NAD83" "NAD27" "WGS84"
missingLatLongPairs <- FishMaster %>%
  filter( (!is.na(TargetLatitude) & is.na(TargetLongitude)) | (is.na(TargetLatitude) & !is.na(TargetLongitude)) )
nrow(missingLatLongPairs) #should be 0
## [1] 0
# Identify pond and lake sites to keep in the linkage. Ideally this would have been done in the individual scripts, but it is easiest to do here.
PondLakeSites <- FishMaster %>%
  distinct(StationName, .keep_all=T) %>%
  filter( grepl('pond|lake', StationName, ignore.case=T) | WBT %in% c('Pond','Lake/Reservoir') ) %>%
  #mutate(ResultID = gsub('[0-9]*$', '', ResultID)) %>% #removes row number from ResultID column
  select(SourceID, StationName, WBT, TargetLatitude, TargetLongitude, CitationCode)
writexl::write_xlsx(PondLakeSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2b.1_PondLakeSites_pulledfromFishMaster.xlsx')

# Then exclude StationNames identified in '2b.2_PondLakeSites_SitesToKeep_ManuallyFilled.xlsx' as not part of the linkage
PondLakeSites <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2b.2_PondLakeSites_SitesToKeep_ManuallyFilled.xlsx', sheet=1, guess_max = 30000)
excludedSites <- PondLakeSites %>%
  filter(grepl('no', Keep, ignore.case=T)) %>%
  pull(StationName)
all(excludedSites %in% FishMaster$StationName) # every excluded StationName should be present in FishMaster before filtering, so this should return TRUE
## [1] TRUE
FishMaster <- FishMaster %>%
  filter(StationName %are not% excludedSites)
all(!excludedSites %in% FishMaster$StationName) # after filtering, no excluded StationName should remain in FishMaster, so this should return TRUE
## [1] TRUE
# Look at blank Lat Longs and see if any can be filled in
hasLatLong <- FishMaster %>%
  filter( !is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(hasLatLong)
## [1] 1066
noLatLong <- FishMaster %>%
  filter( is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(noLatLong)
## [1] 105
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)
availLatLong <- mergeNoandHas[duplicated(mergeNoandHas$StationName) | duplicated(mergeNoandHas$StationName, fromLast=TRUE),]
nrow(availLatLong)
## [1] 24
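The duplicated() call above keeps every row whose StationName occurs in both noLatLong and hasLatLong; an equivalent way to list just the no-coordinate sites that could be filled is a dplyr semi_join (a sketch using the same objects; the name fillableSites is only for this sketch):

# Sites without coordinates that have a same-named site with coordinates available
fillableSites <- noLatLong %>%
  semi_join(hasLatLong, by = 'StationName')
nrow(fillableSites) # counts each station once, whereas availLatLong keeps the rows from both tables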
# View(availLatLong %>% arrange(StationName))  #Make sure it's appropriate for the site with blank Lat Longs to get coords from corresponding site with Lat Longs
#Not appropriate to add Lat/Long for "Cosumnes River" since we do not know if the sampling locations are the same on the river
hasLatLong <- hasLatLong %>%
  filter(StationName != "Cosumnes River")

#Fill in blank TargetLat and TargetLong using temp Lat Long columns - then delete temp Lat Long columns
hasLatLong <- hasLatLong %>%
  rename(tempLat = TargetLatitude,
         tempLong = TargetLongitude) %>%
  select(-CitationCode)

FishMaster <- FishMaster %>%
  left_join(., hasLatLong, by='StationName') %>%    #add Lat Long values so that all sites with the same name get the same coords
  mutate(TargetLatitude = ifelse(is.na(TargetLatitude), tempLat, TargetLatitude),     #fill in blank TargetLat
         TargetLongitude = ifelse(is.na(TargetLongitude), tempLong, TargetLongitude)) %>% #fill in blank TargetLong
  select(-tempLat, -tempLong)
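The ifelse() fill-in above could equivalently use dplyr::coalesce(), which takes the first non-missing value; a commented-out sketch of the same join and fill:

# FishMaster <- FishMaster %>%
#   left_join(., hasLatLong, by='StationName') %>%
#   mutate(TargetLatitude  = coalesce(TargetLatitude,  tempLat),   #keep existing value, else use tempLat
#          TargetLongitude = coalesce(TargetLongitude, tempLong)) %>%
#   select(-tempLat, -tempLong)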


#check which sites are still missing Lat/Long
hasLatLong <- FishMaster %>%
  filter( !is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(hasLatLong)
## [1] 1066
noLatLong <- FishMaster %>%
  filter( is.na(TargetLatitude) ) %>%
  distinct(StationName, .keep_all=T) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(noLatLong)
## [1] 94
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)
availLatLong <- mergeNoandHas[duplicated(mergeNoandHas$StationName) | duplicated(mergeNoandHas$StationName, fromLast=TRUE),]
nrow(availLatLong) # There are 2 rows for "Cosumnes River", which we chose not to match Lat/Longs to
## [1] 2
#Since noLatLong still has rows and the only remaining name matches are the Cosumnes River rows we chose not to fill, save noLatLong as an Excel file, add Lat/Longs by hand, then merge them back into FishMaster
writexl::write_xlsx(noLatLong %>% arrange(SourceID, StationName), path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2c.1_Fish_NeedLatLong.xlsx')

#Load workbook with Lat/Longs that were found by hand
SitesWithLatLongByHand <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2c.2_Fish_LatLong_ManuallyFilled.xlsx') %>%
  mutate(TargetLatitude  = if_else(!is.na(`Coords from Google`), gsub(",.*$", "", `Coords from Google`), TargetLatitude), #keep everything before the comma as TargetLatitude
         TargetLongitude = if_else(!is.na(`Coords from Google`), gsub("^.*,\\s", "", `Coords from Google`), TargetLongitude)) %>%  #keep everything after the comma and space as TargetLongitude
  filter(!is.na(TargetLatitude)) %>% #filter out missing Lat/Longs
  mutate(tempLat = as.numeric(TargetLatitude),         # create numeric tempLat/tempLong for the left_join; these columns are removed after the join
         tempLong = as.numeric(TargetLongitude)) %>%
  select(-SourceID, -CitationCode, -`Coords from Google`, -Notes, -TargetLatitude, -TargetLongitude) # remove these columns so Lat Long can be merged with FishMaster
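To illustrate the two gsub() calls above, here is how they split a hypothetical 'Coords from Google' value (the coordinates are made up):

exampleCoords <- "38.1234, -121.5678"   # hypothetical "lat, long" string
gsub(",.*$", "", exampleCoords)         # "38.1234"   -> TargetLatitude
gsub("^.*,\\s", "", exampleCoords)      # "-121.5678" -> TargetLongitude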


#Join Lat/Longs (found by hand) to FishMaster
FishMaster <- FishMaster %>%
  left_join(., SitesWithLatLongByHand, by='StationName') %>% #add hand-entered tempLat & tempLong columns
  mutate(TargetLatitude = ifelse(is.na(TargetLatitude), tempLat, TargetLatitude),     #fill in blank TargetLat
         TargetLongitude = ifelse(is.na(TargetLongitude), tempLong, TargetLongitude)) %>% #fill in blank TargetLong 
  select(-tempLat, -tempLong) %>% # remove tempLat and tempLong columns
  filter(!is.na(TargetLatitude)) # remove sites that still have a blank Lat/Long
nrow(FishMaster)
## [1] 22382
ScopeDaSites <- FishMaster %>%
  mutate(uniqName = paste(StationName, round(TargetLatitude,4), round(TargetLongitude,4))) %>%  # must match the uniqName construction used in the 3rd script when creating "AqMaster"
  distinct(uniqName, .keep_all=T) %>%
  select(StationName, TargetLatitude, TargetLongitude, CoordSystem, uniqName) %>%
  arrange(StationName)
nrow(ScopeDaSites)
## [1] 1217
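Because uniqName rounds coordinates to four decimal places (about 11 m), same-named sites whose coordinates differ only beyond the fourth decimal collapse to a single row in distinct(); a small illustration with made-up values:

paste("Site A", round(38.12341, 4), round(-121.56789, 4)) # "Site A 38.1234 -121.5679"
paste("Site A", round(38.12343, 4), round(-121.56791, 4)) # same key, so distinct() keeps only one row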
  ## SAVE MERGED CEDEN-FORMATTED DATA & SITES FOR GIS ##
writexl::write_xlsx(FishMaster, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2a_FishMaster_GISprepped.xlsx')

writexl::write_xlsx(ScopeDaSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/2a_FishSitesToDemarcInGIS.xlsx')
# write.csv(ScopeDaSites, file='2a_FishSitesToDemarcInGIS.csv', row.names=FALSE)

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        splines_4.2.2       googledrive_2.0.0  
## [21] htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1         compiler_4.2.2     
## [25] modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3     htmltools_0.5.3    
## [29] tidyselect_1.1.2    viridisLite_0.4.1   fansi_1.0.3         crayon_1.5.1       
## [33] tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0         grid_4.2.2         
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3          
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0          
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2     
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2        
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       colorspace_2.0-3   
## [57] gargle_1.2.0        rvest_1.0.3         knitr_1.40          haven_2.5.1
    Sys.time()
## [1] "2024-01-05 07:56:28 PST"