This report is automatically generated with the R package knitr (version 1.40).

## LOAD PACKAGES USED IN SCRIPTS ##
# Sources the shared QA helper script; per the header comment above, this is
# also where the packages used below (dplyr, readxl, writexl, ...) are loaded.
source("R Functions/functions_QA data.R")


## LOAD MERGED AQ DATA ##
# guess_max = 30000 forces readxl to scan enough rows to infer column types
# correctly on this large sheet.
AqMaster <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/1a_AqMaster_merged.xlsx', sheet=1, guess_max = 30000) %>%
  mutate(WBT = if_else(StationName == 'Robinson Pond', 'River/Stream', WBT)) #after looking at Pond/Lake Sites Lauren requested that Robinson Pond WBT be corrected to 'River/Stream'
nrow(AqMaster)
## [1] 74496
  ## PREP DATA FOR GIS SCOPING ##
# Inspect the raw CoordSystem values before they are standardized below.
unique(AqMaster$CoordSystem)
## [1] NA      "NR"    "NAD83" "WGS84" "NAD27" "nr"    "OTHER" "UNKWN"
AqMaster <- AqMaster %>%
  # Fix CoordSystem nomenclature: collapse not-recorded/unknown/other values
  # (and NA) to NAD83, the assumed default datum.
  # NOTE(review): the "NAD 83" and "NAD 1927" keys do not appear in the
  # unique() values printed above — presumably leftovers from an earlier data
  # pull; harmless, but confirm they can be removed.
  mutate(CoordSystem = recode(CoordSystem,
                              "NAD 83" = 'NAD83',
                              "NAD 1927" = 'NAD27',
                              "NR" = 'NAD83',
                              "nr" = 'NAD83',
                              "OTHER" = 'NAD83',
                              "UNKWN" = 'NAD83',
                              .missing = 'NAD83'),

         # Blank out latitude entries containing letters (free-text notes),
         # then strip whitespace before the numeric coercion below.
         TargetLatitude = ifelse(grepl('[a-zA-Z]', TargetLatitude), NA_character_, TargetLatitude),
         TargetLatitude = gsub("[[:space:]]","",TargetLatitude), #need to remove spaces in text so NAs aren't introduced by coercion
         # NOTE(review): this line cleans TargetLongitude but sits in the
         # latitude section, so TargetLatitude never gets the non-breaking
         # space (U+00A0) removal — confirm whether it was meant to target
         # TargetLatitude (the longitude cleanup is repeated below anyway).
         TargetLongitude = gsub(intToUtf8(160), "", TargetLongitude), #removes non-breaking space (found in Delta Mendota Canal " -121.578550")
         TargetLatitude = as.numeric(TargetLatitude),

         # Same cleanup sequence for longitude: drop lettered entries, strip
         # whitespace and non-breaking spaces, then coerce to numeric.
         TargetLongitude = ifelse(grepl('[a-zA-Z]',TargetLongitude), NA_character_, TargetLongitude),
         TargetLongitude = gsub("[[:space:]]","",TargetLongitude), #need to removes whitespace in text so NAs aren't introduced by coercion
         TargetLongitude = gsub(intToUtf8(160), "", TargetLongitude), #removes non-breaking space (found in Delta Mendota Canal " -121.578550")
         TargetLongitude = as.numeric(TargetLongitude),
         TargetLongitude = ifelse(TargetLongitude > 0, -TargetLongitude, TargetLongitude)) #fix longitudes that were incorrectly entered as a positive value
unique(AqMaster$CoordSystem)
## [1] "NAD83" "WGS84" "NAD27"
# Sanity check: latitude and longitude should always be missing together.
# xor() flags rows where exactly one coordinate of the pair is NA.
missingLatLongPairs <- AqMaster %>%
  filter(xor(is.na(TargetLatitude), is.na(TargetLongitude)))
nrow(missingLatLongPairs) #should be 0
## [1] 0
# Identify Pond & Lake sites to keep in the linkage - Ideally this should have been done in the individual scripts but easiest to do here now
# A site qualifies if its name mentions pond/lake (case-insensitive) or its
# waterbody type is Pond or Lake/Reservoir.
pond_lake_name <- grepl('pond|lake', AqMaster$StationName, ignore.case = TRUE)
PondLakeSites <- AqMaster %>%
  distinct(StationName, .keep_all = TRUE) %>%
  filter(grepl('pond|lake', StationName, ignore.case = TRUE) |
           WBT %in% c('Pond', 'Lake/Reservoir')) %>%
  select(SourceID, StationName, WBT, TargetLatitude, TargetLongitude,
         CitationCode) %>%
  arrange(SourceID, StationName)
# writexl::write_xlsx(PondLakeSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2b.1_PondLakeSites_pulledfromAqMaster.xlsx')

## Then exclude StationNames identified in '2b.2_PondLakeSites_SitesToKeep.xlsx' as not part of the linkage
PondLakeSites <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2b.2_PondLakeSites_SitesToKeep_ManuallyFilled.xlsx', sheet=1, guess_max = 30000)
# Sites whose manually-filled Keep column says "no" are dropped from the linkage.
# NOTE(review): grepl('no', ...) matches "no" anywhere in the string (e.g. it
# would also match "Unknown") — confirm the Keep column only ever holds yes/no.
excludedSites <- PondLakeSites %>%
  filter(grepl('no', Keep, ignore.case = TRUE)) %>%
  pull(StationName)
all(excludedSites %in% AqMaster$StationName) #should be TRUE - if all are TRUE then this statement result will be TRUE
## [1] TRUE
# BUG FIX: the original used the infix `%are not%`, which is not defined in
# base R or any package attached here; replaced with the equivalent,
# self-contained negated membership test (keep rows NOT in excludedSites).
AqMaster <- AqMaster %>%
  filter(!StationName %in% excludedSites)
all(!excludedSites %in% AqMaster$StationName) #should be TRUE - if all are FALSE then this statement result will be TRUE
## [1] TRUE
# Look at blank Lat Longs and see if any can be filled in
# Unique stations that already carry coordinates (one row per StationName).
hasLatLong <- AqMaster %>%
  filter(!is.na(TargetLatitude)) %>%
  distinct(StationName, .keep_all = TRUE) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(hasLatLong)
## [1] 4350
# Unique stations with no coordinates at all.
noLatLong <- AqMaster %>%
  filter(is.na(TargetLatitude)) %>%
  distinct(StationName, .keep_all = TRUE) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(noLatLong)
## [1] 345
# Stack both lists; a StationName appearing twice means a coordinate-less site
# has a same-named counterpart whose coordinates we can borrow.
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)
dup_forward  <- duplicated(mergeNoandHas$StationName)
dup_backward <- duplicated(mergeNoandHas$StationName, fromLast = TRUE)
availLatLong <- mergeNoandHas[dup_forward | dup_backward, ]
nrow(availLatLong)
## [1] 34
# View(availLatLong %>% arrange(StationName))  #Make sure it's appropriate for the site with blank Lat Longs to get coords from corresponding site with Lat Longs 


#Fill in blank TargetLat and TargetLong using temp Lat Long columns - then delete temp Lat Long columns
# Reduce hasLatLong to a StationName -> coordinates lookup; temp column names
# avoid clashing with AqMaster's own coordinate columns during the join.
hasLatLong <- hasLatLong %>%
  select(-CitationCode) %>%
  rename(tempLat = TargetLatitude,
         tempLong = TargetLongitude)

# Join the lookup on StationName, then use the borrowed coordinates only where
# AqMaster's own coordinates are missing; drop the temp columns afterwards.
AqMaster <- AqMaster %>%
  left_join(hasLatLong, by = 'StationName') %>%
  mutate(TargetLatitude  = coalesce(TargetLatitude, tempLat),
         TargetLongitude = coalesce(TargetLongitude, tempLong)) %>%
  select(-tempLat, -tempLong)

#check to see which sites still miss Lat Long
# Re-run the gap scan after the fill-in above.
hasLatLong <- AqMaster %>%
  filter(!is.na(TargetLatitude)) %>%
  distinct(StationName, .keep_all = TRUE) %>%
  select(StationName, TargetLatitude, TargetLongitude, CitationCode)
nrow(hasLatLong)
## [1] 4350
noLatLong <- AqMaster %>%
  filter(is.na(TargetLatitude)) %>%
  distinct(StationName, .keep_all = TRUE) %>%
  select(SourceID, StationName, TargetLatitude, TargetLongitude, CitationCode) %>%
  arrange(SourceID, StationName)
nrow(noLatLong)
## [1] 328
mergeNoandHas <- rbind(select(noLatLong, -SourceID), hasLatLong)
# Any StationName still appearing in both lists would mean borrowable coords
# were missed by the fill-in step.
is_dup <- duplicated(mergeNoandHas$StationName) |
  duplicated(mergeNoandHas$StationName, fromLast = TRUE)
availLatLong <- mergeNoandHas[is_dup, ]
nrow(availLatLong) # should be 0 now
## [1] 0
#Since noLatLong is >0 and availLatLong is zero, we need to save noLatLong as Excel file and add Lat Long by hand - then merge Lat Longs with AqMaster
# writexl::write_xlsx(noLatLong %>% arrange(SourceID, StationName), path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2c.1_Aq_NeedLatLong.xlsx')


#Load workbook with Lat Longs that were found by hand.
# The `Coords from Google` column holds "lat, long" strings pasted from Google
# Maps; when present, it overrides TargetLatitude/TargetLongitude.
SitesWithLatLongByHand <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2c.2_Aq_LatLong_ManuallyFilled.xlsx') %>%
  mutate(TargetLatitude  = if_else(!is.na(`Coords from Google`), as.numeric(gsub(",.*$", "", `Coords from Google`)), TargetLatitude), #substitutes comma & everything after comma with "", then moves everything before comma to TargetLatitude
         TargetLongitude = if_else(!is.na(`Coords from Google`), as.numeric(gsub("^.*,\\s", "", `Coords from Google`)), TargetLongitude)) %>%  #substitutes everything up to and including the comma + space with "", then moves everything after it to TargetLongitude
  filter(!is.na(TargetLatitude)) %>% #filter out missing Lat/Longs
  select(-SourceID, -CitationCode, -`Coords from Google`, -Notes) %>% #remove these columns so Lat Long can be merged with AqMaster
  rename( tempLat = TargetLatitude, #rename Lat/Long column for left_join, these columns will be removed after join
          tempLong = TargetLongitude)

#Join Lat Longs (found by hand) to AqMaster & Save
# Join the hand-entered coordinates on StationName, prefer existing values
# where present, then drop the still-coordinate-less sites before saving.
AqMaster_GISprepped <- AqMaster %>%
  left_join(SitesWithLatLongByHand, by = 'StationName') %>%
  mutate(TargetLatitude  = coalesce(TargetLatitude, tempLat),
         TargetLongitude = coalesce(TargetLongitude, tempLong)) %>%
  select(-tempLat, -tempLong) %>%  # remove the temporary join columns
  filter(!is.na(TargetLatitude))   # remove sites that still have a blank Lat/Long

writexl::write_xlsx(AqMaster_GISprepped, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2a_AqMaster_GISprepped.xlsx')


#Pull Sampling Location Info for TMDL region & Delta Subarea Demarcation in GIS
# One row per unique site, keyed by name + rounded coordinates. uniqName must
# be built with exactly the same expression used in the 3rd script when
# creating "AqMaster" (mutate(uniqName = ...)), or the later join will miss.
ScopeDaSites <- AqMaster_GISprepped %>%
  mutate(uniqName = paste(StationName,
                          round(TargetLatitude, 4),
                          round(TargetLongitude, 4))) %>%
  distinct(uniqName, .keep_all = TRUE) %>%
  select(StationName, TargetLatitude, TargetLongitude, CoordSystem, uniqName) %>%
  arrange(StationName)
nrow(ScopeDaSites)
## [1] 5795
writexl::write_xlsx(ScopeDaSites, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2a_AqSitesToDemarcInGIS.xlsx')
# write.csv(ScopeDaSites, file='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/2a_AqSitesToDemarcInGIS.csv', row.names=FALSE)

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=C                            
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## system code page: 65001
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 data.table_1.14.2  
## [17] rstudioapi_0.14     Matrix_1.5-1        rmarkdown_2.16      splines_4.2.2      
## [21] googledrive_2.0.0   htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1        
## [25] compiler_4.2.2      modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3    
## [29] htmltools_0.5.3     tidyselect_1.1.2    viridisLite_0.4.1   fansi_1.0.3        
## [33] crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0        
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3          
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0          
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2     
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2        
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       yaml_2.3.5         
## [57] colorspace_2.0-3    gargle_1.2.0        rvest_1.0.3         knitr_1.40         
## [61] haven_2.5.1
    Sys.time()
## [1] "2023-12-29 11:50:46 PST"