This report is automatically generated with the R package knitr (version 1.40) .

source("R Functions/functions_QA data.R")


### LOAD R5AQ DATA ###
### LOAD DATA ###
R5AQ <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/R5AQ.xlsx', sheet='Aqueous Hg data Rivers R5 only', guess_max = 30000)
nrow(R5AQ) #number of rows should match the Excel file (minus the header row)
## [1] 7290
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <- c('SourceID','SourceRow','Source','Project','StationName','StationCode','SampleDate','CollectionTime','LabBatch','LabSampleID','MatrixName',
               'WBT','MethodName', "Fraction Name", 'Analyte', 'AnalytebyFormula','Unit','Result','MDL','RL','ResultQualCode','SampleID','SampleComments','TargetLatitude',
               'TargetLongitude','QACode','BatchVerification','ComplianceCode','CollectionComments','ResultsComments','BatchComments','SampleTypeCode'
)

temp_cols <- c('DataType') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL)
#temp_cols are removed before the data is merged with other datasets

R5AQ_new <- R5AQ %>%
  select( c(keep_cols,temp_cols) ) %>% #DO NOT CHANGE - selects columns specified above
  mutate(              #Add user defined columns
    CoordSystem = NA
  ) %>%
  rename(
    'SampleTime' = 'CollectionTime',
    'CitationCode' = 'Source'
  ) %>%
  filter(
    DataType == 'Reported' | DataType == 'Field dup',  #only keep rows where DataType is 'Reported' or 'Field dup' - these values are not part of the lab QA/QC 
    WBT != 'Not Applicable', #appears to be QC data - (only 2 occurrences when DataType == 'Reported')
    WBT != 'Well' #Well data does not concern lowland rivers or delta - visually checked that data was not applicable
  )
nrow(R5AQ_new)
## [1] 5689
#str(R5AQ_new)
#View(R5AQ_new)


### FORMAT COLUMN PARAMETERS ###
# Standardize MatrixName Groups - "Water", "Sediment", "Soil" #
unique(R5AQ_new$MatrixName) #Identifies OLDNAMES
## [1] "samplewater"
R5AQ_new <- R5AQ_new %>%
  mutate( MatrixName = recode(MatrixName, "samplewater" = "Aqueous") )
unique(R5AQ_new$MatrixName) #New naming structure should now be listed
## [1] "Aqueous"
# Standardize WBT (WaterBodyType) Groups - "River/Stream", "Drain/Canal", "Wetland", "Spring", "Slough", 
#                                          "Pond",  "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" #
unique(R5AQ_new$WBT) #Identifies OLDNAMES
##  [1] "Creek"            "Drain/Canal"      "Slough"           "River"           
##  [5] "Spring"           "Pond"             "Wetland"          "Marsh"           
##  [9] "Not Recorded"     "Lake/Reservoir"   "Canal"            "Gulch"           
## [13] "Delta"            "Forebay/Afterbay" "Tributary"
R5AQ_new <- R5AQ_new %>%
  mutate(WBT = recode(WBT,
                      "Creek" = "River/Stream",
                      "River" = "River/Stream",
                      "Marsh" = "Wetland",
                      "Canal" = "Drain/Canal",
                      "Gulch" = "River/Stream", #only applies to Harley Gulch - will be filtered out in GIS since not within lowland river or delta scope
                      "Tributary" = "River/Stream"),
         #EXAMPLE FOR WHEN "OLDNAME" is 'NA' but we want a NEWNAME - if this example is deleted, also delete the comma after "Not Recorded" above
         WBT = case_when(is.na(WBT) ~ "Not Recorded", #Use "Not Recorded" when WBT value is NA
                         TRUE ~ WBT)                  #Keep original WBT value in all other cases
  )
unique(paste(R5AQ_new$MatrixName, R5AQ_new$WBT, sep='; ')) #New naming structure for Matrix Name & WBT Groupings
##  [1] "Aqueous; River/Stream"     "Aqueous; Drain/Canal"      "Aqueous; Slough"          
##  [4] "Aqueous; Spring"           "Aqueous; Pond"             "Aqueous; Wetland"         
##  [7] "Aqueous; Not Recorded"     "Aqueous; Lake/Reservoir"   "Aqueous; Delta"           
## [10] "Aqueous; Forebay/Afterbay"
# Standardize ResultQualCode Groups - "ND", "DNQ", NA#
unique(R5AQ_new$ResultQualCode) #Identifies OLDNAMES
## [1] "="   "ND"  "DNQ"
#[1] "="   "ND"  "DNQ" - no changes necessary


# Format Result Column to Numeric#
# Check column for text - based on text user needs to decide what to do
if(any(grepl('<|[a-df-zA-DF-Z]',R5AQ_new$Result))){
  old <-R5AQ_new$Result
  new <-R5AQ_new$Result
  new[grepl('<|[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "Numeric MDL value from SampleComments column was placed in MDL column when MDL column = '-88'.\n",
             "Result value of '<MDL' replaced with 'NA'.\n")
  )
  mdl_pattern <- ".*MDL=[[:space:]](.*)[[:space:]]ng.*"   #regex simplified: capture any character group between "MDL= " and " ng";
  #Regex explained: ".*"=any characters before; "MDL=[[:space:]]"=the string 'MDL= '; "(.*)"=capturing any character group ; "[[:space:]]ng"=characters ' ng'; ".*"=any characters after
  R5AQ_new <- R5AQ_new %>%
    mutate(
      MDL = as.character(MDL), #temporarily convert MDL column to character to insert the numeric text string from SampleComments column
      MDL = case_when(Result == "<MDL" & MDL == "-88" & grepl("ng/L", SampleComments) ~ sub(mdl_pattern, '\\1', SampleComments),  #"\\1" returns 1st pattern group; returns original string if no match
                      TRUE ~ MDL),
      MDL = as.numeric(MDL), # revert MDL column to numeric
      Result = case_when(Result == "<MDL" ~ NA_character_,
                         TRUE ~ Result),
      Result = as.numeric(new)
    )
} else {
  cat("'Result' column converted to numeric format\n")
  R5AQ_new$Result <- as.numeric(R5AQ_new$Result)
}
## 'Result' column should be numeric but some cells contain <MDL.
## ACTIONS TAKEN:
## Numeric MDL value from SampleComments column was placed in MDL column when MDL column = '-88'.
## Result value of '<MDL' replaced with 'NA'.
#View(R5AQ_new)


# Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information
nrow(R5AQ_new) #Number rows before
## [1] 5689
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS#
R5AQ_new <- R5AQ_new %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- R5AQ_new %>% #Record rows where Result, MDL, & RL all equal <NA>
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
nrow(na_results)
## [1] 27
R5AQ_new <- anti_join(R5AQ_new, na_results, by='SourceRow') #returns rows from R5AQ_new not matching values in no_result
nrow(R5AQ_new) #Number rows after
## [1] 5662
# Format Units Column - "ng/L", "mg/Kg"
unique(R5AQ_new$Unit) #Identifies OLDNAMES
## [1] "ng/L"
R5AQ_new <- R5AQ_new %>%
  standardizeUnits()
unique(R5AQ_new$Unit)
## [1] "ng/L"
# Format Date and Time Column #
# NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column
# THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS
R5AQ_new <- R5AQ_new %>%
  #rowise() %>%    # rowise is very slow - so used sapply to make this a rowise operation
  mutate(
    #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better
    SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))),
    SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(lubridate::ymd_hms(SampleTime), "%H:%M:%S")),
    #COMBINE DATE AND TIME INTO SampleDateTime COLUMN
    SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')),
    #FORMAT SampleDateTime COLUMN TO DATE FORMAT
    SampleDateTime = lubridate::ymd_hms(SampleDateTime)
  )


### REMOVE TEMPORARY COLUMNS ###
R5AQ_new <- R5AQ_new %>%
  select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed
#View(R5AQ_new)



### LOAD SNIP DATA ###
#SNIP_Riginal.xlsx was modified from SNIP_OGDL.xlsx to reclassify 417 bed sediment samples as sediment not surface water.
SNIP <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/SNIP_Riginal.xlsx', sheet='SNIP_MasterDatabase_Final_HgRec', guess_max = 30000)
nrow(SNIP) #number of rows should match the Excel file (minus the header row)
## [1] 39729
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <- c('SourceID','SourceRow','Datum','Project','StationName','StationCode','SampleDate','CollectionTime','LabBatch','LabSampleID','MatrixName','WaterBodyType','MethodName','Analyte','Unit','Result','SampleID','MDL','RL','ResultQualCode','SampleComments','TargetLatitude','TargetLongitude','QACode','BatchVerification','ComplianceCode','CollectionComments','ResultsComments','BatchComments','SampleTypeCode')
temp_cols <- c('AnalysisDate','Program','ParentProject') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL)
#temp_cols are removed before the data is merged with other datasets

SNIP_new <- SNIP %>%
  select( c(keep_cols,temp_cols) )%>%
  rename(
    'CoordSystem' = 'Datum',
    'SampleTime' = 'CollectionTime',
    'WBT' = 'WaterBodyType'
  ) %>%
  mutate(
    CitationCode = 'SNIP',
    `Fraction Name` = NA_character_,
    AnalytebyFormula = NA_character_,
    Project = paste(paste0('Project: ', Project),
                    paste0('ParentProject: ', ParentProject),
                    paste0('Program: ', Program),
                    sep=' ~ ')
  )
nrow(SNIP_new)
## [1] 39729
## LS - appending MatrixName infO (e.g. 1-3cm) into SampleComments
unique(SNIP_new$MatrixName) #Identifies OLDNAMES
##  [1] "Surface Water"          "Bed Sediment"           "Surface water"         
##  [4] "Bed Sediment, < 0.5cm"  "Bed Sediment, 1-3 cm"   "Tissue"                
##  [7] "Bed Sediment, < 63um"   "Soil"                   "Bed Sediment, < 1.5mm" 
## [10] "Bed Sediment, < 60um"   "Bed Sediment, < 2cm"    "Bed Sediment, 2-4 cm"  
## [13] "tissue"                 "Suspended Sediment"     "Bed Sediment, < 2mm"   
## [16] "Bottom Sediment, < 2mm" "Artificial"             "Bed Sediment, 12-16 cm"
## [19] "Bed Sediment, 6-9 cm"   "Suspended sediment"     "Bed Sediment, 1-2 cm"  
## [22] "Bed Sediment, 3-5 cm"   "Leachate"               "Bed Sediment, < 3 cm"  
## [25] "Bed Sediment, < 3cm"
SNIP_new <- SNIP_new %>%
  filter(MatrixName %are not% c("Tissue", "tissue", "Artificial", "Leachate", "Soil")) %>%
  mutate(
    SampleComments = ifelse(grepl("Bed Sediment,", MatrixName), ifelse(is.na(SampleComments), MatrixName, paste0(SampleComments, '; ', MatrixName)), SampleComments),
    SampleComments = ifelse(grepl("Bed Sediment", MatrixName), ifelse(is.na(SampleComments), Analyte, paste0(SampleComments, '; ', Analyte)), SampleComments),
    MatrixName = ifelse(grepl('Bed Sediment', MatrixName), "Sediment", MatrixName),
    SampleComments = ifelse(grepl("Bottom Sediment", MatrixName), ifelse(is.na(SampleComments), Analyte, paste0(SampleComments, '; ', MatrixName,'; ', Analyte)), SampleComments),
    MatrixName = ifelse(grepl('Bottom Sediment', MatrixName), "Sediment", MatrixName)
  ) %>%
  filter(!is.na(WBT)) %>% #filtered samples without WBT; no lat/long or site descriptions
  mutate(MatrixName = recode(MatrixName, #COLUMN NAME WHERE CHANGES WILL BE MADE
                             "Surface Water"   = "Aqueous",
                             "Surface water"   = "Aqueous",
                             "Suspended sediment" = "Suspended Sediment")) # Change "Suspended Sediment" to "Aqueous" after fixing Analyte names
unique(SNIP_new$MatrixName)
## [1] "Aqueous"            "Sediment"           "Suspended Sediment"
# Standardize WBT (WaterBodyType) Groups - "River/Stream", "Drain/Canal", "Wetland", "Spring", "Slough", 
#                                          "Pond",  "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" #
sort(unique(paste(SNIP_new$MatrixName, SNIP_new$WBT, sep='; ')))
##  [1] "Aqueous; Delta"                        "Aqueous; Estuary"                     
##  [3] "Aqueous; Hatchery"                     "Aqueous; Land Runoff"                 
##  [5] "Aqueous; Marsh"                        "Aqueous; Outfall"                     
##  [7] "Aqueous; Reservoir/Lake"               "Aqueous; river/Creek"                 
##  [9] "Aqueous; River/Creek"                  "Aqueous; Spring"                      
## [11] "Aqueous; Storm Sewer"                  "Aqueous; Tunnel, shaft, or mine"      
## [13] "Aqueous; Wastewater-treatment plant"   "Aqueous; Wastewater land application" 
## [15] "Aqueous; Wetland"                      "Sediment; Delta"                      
## [17] "Sediment; Estuary"                     "Sediment; Reservoir/Lake"             
## [19] "Sediment; river/Creek"                 "Sediment; River/Creek"                
## [21] "Sediment; Specific Source"             "Sediment; Spring"                     
## [23] "Sediment; Storm Sewer"                 "Sediment; Tunnel, shaft, or mine"     
## [25] "Sediment; Wastewater land application" "Sediment; Wetland"                    
## [27] "Suspended Sediment; river/Creek"       "Suspended Sediment; River/Creek"
#If an incosistent grouping exists, add comment to 'CollectionComments' column using code:  mutate(CollectionComments = case_when(MatrixName=='CONDITION' ~ 'COMMENT', TRUE ~ MatrixName)), 

#STANDARD CODE TO CHANGE GROUPING NAMES
SNIP_new <- SNIP_new %>%
  filter(WBT %are not% c("Estuary", "Land Runoff", "Storm Sewer", "Spring", "Tunnel, shaft, or mine", "Wastewater land application", "Outfall", "Specific Source", "Wastewater-treatment plant", "Hatchery")) %>%
  mutate(WBT = recode(WBT,
                      "river/Creek" = "River/Stream",
                      "River/Creek" = "River/Stream",
                      "Reservoir/Lake" = "Lake/Reservoir", #Jennie confirmed some Lake/Reservoirs are in scope
                      "Marsh" = "Wetland",
                      "NA" = "Not Recorded"))
sort(unique(paste(SNIP_new$MatrixName, SNIP_new$WBT, sep='; '))) #New naming structure for Matrix Name & WBT Groupings
## [1] "Aqueous; Delta"                   "Aqueous; Lake/Reservoir"         
## [3] "Aqueous; River/Stream"            "Aqueous; Wetland"                
## [5] "Sediment; Delta"                  "Sediment; Lake/Reservoir"        
## [7] "Sediment; River/Stream"           "Sediment; Wetland"               
## [9] "Suspended Sediment; River/Stream"
#Filter to include specified 'reservoirs' that are in scope
SNIP_new <- SNIP_new %>%
  filter(WBT %in% c("River/Stream", "Wetland", "Not Recorded") | StationName %in% c('"old F&G pond",238', "Cachement Basin", "Cachment basin", "Central Pond,222", "Greens Lake,226", "PG&E pond,223", "seasonal flooded pond,245", "Shag Slough"))

unique(SNIP_new$Analyte) #IF ANALYTE IS IN 2 COLUMNS e.g., (Hg & MeHg); (Total, Dissolved, Suspended) SEE WQP SCRIPT FOR EXAMPLE
##  [1] "Mercury, Total"                                                                               
##  [2] "Mercury, Methyl"                                                                              
##  [3] "Mercury,Total"                                                                                
##  [4] "Mercury, water, unfiltered, recoverable, micrograms pe"                                       
##  [5] "Mercury, water, filtered, micrograms per liter"                                               
##  [6] "Mercury, bed sediment, recoverable, dry weight, millig"                                       
##  [7] "Mercury, suspended sediment, recoverable, micrograms p"                                       
##  [8] "Mercury, bed sediment smaller than 62.5 microns, dry s"                                       
##  [9] "Mercury, Dissolved"                                                                           
## [10] "Mercury, bed sediment, recoverable, dry weight, milligrams per kilogram"                      
## [11] "Methylmercury, solids, total, dry weight, micrograms per kilogram"                            
## [12] "Methylmercury, water, unfiltered, recoverable, nanogra"                                       
## [13] "Methylmercury, water, filtered, recoverable, nanograms"                                       
## [14] "Mercury, water, unfiltered, nanograms per liter"                                              
## [15] "Mercury, water, filtered, nanograms per liter"                                                
## [16] "Methyl Mercury, Total"                                                                        
## [17] "Mercury, bed sediment smaller than 62.5 microns, wet s"                                       
## [18] "Methylmercury, water, unfiltered, recoverable, nanograms per liter"                           
## [19] "Methylmercury, water, filtered, recoverable, nanograms per liter"                             
## [20] "Mercury"                                                                                      
## [21] "Mercury, solids [streambed sediment], total, dry weight, nanograms per gram"                  
## [22] "Methylmercury, solids [streambed sediment], total, dry weight, nanograms per gram"            
## [23] "Methylmercury, water, unfiltered, nanograms per liter"                                        
## [24] "Methylmercury, suspended sediment, total, nanograms per liter"                                
## [25] "Mercury, suspended sediment, total, nanograms per liter"                                      
## [26] "Mercury,Dissolved"                                                                            
## [27] "Mercury, suspended sediment [particulate matter], total, nanograms per liter"                 
## [28] "Methylmercury, suspended sediment [particulate matter], total, nanograms per liter"           
## [29] "Mercury, solids, total, dry weight, micrograms per kil"                                       
## [30] "Methylmercury, solids, total, dry weight, micrograms p"                                       
## [31] "Mercury, suspended sediment, total, nanograms per lite"                                       
## [32] "Methylmercury, suspended sediment, total, nanograms pe"                                       
## [33] "Mercury, Methyl, Total"                                                                       
## [34] "Mercury, Methyl, Dissolved"                                                                   
## [35] "Mercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"      
## [36] "Methylmercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"
## [37] "Mercury, Methyl,Total"
# Analyte name indicates if sample is dry/wet weight but not not indicated in Unit so paste Analyte info to Unit
SNIP_new <- SNIP_new %>%
  mutate(
    Unit = ifelse(grepl('dry', Analyte)& !grepl('dry|dw', Unit), paste(Unit, 'dw'), Unit),
    Unit = ifelse(grepl('wet', Analyte)& !grepl('wet|ww', Unit), paste(Unit, 'ww'), Unit)
  )

sort(unique(paste(SNIP_new$MatrixName, SNIP_new$Analyte, sep=' ~ ')))
##  [1] "Aqueous ~ Mercury"                                                                                                 
##  [2] "Aqueous ~ Mercury, Dissolved"                                                                                      
##  [3] "Aqueous ~ Mercury, Methyl"                                                                                         
##  [4] "Aqueous ~ Mercury, Methyl, Dissolved"                                                                              
##  [5] "Aqueous ~ Mercury, Methyl, Total"                                                                                  
##  [6] "Aqueous ~ Mercury, Methyl,Total"                                                                                   
##  [7] "Aqueous ~ Mercury, suspended sediment, recoverable, micrograms p"                                                  
##  [8] "Aqueous ~ Mercury, suspended sediment, total, nanograms per lite"                                                  
##  [9] "Aqueous ~ Mercury, suspended sediment, total, nanograms per liter"                                                 
## [10] "Aqueous ~ Mercury, Total"                                                                                          
## [11] "Aqueous ~ Mercury, water, filtered, micrograms per liter"                                                          
## [12] "Aqueous ~ Mercury, water, filtered, nanograms per liter"                                                           
## [13] "Aqueous ~ Mercury, water, unfiltered, nanograms per liter"                                                         
## [14] "Aqueous ~ Mercury, water, unfiltered, recoverable, micrograms pe"                                                  
## [15] "Aqueous ~ Mercury,Dissolved"                                                                                       
## [16] "Aqueous ~ Mercury,Total"                                                                                           
## [17] "Aqueous ~ Methyl Mercury, Total"                                                                                   
## [18] "Aqueous ~ Methylmercury, suspended sediment, total, nanograms pe"                                                  
## [19] "Aqueous ~ Methylmercury, suspended sediment, total, nanograms per liter"                                           
## [20] "Aqueous ~ Methylmercury, water, filtered, recoverable, nanograms"                                                  
## [21] "Aqueous ~ Methylmercury, water, filtered, recoverable, nanograms per liter"                                        
## [22] "Aqueous ~ Methylmercury, water, unfiltered, nanograms per liter"                                                   
## [23] "Aqueous ~ Methylmercury, water, unfiltered, recoverable, nanogra"                                                  
## [24] "Aqueous ~ Methylmercury, water, unfiltered, recoverable, nanograms per liter"                                      
## [25] "Sediment ~ Mercury, bed sediment smaller than 62.5 microns, dry s"                                                 
## [26] "Sediment ~ Mercury, bed sediment smaller than 62.5 microns, wet s"                                                 
## [27] "Sediment ~ Mercury, bed sediment, recoverable, dry weight, millig"                                                 
## [28] "Sediment ~ Mercury, bed sediment, recoverable, dry weight, milligrams per kilogram"                                
## [29] "Sediment ~ Mercury, Methyl"                                                                                        
## [30] "Sediment ~ Mercury, Methyl, Total"                                                                                 
## [31] "Sediment ~ Mercury, solids [streambed sediment], total, dry weight, nanograms per gram"                            
## [32] "Sediment ~ Mercury, solids, total, dry weight, micrograms per kil"                                                 
## [33] "Sediment ~ Mercury, Total"                                                                                         
## [34] "Sediment ~ Mercury,Total"                                                                                          
## [35] "Sediment ~ Methylmercury, solids [streambed sediment], total, dry weight, nanograms per gram"                      
## [36] "Sediment ~ Methylmercury, solids, total, dry weight, micrograms p"                                                 
## [37] "Sediment ~ Methylmercury, solids, total, dry weight, micrograms per kilogram"                                      
## [38] "Suspended Sediment ~ Mercury, bed sediment smaller than 62.5 microns, wet s"                                       
## [39] "Suspended Sediment ~ Mercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"      
## [40] "Suspended Sediment ~ Mercury, suspended sediment [particulate matter], total, nanograms per liter"                 
## [41] "Suspended Sediment ~ Mercury, water, filtered, micrograms per liter"                                               
## [42] "Suspended Sediment ~ Methylmercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"
## [43] "Suspended Sediment ~ Methylmercury, suspended sediment [particulate matter], total, nanograms per liter"
sort(unique(paste(SNIP_new$Analyte, SNIP_new$MatrixName, sep=' ~ ')))
##  [1] "Mercury ~ Aqueous"                                                                                                 
##  [2] "Mercury, bed sediment smaller than 62.5 microns, dry s ~ Sediment"                                                 
##  [3] "Mercury, bed sediment smaller than 62.5 microns, wet s ~ Sediment"                                                 
##  [4] "Mercury, bed sediment smaller than 62.5 microns, wet s ~ Suspended Sediment"                                       
##  [5] "Mercury, bed sediment, recoverable, dry weight, millig ~ Sediment"                                                 
##  [6] "Mercury, bed sediment, recoverable, dry weight, milligrams per kilogram ~ Sediment"                                
##  [7] "Mercury, Dissolved ~ Aqueous"                                                                                      
##  [8] "Mercury, Methyl ~ Aqueous"                                                                                         
##  [9] "Mercury, Methyl ~ Sediment"                                                                                        
## [10] "Mercury, Methyl, Dissolved ~ Aqueous"                                                                              
## [11] "Mercury, Methyl, Total ~ Aqueous"                                                                                  
## [12] "Mercury, Methyl, Total ~ Sediment"                                                                                 
## [13] "Mercury, Methyl,Total ~ Aqueous"                                                                                   
## [14] "Mercury, solids [streambed sediment], total, dry weight, nanograms per gram ~ Sediment"                            
## [15] "Mercury, solids, total, dry weight, micrograms per kil ~ Sediment"                                                 
## [16] "Mercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram ~ Suspended Sediment"      
## [17] "Mercury, suspended sediment [particulate matter], total, nanograms per liter ~ Suspended Sediment"                 
## [18] "Mercury, suspended sediment, recoverable, micrograms p ~ Aqueous"                                                  
## [19] "Mercury, suspended sediment, total, nanograms per lite ~ Aqueous"                                                  
## [20] "Mercury, suspended sediment, total, nanograms per liter ~ Aqueous"                                                 
## [21] "Mercury, Total ~ Aqueous"                                                                                          
## [22] "Mercury, Total ~ Sediment"                                                                                         
## [23] "Mercury, water, filtered, micrograms per liter ~ Aqueous"                                                          
## [24] "Mercury, water, filtered, micrograms per liter ~ Suspended Sediment"                                               
## [25] "Mercury, water, filtered, nanograms per liter ~ Aqueous"                                                           
## [26] "Mercury, water, unfiltered, nanograms per liter ~ Aqueous"                                                         
## [27] "Mercury, water, unfiltered, recoverable, micrograms pe ~ Aqueous"                                                  
## [28] "Mercury,Dissolved ~ Aqueous"                                                                                       
## [29] "Mercury,Total ~ Aqueous"                                                                                           
## [30] "Mercury,Total ~ Sediment"                                                                                          
## [31] "Methyl Mercury, Total ~ Aqueous"                                                                                   
## [32] "Methylmercury, solids [streambed sediment], total, dry weight, nanograms per gram ~ Sediment"                      
## [33] "Methylmercury, solids, total, dry weight, micrograms p ~ Sediment"                                                 
## [34] "Methylmercury, solids, total, dry weight, micrograms per kilogram ~ Sediment"                                      
## [35] "Methylmercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram ~ Suspended Sediment"
## [36] "Methylmercury, suspended sediment [particulate matter], total, nanograms per liter ~ Suspended Sediment"           
## [37] "Methylmercury, suspended sediment, total, nanograms pe ~ Aqueous"                                                  
## [38] "Methylmercury, suspended sediment, total, nanograms per liter ~ Aqueous"                                           
## [39] "Methylmercury, water, filtered, recoverable, nanograms ~ Aqueous"                                                  
## [40] "Methylmercury, water, filtered, recoverable, nanograms per liter ~ Aqueous"                                        
## [41] "Methylmercury, water, unfiltered, nanograms per liter ~ Aqueous"                                                   
## [42] "Methylmercury, water, unfiltered, recoverable, nanogra ~ Aqueous"                                                  
## [43] "Methylmercury, water, unfiltered, recoverable, nanograms per liter ~ Aqueous"
SNIP_new <- SNIP_new %>% #Do not know what to do about suspended sediment analytes that are classified as surface water samples
  mutate(
    Analyte = case_when(MatrixName == 'Suspended Sediment' & Analyte == 'Mercury, bed sediment smaller than 62.5 microns, wet s' ~ 'Mercury, Suspended',
                        MatrixName == 'Sediment'           & Analyte == 'Mercury, bed sediment smaller than 62.5 microns, wet s' ~ 'Mercury, Total',
                        MatrixName == 'Suspended Sediment' & Analyte == 'Mercury, water, filtered, micrograms per liter' ~ 'Mercury, Suspended',
                        MatrixName == 'Aqueous'            & Analyte == 'Mercury, water, filtered, micrograms per liter' ~ 'Mercury, Dissolved',
                        TRUE ~ Analyte),
    # Analyte = recode(Analyte,
    #                  "Mercury,Total" = "Mercury, Total",
    #                  "Mercury" = "Mercury, Total",
    #                  "Mercury,Dissolved" = "Mercury, Dissolved",
    #                  "Mercury, water, unfiltered, recoverable, micrograms pe" = "Mercury, Total",
    #                  "Mercury, water, unfiltered, nanograms per liter" = "Mercury, Total",
    #                  "Mercury, bed sediment, recoverable, dry weight, millig" = "Mercury, Total",
    #                  "Mercury, suspended sediment, recoverable, micrograms p" = "Mercury, Suspended",
    #                  "Mercury, solids [streambed sediment], total, dry weight, nanograms per gram" = "Mercury, Total",
    #                  "Mercury, solids, total, dry weight, micrograms per kil" = "Mercury, Total",
    #                  "Mercury, water, filtered, nanograms per liter" = "Mercury, Dissolved","Methyl Mercury, Total" = "Methylmercury, Total",
    #                  "Mercury, Methyl,Total" = "Methylmercury, Total",
    #                  "Mercury, Methyl" = "Methylmercury, Total",
    #                  "Methylmercury, water, unfiltered, recoverable, nanogra" = "Methylmercury, Total",
    #                  "Methylmercury, water, unfiltered, recoverable, nanograms per liter" = "Methylmercury, Total",
    #                  "Methylmercury, water, unfiltered, nanograms per liter" = "Methylmercury, Total",
    #                  "Methylmercury, solids [streambed sediment], total, dry weight, nanograms per gram" = "Methylmercury, Total",
    #                  "Methylmercury, solids, total, dry weight, micrograms per kilogram" = "Methylmercury, Total",
    #                  "Methylmercury, solids, total, dry weight, micrograms p" = "Methylmercury, Total",
    #                  "Mercury, Methyl, Total" = "Methylmercury, Total",
    #                  "Mercury, Methyl, Dissolved" = "Methylmercury, Dissolved",
    #                  "Methylmercury, water, filtered, recoverable, nanograms" = "Methylmercury, Dissolved",
    #                  "Methylmercury, water, filtered, recoverable, nanograms per liter" = "Methylmercury, Dissolved",
    #                  "Methylmercury, suspended sediment, total, nanograms per liter" = "Methylmercury, Suspended",
    #                  "Mercury, suspended sediment, total, nanograms per liter" = "Mercury, Suspended",               
    #                  "Mercury, suspended sediment, total, nanograms per lite" = "Mercury, Suspended",
    #                  "Methylmercury, suspended sediment, total, nanograms pe" = "Methylmercury, Suspended",
    #                  "Mercury, bed sediment, recoverable, dry weight, milligrams per kilogram" = "Mercury, Total",
    #                  "Mercury, bed sediment smaller than 62.5 microns, dry s" = "Mercury, Total",
    #                  "Mercury, suspended sediment [particulate matter], total, nanograms per liter" = "Mercury, Suspended",      
    #                  "Methylmercury, suspended sediment [particulate matter], total, nanograms per liter" = "Methylmercury, Suspended",
    #                  "Mercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram" = "Mercury, Suspended",
    #                  "Methylmercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram" = "Methylmercury, Suspended"),
    MatrixName = recode(MatrixName,
                        "Suspended Sediment" = "Aqueous")

  )
sort(unique(SNIP_new$Analyte)) #New naming structure for Analyte Groupings
##  [1] "Mercury"                                                                                      
##  [2] "Mercury, bed sediment smaller than 62.5 microns, dry s"                                       
##  [3] "Mercury, bed sediment, recoverable, dry weight, millig"                                       
##  [4] "Mercury, bed sediment, recoverable, dry weight, milligrams per kilogram"                      
##  [5] "Mercury, Dissolved"                                                                           
##  [6] "Mercury, Methyl"                                                                              
##  [7] "Mercury, Methyl, Dissolved"                                                                   
##  [8] "Mercury, Methyl, Total"                                                                       
##  [9] "Mercury, Methyl,Total"                                                                        
## [10] "Mercury, solids [streambed sediment], total, dry weight, nanograms per gram"                  
## [11] "Mercury, solids, total, dry weight, micrograms per kil"                                       
## [12] "Mercury, Suspended"                                                                           
## [13] "Mercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"      
## [14] "Mercury, suspended sediment [particulate matter], total, nanograms per liter"                 
## [15] "Mercury, suspended sediment, recoverable, micrograms p"                                       
## [16] "Mercury, suspended sediment, total, nanograms per lite"                                       
## [17] "Mercury, suspended sediment, total, nanograms per liter"                                      
## [18] "Mercury, Total"                                                                               
## [19] "Mercury, water, filtered, nanograms per liter"                                                
## [20] "Mercury, water, unfiltered, nanograms per liter"                                              
## [21] "Mercury, water, unfiltered, recoverable, micrograms pe"                                       
## [22] "Mercury,Dissolved"                                                                            
## [23] "Mercury,Total"                                                                                
## [24] "Methyl Mercury, Total"                                                                        
## [25] "Methylmercury, solids [streambed sediment], total, dry weight, nanograms per gram"            
## [26] "Methylmercury, solids, total, dry weight, micrograms p"                                       
## [27] "Methylmercury, solids, total, dry weight, micrograms per kilogram"                            
## [28] "Methylmercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"
## [29] "Methylmercury, suspended sediment [particulate matter], total, nanograms per liter"           
## [30] "Methylmercury, suspended sediment, total, nanograms pe"                                       
## [31] "Methylmercury, suspended sediment, total, nanograms per liter"                                
## [32] "Methylmercury, water, filtered, recoverable, nanograms"                                       
## [33] "Methylmercury, water, filtered, recoverable, nanograms per liter"                             
## [34] "Methylmercury, water, unfiltered, nanograms per liter"                                        
## [35] "Methylmercury, water, unfiltered, recoverable, nanogra"                                       
## [36] "Methylmercury, water, unfiltered, recoverable, nanograms per liter"
sort(unique(paste(SNIP_new$MatrixName, SNIP_new$Analyte, sep=' ~ ')))
##  [1] "Aqueous ~ Mercury"                                                                                      
##  [2] "Aqueous ~ Mercury, Dissolved"                                                                           
##  [3] "Aqueous ~ Mercury, Methyl"                                                                              
##  [4] "Aqueous ~ Mercury, Methyl, Dissolved"                                                                   
##  [5] "Aqueous ~ Mercury, Methyl, Total"                                                                       
##  [6] "Aqueous ~ Mercury, Methyl,Total"                                                                        
##  [7] "Aqueous ~ Mercury, Suspended"                                                                           
##  [8] "Aqueous ~ Mercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"      
##  [9] "Aqueous ~ Mercury, suspended sediment [particulate matter], total, nanograms per liter"                 
## [10] "Aqueous ~ Mercury, suspended sediment, recoverable, micrograms p"                                       
## [11] "Aqueous ~ Mercury, suspended sediment, total, nanograms per lite"                                       
## [12] "Aqueous ~ Mercury, suspended sediment, total, nanograms per liter"                                      
## [13] "Aqueous ~ Mercury, Total"                                                                               
## [14] "Aqueous ~ Mercury, water, filtered, nanograms per liter"                                                
## [15] "Aqueous ~ Mercury, water, unfiltered, nanograms per liter"                                              
## [16] "Aqueous ~ Mercury, water, unfiltered, recoverable, micrograms pe"                                       
## [17] "Aqueous ~ Mercury,Dissolved"                                                                            
## [18] "Aqueous ~ Mercury,Total"                                                                                
## [19] "Aqueous ~ Methyl Mercury, Total"                                                                        
## [20] "Aqueous ~ Methylmercury, suspended sediment [particulate matter], total, dry weight, nanograms per gram"
## [21] "Aqueous ~ Methylmercury, suspended sediment [particulate matter], total, nanograms per liter"           
## [22] "Aqueous ~ Methylmercury, suspended sediment, total, nanograms pe"                                       
## [23] "Aqueous ~ Methylmercury, suspended sediment, total, nanograms per liter"                                
## [24] "Aqueous ~ Methylmercury, water, filtered, recoverable, nanograms"                                       
## [25] "Aqueous ~ Methylmercury, water, filtered, recoverable, nanograms per liter"                             
## [26] "Aqueous ~ Methylmercury, water, unfiltered, nanograms per liter"                                        
## [27] "Aqueous ~ Methylmercury, water, unfiltered, recoverable, nanogra"                                       
## [28] "Aqueous ~ Methylmercury, water, unfiltered, recoverable, nanograms per liter"                           
## [29] "Sediment ~ Mercury, bed sediment smaller than 62.5 microns, dry s"                                      
## [30] "Sediment ~ Mercury, bed sediment, recoverable, dry weight, millig"                                      
## [31] "Sediment ~ Mercury, bed sediment, recoverable, dry weight, milligrams per kilogram"                     
## [32] "Sediment ~ Mercury, Methyl"                                                                             
## [33] "Sediment ~ Mercury, Methyl, Total"                                                                      
## [34] "Sediment ~ Mercury, solids [streambed sediment], total, dry weight, nanograms per gram"                 
## [35] "Sediment ~ Mercury, solids, total, dry weight, micrograms per kil"                                      
## [36] "Sediment ~ Mercury, Total"                                                                              
## [37] "Sediment ~ Mercury,Total"                                                                               
## [38] "Sediment ~ Methylmercury, solids [streambed sediment], total, dry weight, nanograms per gram"           
## [39] "Sediment ~ Methylmercury, solids, total, dry weight, micrograms p"                                      
## [40] "Sediment ~ Methylmercury, solids, total, dry weight, micrograms per kilogram"
# SNIP_new <- SNIP_new %>% 
#   filter(Analyte == "Mercury" | Analyte == "Mercury, Methyl")  # test which Analyate matches these Analyte names in R5AQ
# sort(unique(paste(SNIP_new$MatrixName, SNIP_new$Analyte, sep=' ~ ')))

# Standardize ResultQualCode Groups - "ND", "DNQ", NA#
unique(SNIP_new$ResultQualCode) #Identifies OLDNAMES
## [1] "="   "<"   "DNQ" "ND"  "E"   "NC"  "NR"
SNIP_new <- SNIP_new %>%
  mutate(ResultQualCode = recode(ResultQualCode,
                                 "NR" = "ND",
                                 "NC" = "ND",
                                 "<" = "DNQ"
  )
  )
unique(SNIP_new$ResultQualCode) #New naming structure for ResultQualCode Groupings
## [1] "="   "DNQ" "ND"  "E"
if(any(grepl('<|[a-df-zA-DF-Z]',SNIP_new$Result))){
  old <-SNIP_new$Result
  new <-SNIP_new$Result
  new[grepl('<|[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "~explain here~.\n"))
  #SNIP_new <- SNIP_new %>%
  #  mutate( #Due stuff to prep column to be converted to Numeric
  #    Result = as.numeric(new)
  #    )
} else {
  cat("'Result' column converted to numeric format\n")
  SNIP_new$Result <- as.numeric(SNIP_new$Result)}
## 'Result' column converted to numeric format
# Format MDL Column to Numeric#
# Check column for text - based on text user needs to decide what to do
if(any(grepl('[a-df-zA-DF-Z]',SNIP_new$MDL))){
  old <-SNIP_new$MDL
  new <-SNIP_new$MDL
  new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "~explain here~.\n"))
  #SNIP_new <- SNIP_new %>%
  #  mutate( #Due stuff to prep column to be converted to Numeric
  #    MDL = as.numeric(new)
  #  )
} else {
  cat("'MDL' column converted to numeric format\n")
  SNIP_new$MDL <- as.numeric(SNIP_new$MDL)}
## 'MDL' column converted to numeric format
if(any(grepl('[a-df-zA-DF-Z]',SNIP_new$RL))){
  old <-SNIP_new$RL
  new <-SNIP_new$RL
  new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "~explain here~.\n"))
  #SNIP_new <- SNIP_new %>%
  #  mutate( #Due stuff to prep column to be converted to Numeric
  #    RL = as.numeric(new)
  #  )
} else {
  cat("'RL' column converted to numeric format\n")
  SNIP_new$RL <- as.numeric(SNIP_new$RL)}
## 'RL' column converted to numeric format
# Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information
nrow(SNIP_new) #Number rows before
## [1] 22992
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS#
SNIP_new <- SNIP_new %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- SNIP_new %>% #Record rows where Result, MDL, & RL all equal <NA>
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
#View(na_results)
SNIP_new <- anti_join(SNIP_new, na_results, by='SourceRow') #returns rows from SNIP_new not matching values in no_result
nrow(SNIP_new) #Number rows after
## [1] 22022
# Format Units Column - "ng/L", "mg/Kg"
unique(SNIP_new$Unit) #Identifies OLDNAMES
##  [1] "ng/L"        "ng/g"        "ppm"         "mg/Kg dw"    "µg/l"        "mg/kg dw"   
##  [7] "µg/kg dw"    "ng/l"        "mg/kg ww"    "mg/kg"       "ng/g dw"     "µg/g dw"    
## [13] "Percent"     "mg/Kg"       "µg/kg"       "mg/l"        "ng/g dry wt"
unique(paste(SNIP_new$MatrixName, SNIP_new$Unit, sep='; '))
##  [1] "Aqueous; ng/L"         "Sediment; ng/g"        "Sediment; ppm"        
##  [4] "Sediment; mg/Kg dw"    "Aqueous; µg/l"         "Sediment; mg/kg dw"   
##  [7] "Sediment; µg/kg dw"    "Aqueous; ng/l"         "Sediment; mg/kg ww"   
## [10] "Sediment; mg/kg"       "Sediment; ng/g dw"     "Sediment; µg/g dw"    
## [13] "Sediment; Percent"     "Sediment; mg/Kg"       "Sediment; µg/kg"      
## [16] "Aqueous; mg/l"         "Aqueous; ng/g dw"      "Sediment; ng/g dry wt"
## [19] "Aqueous; mg/kg ww"
SNIP_new <- SNIP_new %>%
  filter(Unit != 'Percent') %>% #filter our the 6 results with Percent unit; can not use these results to compare to other data
  standardizeUnits(pp="mass")
unique(paste(SNIP_new$MatrixName, SNIP_new$Unit, sep='; '))
## [1] "Aqueous; ng/L"          "Sediment; mg/Kg"        "Sediment; mg/Kg dw"    
## [4] "Sediment; mg/Kg ww"     "Aqueous; mg/Kg dw"      "Sediment; mg/Kg dry wt"
## [7] "Aqueous; mg/Kg ww"
# Format Date and Time Column #
# NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column
# THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS

SNIP_new <- SNIP_new %>%
  mutate(SampleDate = recode(SampleDate,
                             "Sept-Oct 2002" ="9/15/2002",
                             "9/12/2007 - 9/13/2007" = "9/12/2007"
  )
  )%>%
  mutate(SampleComments = if_else(is.na(SampleDate), paste0('No SampleDate provided so AnalysisDate was used for SampleDate; ',SampleComments), SampleComments),
         SampleDate = if_else(is.na(SampleDate), AnalysisDate, SampleDate)
  ) %>%
  mutate(SampleDate = lubridate::parse_date_time(SampleDate, orders = c("ymd", "mdy", "mdyHMS")) #had to break this into a seperate mutate for it to work properly
  )
#length(SNIP_new$SourceRow[is.na(SNIP_new$SampleDate)]) #this needs to be 0


SNIP_new <- SNIP_new %>%
  mutate(SampleTime = if_else(SampleTime == '9999', NA_character_, SampleTime),
         SampleTime = lubridate::parse_date_time(SampleTime, orders = c("HM", "HMS"))
  )
## Warning: 2 failed to parse.
SNIP_new<- SNIP_new %>%
  mutate(
    #rowise() %>%    # rowise is very slow - so used sapply to make this a rowise operation
    #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better
    SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))),
    SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(SampleTime, "%H:%M:%S")),
    #COMBINE DATE AND TIME INTO SampleDateTime COLUMN
    SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')),
    #FORMAT SampleDateTime COLUMN TO DATE FORMAT
    SampleDateTime = lubridate::ymd_hms(SampleDateTime)
  )


### REMOVE TEMPORARY COLUMNS ###
SNIP_new <- SNIP_new %>%
  select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed





### MERGE SNIP & R5AQ ###
R5AQ_SNIP <- rbind(R5AQ_new, SNIP_new)

### ADD TMDL & SUBAREA DESIGNATION ###
R5AQ_SNIP <- R5AQ_SNIP %>%
  # Fix CoordSystem nomenclature
  mutate(CoordSystem = recode(CoordSystem,
                              "NAD 83" = 'NAD83',
                              "NAD 1927" = 'NAD27',
                              "NR" = 'NAD83',
                              "nr" = 'NAD83',
                              "OTHER" = 'NAD83',
                              "UNKWN" = 'NAD83',
                              .missing = 'NAD83'),

         TargetLatitude = ifelse(grepl('[a-zA-Z]', TargetLatitude), NA_character_, TargetLatitude),
         TargetLatitude = gsub('[[:space:]]','',TargetLatitude), #need to remove spaces in text so NAs aren't introduced by coercion
         TargetLatitude = as.numeric(TargetLatitude),

         TargetLongitude = ifelse(grepl('[a-zA-Z]',TargetLongitude), NA_character_, TargetLongitude),
         TargetLongitude = gsub('[[:space:]]','',TargetLongitude), #need to remove spaces in text so NAs aren't introduced by coercion
         TargetLongitude = as.numeric(TargetLongitude),
         TargetLongitude = ifelse(TargetLongitude > 0, -TargetLongitude, TargetLongitude)) #fix longitudes that were incorrectly entered as a positive value
# LOAD WORKBOOK OF SITES THAT HAVE TMDL & DELTA SUBREGION DESIGNATIONS #
demarcatedSites <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/3b_AqSites_DemarcFromGIS.xlsx', sheet=1) %>%
  mutate(uniqName = paste(StationName, round(TargetLatitude,4), round(TargetLongitude,4))) %>% # needs to be the same code used in Script2 ScopeDaSites %>% distinct()
  select(uniqName, TMDL, Subarea)

# JOIN R5AQ_SNIP with TMDL & DeltaSubregion COLUMNS 
R5AQ_SNIP <- R5AQ_SNIP %>%
  mutate(uniqName = paste(StationName, round(TargetLatitude,4), round(TargetLongitude,4))) %>% # needs to be same code for uniqName above
  left_join(., demarcatedSites, by='uniqName') %>% # adds TMDL & Subarea columns
  select(-uniqName) %>%  #Put TMDL column in first position
  select(TMDL, Subarea, everything())
nrow(R5AQ_SNIP)
## [1] 27678
# Fix Subareas that GIS pulled into wrong Subarea using SpatialJoin and 100m extension (StationName identified by looking at GIS map)
R5AQ_SNIP <- R5AQ_SNIP %>%
  mutate(Subarea = ifelse(grepl("San Joaquin River near Burns Canal", StationName, ignore.case=T),
                          "San Joaquin River", Subarea))

R5AQ_SNIP <- R5AQ_SNIP %>%
  filter(TMDL == 'Delta',
         SampleDateTime > '1992-01-01',    #Date specific to Aq data (carried over from Script 5 - QA data) 
         !is.na(Result))               #Not sure how to handle NA results yet)



### TEST FOR HIDDEN REPEATS ###
hiddenRepeatTest <- R5AQ_SNIP %>%
  select(SourceID, SourceRow, Subarea, StationName, `Fraction Name`, SampleDate, Analyte, AnalytebyFormula, Unit, Result, MDL, RL, TargetLatitude, TargetLongitude, SampleDateTime, MatrixName, WBT, CitationCode, Project) %>%
  mutate(RepeatsByDateTime = paste(Subarea, Result, Unit, SampleDateTime)) %>%
  group_by(RepeatsByDateTime) %>%
  filter(n() > 1) %>%
  ungroup %>%
  mutate(hiddenRepeat1      = paste(Subarea, Analyte, Result, Unit, SampleDateTime)) %>%
  group_by(hiddenRepeat1) %>%
  filter(n() == 1) %>%
  ungroup %>%
  mutate(hiddenRepeat2      = paste(SourceID, Subarea, Analyte, Result, Unit, SampleDateTime)) %>%
  group_by(hiddenRepeat2) %>%
  filter(n() == 1) %>%
  group_by(RepeatsByDateTime) %>%
  mutate(n = n()) %>%
  arrange(RepeatsByDateTime) %>%   # arrange by Heirarchy column so SourceID we want to keep is on top. This allows it to be captured by distinct() and then removed from repeated data. 
  ungroup()
nrow(hiddenRepeatTest)
## [1] 1446
View(hiddenRepeatTest)
writexl::write_xlsx(hiddenRepeatTest, 'Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/R5AQ & SNIP - Hidden Repeat Test Results.xlsx')

## Evaluating the results shows that "Mercury" & "Mercury, Methyl" in SNIP should be labeled as "dissolved" not "total"

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        splines_4.2.2       googledrive_2.0.0  
## [21] htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1         compiler_4.2.2     
## [25] modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3     htmltools_0.5.3    
## [29] tidyselect_1.1.2    viridisLite_0.4.1   fansi_1.0.3         crayon_1.5.1       
## [33] tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0         grid_4.2.2         
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3          
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0          
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2     
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2        
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       colorspace_2.0-3   
## [57] gargle_1.2.0        rvest_1.0.3         knitr_1.40          haven_2.5.1
    Sys.time()
## [1] "2024-01-04 13:54:17 PST"