This report is automatically generated with the R package knitr (version 1.40) .

source("R Functions/functions_QA data.R")


### LOAD DATA ###
DRMP <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DRMP_2016_2019_Aq_Sed_Data.xlsx', sheet='Sheet1', guess_max = 30000)
nrow(DRMP) #number of rows should match the Excel file (minus the header row)
## [1] 3366
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <- c('SourceID','SourceRow', 'ProjectCode', 'StationCode', 'SampleDate', 'CollectionTime', 'LabBatch', 'LabSampleID', 'MatrixName', 'MethodName', 'UnitName', 'Result', 'MDL', 'RL', 'ResQualCode',
               'SampleID', 'SampleComments', 'QACode', 'ComplianceCode', 'LabCollectionComments', 'SampleTypeCode')

### LabSampleID shows that some data are matrix spikes '-MS' or '-MSD' or duplicates '-dup'. Remove matrix spikes and make note of duplicate in SampleTypeCode column 

temp_cols <- c('AnalyteName', 'FractionName', 'CollectionDeviceName', 'CollectionMethodCode') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL)
#temp_cols are removed before the data is merged with other datasets

DRMP_new <- DRMP %>%
  select( c(keep_cols,temp_cols) ) %>% #DO NOT CHANGE - selects columns specified above
  rename(
    #Rename DRMP columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME'
    #DELTE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE DRMP
#CitationCode = , added with mutate below
#CoordSystem = , added with left_join(DRMP_locations)
    Project = ProjectCode,
#StationName = , added with left_join(DRMP_locations)
    SampleTime = CollectionTime,
#WBT = '', added with left_join(DRMP_locations)
#Analyte = , added with mutate below
    Unit = UnitName,
    ResultQualCode = ResQualCode,
    SampleID = SampleID,
    SampleComments = SampleComments,
#TargetLatitude = , added with left_join(DRMP_locations)
#TargetLongitude = , added with left_join(DRMP_locations)
#BatchVerification = , added with mutate below
#CollectionComments = , added with mutate below
    ResultsComments = LabCollectionComments
#BatchComments = '' added with mutate below
  ) %>%
  mutate(
    #Create Missing column or modify existing column here: CEDEN COLUMNNAME = 'SPECIFIED VALUE' or FUNCTION
    #DELTE COLUMN NAMES THAT DO NOT NEED TO BE CHANGED
    CitationCode = 'DRMP',
    Analyte = paste(AnalyteName, FractionName, sep=', '),
#CoordSystem = '', added with left_join(DRMP_locations)
#StationName = '', added with left_join(DRMP_locations)
#WBT = '', added with left_join(DRMP_locations)
#TargetLatitude = '', added with left_join(DRMP_locations)
#TargetLongitude = '', added with left_join(DRMP_locations)
    BatchVerification = NA_character_,
    CollectionComments = paste(CollectionDeviceName, CollectionMethodCode, sep=' ~ '),
    BatchComments = NA_character_
  )

nrow(DRMP_new)
## [1] 3366
#str(DRMP_new) #just to check data class of different columns - e.g., is Date column in POSIX format?
#View(DRMP_new)


#Add columns from "DRMP_locations.xlsx"
DRMP_locations <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DRMP_locations.xlsx', sheet='Locations', guess_max = 30000)
DRMP_new <- left_join(DRMP_new, DRMP_locations, by='StationCode') %>%                   #Adds columns StationName, WBT, LocationCode, TargetLatitude, TargetLongitude, CoordSystem based on StationCode
  mutate(CollectionComments = paste(LocationCode, CollectionComments, sep=' ~ ')) %>%  #Add LocationCode to CollectionComments
  select(-LocationCode)                                                                #Remove LocationCode
nrow(DRMP_new)
## [1] 3366
#Filter Satation Codes
unique(DRMP_new$StationCode)
##  [1] "000NONPJ_A" "000NONPJ_B" "000NONPJ_C" "000NONPJ_D" "000NONPJ_E" "000NONPJ_F"
##  [7] "000NONPJ_G" "000NONPJ_H" "000NONPJ_I" "000NONPJ_J" "000NONPJ_K" "000NONPJ_L"
## [13] "000NONPJ_M" "000NONPJ_N" "000NONPJ_O" "510ADVLIM"  "510ST1317"  "541SJC501" 
## [19] "544LILPSL"  "544MDRBH4"  "LABQA"      "544ADVLM6"  "207SRD10A"  "544DMC020" 
## [25] "FIELDQA"    "000NONPJ"   "FieldQA"
dropStatioCodes <- c("LABQA", "FIELDQA", "FieldQA", grep("000NONPJ", unique(DRMP_new$StationCode), value=T)) #Do not want these StationCodes
DRMP_new <- DRMP_new %>%
  filter(StationCode %are not% dropStatioCodes)
unique(DRMP_new$StationCode)
## [1] "510ADVLIM" "510ST1317" "541SJC501" "544LILPSL" "544MDRBH4" "544ADVLM6" "207SRD10A"
## [8] "544DMC020"
nrow(DRMP_new)
## [1] 2499
### FORMAT COLUMN PARAMETERS ###
  # Standardize MatrixName Groups - "Aqueous", "Sediment", "Soil" #
unique(DRMP_new$MatrixName) #Identifies OLDNAMES
## [1] "samplewater" "blankwater"  "Samplewater" "Sediment"
#STANDARD CODE TO CHANGE GROUPING NAMES
DRMP_new <- DRMP_new %>%
  filter(MatrixName != 'blankwater') %>%
  mutate(MatrixName = recode(MatrixName, #COLUMN NAME WHERE CHANGES WILL BE MADE
                      #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed
                      "Samplewater" = "Aqueous",
                      "samplewater" = "Aqueous"
                      )
         )
unique(DRMP_new$MatrixName) #Lists new naming structure
## [1] "Aqueous"  "Sediment"
nrow(DRMP_new)
## [1] 2471
  # Standardize WBT (WaterBodyType) Groups - "River/Stream", "Drain/Canal", "Wetland", "Spring", "Slough", 
  #                                          "Pond",  "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" #
unique(DRMP_new$WBT) #Identifies OLDNAMES
## [1] "River/Stream" "X2"
#Look for inconsistent group pairings between MatrixName and WBT (e.g., 'Soil; Stream')
unique(paste(DRMP_new$MatrixName, DRMP_new$WBT, sep='; '))
## [1] "Aqueous; River/Stream"  "Aqueous; X2"            "Sediment; River/Stream"
#WBT & MatrixName look good


  # Standardize Analyte Groups - "Mercury, Total", "Mercury, Dissolved", "Mercury, Suspended", & same for Methylmercury #
unique(DRMP_new$Analyte) #IF ANALYTE IS IN 2 COLUMNS e.g., (Hg & MeHg); (Total, Dissolved, Suspended) SEE WQP SCRIPT FOR EXAMPLE
##  [1] "Dissolved Organic Carbon, Dissolved" "Mercury, Methyl, Dissolved"         
##  [3] "Mercury, Methyl, Total"              "Chlorophyll a, Particulate"         
##  [5] "Mercury, Dissolved"                  "Mercury, Total"                     
##  [7] "Total Suspended Solids, Total"       "Total Suspended Solids, Volatile"   
##  [9] "Clay, <0.0039 mm"                    "Mercury, methyl, Dissolved"         
## [11] "Mercury, methyl, Total"              "Moisture, Total"                    
## [13] "Sand, 0.0625 to <2.0 mm"             "Silt, 0.0039 to <0.0625 mm"         
## [15] "Total Organic Carbon, Total"         "Total Suspended Solids, Particulate"
DRMP_new <- DRMP_new %>%
  mutate(Analyte = recode(Analyte,
                          "Mercury, methyl, Total" = "Methylmercury, Total",
                          "Mercury, Methyl, Total" = "Methylmercury, Total",
                          "Mercury, methyl, Dissolved" = "Methylmercury, Dissolved",
                          "Mercury, Methyl, Dissolved" = "Methylmercury, Dissolved"
                              )
         ) %>%
  filter(grepl('mercury', Analyte, ignore.case=T))
nrow(DRMP_new)
## [1] 1244
#Create 'Analyte' column from Analyte & Analyte_part2 columns - then delete Analyte_part2 column##
unique(DRMP_new$Analyte) #New naming structure for Analyte Groupings
## [1] "Methylmercury, Dissolved" "Methylmercury, Total"     "Mercury, Dissolved"      
## [4] "Mercury, Total"
  # Standardize ResultQualCode Groups - "ND", "DNQ", NA#
unique(DRMP_new$ResultQualCode) #Identifies OLDNAMES
## [1] "="   "DNQ" "ND"
#ResultQualCodes are good


  # Format Result Column to Numeric#  
  # Check column for text - based on text user needs to decide what to do
if(!is.numeric(DRMP_new$Result)){
  if(all(is.na(DRMP_new$Result))){
    DRMP_new <- DRMP_new %>%
      mutate( #Column is all blanks and will be converted to Numeric
        RL = as.numeric(new)
      )
    cat("'Result' column is all blanks and was converted to numeric format\n")
  }else{
    old <-DRMP_new$Result
    new <-DRMP_new$Result
    new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
    #Print what text was found and what is being done
    cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
               ".\nACTIONS TAKEN:\n",
               "~explain here~.\n"))
    #DRMP_new <- DRMP_new %>%
    #  mutate( #Do stuff to prep column to be converted to Numeric
    #    Result = as.numeric(new)
    #    )
  }
}else{
  cat("'Result' column is in numeric format\n")}
## 'Result' column is in numeric format
  # Format MDL Column to Numeric#
  # Check column for text - based on text user needs to decide what to do
if(!is.numeric(DRMP_new$MDL)){
  if(all(is.na(DRMP_new$MDL))){
    DRMP_new <- DRMP_new %>%
      mutate( #Column is all blanks and will be converted to Numeric
        RL = as.numeric(new)
      )
    cat("'MDL' column is all blanks and was converted to numeric format\n")
  }else{
    old <-DRMP_new$MDL
    new <-DRMP_new$MDL
    new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
    #Print what text was found and what is being done
    cat(paste0("'MDL' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
               ".\nACTIONS TAKEN:\n",
               "~explain here~.\n"))
    #DRMP_new <- DRMP_new %>%
    #  mutate( #Do stuff to prep column to be converted to Numeric
    #    MDL = as.numeric(new)
    #    )
  }
}else{
  cat("'MDL' column is in numeric format\n")}
## 'MDL' column is in numeric format
  # Format RL Column to Numeric#  
  # Check column for text - based on text user needs to decide what to do
if(!is.numeric(DRMP_new$RL)){
  if(all(is.na(DRMP_new$RL))){
    DRMP_new <- DRMP_new %>%
      mutate( #Column is all blanks and will be converted to Numeric
        RL = as.numeric(new)
      )
    cat("'RL' column is all blanks and was converted to numeric format\n")
  }else{
    old <-DRMP_new$RL
    new <-DRMP_new$RL
    new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
    #Print what text was found and what is being done
    cat(paste0("'RL' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
               ".\nACTIONS TAKEN:\n",
               "~explain here~.\n"))
    #DRMP_new <- DRMP_new %>%
    #  mutate( #Do stuff to prep column to be converted to Numeric
    #    RL = as.numeric(new)
    #    )
  }
}else{
  cat("'RL' column is in numeric format\n")}
## 'RL' column is in numeric format
  # Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information
nrow(DRMP_new) #Number rows before
## [1] 1244
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS #
DRMP_new <- DRMP_new %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- DRMP_new %>% #Record rows where Result, MDL, & RL all equal <NA>
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
nrow(na_results)
## [1] 0
DRMP_new <- anti_join(DRMP_new, na_results, by='SourceRow') #returns rows from DRMP_new not matching values in no_result
nrow(DRMP_new) #Number rows after
## [1] 1244
# LabSampleID shows that some data are matrix spikes '-MS' or '-MSD' or duplicates '-dup', either remove dups or average with sample
DRMP_new$LabSampleID[grep('[A-z]',DRMP_new$LabSampleID)]   #show which LabSampleIDs have alphabetic characters
##   [1] "2016-2197-ms"  "2016-2197-msd" "2016-2758-ms"  "2016-2758-msd" "2017-0458-msd"
##   [6] "2017-0458-ms"  "2017-1310-dup" "2016-2204-ms"  "2016-2204-msd" "2017-0472-msd"
##  [11] "2017-0472-ms"  "2016-2189-msd" "2016-2189-ms"  "2016-2757-msd" "2016-2757-ms" 
##  [16] "2017-0457-msd" "2017-0457-ms"  "2016-2203-msd" "2016-2203-ms"  "2017-0469-dup"
##  [21] "2017-0471-ms"  "2017-0471-msd" "2017-1313-ms"  "2017-1313-msd" "2016-2201-dup"
##  [26] "2016-2191-dup" "2016-2759-dup" "2017-0459-dup" "2017-1311-ms"  "2017-1311-msd"
##  [31] "2017-2759-ms"  "2017-2759-msd" "2017-2758-ms"  "2017-2758-msd" "2018-0194-ms" 
##  [36] "2018-0194-msd" "2018-1106-ms"  "2018-1106-msd" "2018-0193-dup" "2018-0280-ms" 
##  [41] "2018-0280-msd" "2018-0279-ms"  "2018-0279-msd" "2018-0277-dup" "2018-0430-ms" 
##  [46] "2018-0430-msd" "2018-0428-dup" "2018-0437-ms"  "2018-0437-msd" "2018-0525-ms" 
##  [51] "2018-0525-msd" "2018-0526-ms"  "2018-0526-msd" "2018-0523-dup" "2018-0738-ms" 
##  [56] "2018-0738-msd" "2018-0739-ms"  "2018-0739-msd" "2018-0736-dup" "2018-1096-ms" 
##  [61] "2018-1096-msd" "2018-1097-ms"  "2018-1097-msd" "2018-1098-ms"  "2018-1098-msd"
##  [66] "2018-1094-dup" "2017-2798-MS"  "2017-2798-MSD" "2018-0125-dup" "2018-0581-MS" 
##  [71] "2018-0581-MSD" "2018-0582-dup" "2018-0587-MS"  "2018-0587-MSD" "2018-1143-dup"
##  [76] "2018-1155-MS"  "2018-1155-MSD" "2018-1086-ms"  "2018-1086-msd" "2018-1087-ms" 
##  [81] "2018-1087-msd" "2017-2735-ms"  "2017-2735-msd" "2017-2734-dup" "2017-2737-ms" 
##  [86] "2017-2737-msd" "2018-0166-dup" "2018-0172-ms"  "2018-0172-msd" "2018-0173-ms" 
##  [91] "2018-0173-msd" "2018-0260-ms"  "2018-0260-msd" "2018-0261-dup" "2018-0259-ms" 
##  [96] "2018-0259-msd" "2018-0413-ms"  "2018-0413-msd" "2018-0412-dup" "2018-0415-ms" 
## [101] "2018-0415-msd" "2018-0507-ms"  "2018-0507-msd" "2018-0505-dup" "2018-0509-ms" 
## [106] "2018-0509-msd" "2018-0721-ms"  "2018-0721-msd" "2018-0718-dup" "2018-0719-ms" 
## [111] "2018-0719-msd" "2018-1076-ms"  "2018-1076-msd" "2018-1077-ms"  "2018-1077-msd"
## [116] "2018-1079-dup" "2018-1074-ms"  "2018-1074-msd" "2018-1078-ms"  "2018-1078-msd"
## [121] "2017-2800-ms"  "2017-2800-msd" "2017-2788-dup" "2018-0126-ms"  "2018-0126-msd"
## [126] "2018-0125-dup" "2018-0576-ms"  "2018-0576-msd" "2018-0575-dup" "2018-1143-dup"
## [131] "2018-1155-ms"  "2018-1155-msd" "2018-1283-ms"  "2018-1284-ms"  "2018-1281-dup"
## [136] "2018-1284-msd" "2018-1283-msd" "2018-1833-msd" "2018-1833-ms"  "2018-2092-dup"
## [141] "2019-0092-dup" "2019-0093-ms"  "2019-0094-ms"  "2019-0093-msd" "2019-0094-msd"
## [146] "2019-0211-dup" "2019-0213-msd" "2019-0213-ms"  "2019-0212-ms"  "2019-0212-msd"
## [151] "2019-0432-msd" "2019-0431-ms"  "2019-0432-ms"  "2019-0431-msd" "2019-0634-dup"
## [156] "2019-0635-ms"  "2019-0635-msd" "2019-1088-dup" "2018-1274-msd" "2018-1271-dup"
## [161] "2018-1273-ms"  "2018-1273-msd" "2018-1274-ms"  "2018-1547-msd" "2018-1548-ms" 
## [166] "2018-1545-dup" "2018-1548-msd" "2018-1547-ms"  "2018-1819-ms"  "2018-1818-ms" 
## [171] "2018-1818-msd" "2018-1819-msd" "2018-1816-dup" "2018-1822-dup" "2018-2076-ms" 
## [176] "2018-2076-msd" "2018-2074-ms"  "2018-2074-msd" "2019-0417-msd" "2019-0416-msd"
## [181] "2019-0414-dup" "2019-0416-ms"  "2019-0417-ms"  "2019-0823-msd" "2019-0823-ms" 
## [186] "2019-0822-ms"  "2019-0822-msd" "2019-0819-dup" "2019-1076-msd" "2019-1076-ms" 
## [191] "2019-1080-msd" "2019-1080-ms"  "2018-1270-dup" "2019-0409-dup" "2019-0611-msd"
## [196] "2019-0611-ms"  "2019-0616-msd" "2019-0616-ms"  "2018-1258-msd" "2018-1258-ms" 
## [201] "2018-1255-dup" "2018-1253-ms"  "2018-1256-ms"  "2018-1254-ms"  "2018-1253-msd"
## [206] "2018-1256-msd" "2018-1254-msd" "2018-1260-ms"  "2018-1259-msd" "2018-1260-msd"
## [211] "2018-1257-dup" "2018-1259-ms"  "2018-1525-msd" "2018-1525-ms"  "2018-1532-msd"
## [216] "2018-1532-ms"  "2018-1799-msd" "2018-1798-ms"  "2018-1798-msd" "2018-1800-dup"
## [221] "2018-1799-ms"  "2018-2056-msd" "2018-2056-ms"  "2018-2060-dup" "2018-2059-ms" 
## [226] "2018-2059-msd" "2019-0059-ms"  "2019-0059-msd" "2019-0060-ms"  "2019-0060-msd"
## [231] "2019-0061-dup" "2019-0185-msd" "2019-0182-ms"  "2019-0182-msd" "2019-0183-dup"
## [236] "2019-0185-ms"  "2019-0403-dup" "2019-0401-ms"  "2019-0401-msd" "2019-0402-ms" 
## [241] "2019-0402-msd" "2019-0600-ms"  "2019-0601-dup" "2019-0600-msd" "2019-0607-ms" 
## [246] "2019-0607-msd" "2019-0803-ms"  "2019-0804-ms"  "2019-0801-dup" "2019-0804-msd"
## [251] "2019-0803-msd" "2019-1055-ms"  "2019-1055-msd" "2019-1056-ms"  "2019-1057-dup"
## [256] "2019-1056-msd" "2019-1267-dup" "2019-1269-ms"  "2019-1269-msd" "2019-1270-ms" 
## [261] "2019-1270-msd" "2019-1251-ms"  "2019-1251-msd" "2019-1252-ms"  "2019-1252-msd"
## [266] "2019-1253-dup" "2019-1448-dup" "2019-1449-ms"  "2019-1449-msd" "2019-1450-ms" 
## [271] "2019-1450-msd" "2019-1442-ms"  "2019-1442-msd" "2019-1443-ms"  "2019-1443-msd"
## [276] "2019-1444-dup" "2019-1779-dup" "2019-1782-ms"  "2019-1782-msd" "2019-1783-ms" 
## [281] "2019-1783-msd" "2019-1801-dup" "2019-1807-ms"  "2019-1807-msd" "2019-1808-ms" 
## [286] "2019-1808-msd" "2019-2018-ms"  "2019-2018-msd" "2019-2019-ms"  "2019-2019-msd"
## [291] "2019-2020-dup" "2019-2026-dup" "2019-2029-ms"  "2019-2029-msd" "2019-2030-ms" 
## [296] "2019-2030-msd"
unique(sub('[^A-z]*', '', DRMP_new$LabSampleID, perl=T))   #show unique alphabetic tags
## [1] ""    "ms"  "msd" "dup" "MS"  "MSD"
DRMP_new <- DRMP_new %>%
  filter(!grepl('ms', LabSampleID, ignore.case=T))         #remove data marked as matrix spike 'ms', 'MS', 'msd', or 'MSD'

unique(sub('[^A-z]*', '', DRMP_new$LabSampleID, perl=T))   #only dups are left
## [1] ""    "dup"
nrow(DRMP_new)
## [1] 1006
#Add Field Rep note to LabSampleID & SampleTypeCode
DRMP_new <- DRMP_new %>%
  mutate(tempLabSampleID = sub('-[A-z]*$', '', DRMP_new$LabSampleID, perl=T)) %>%                                                # remove '-dup' and use temp column for grouping same IDs
  arrange(tempLabSampleID, AnalyteName) %>%                                                                                          # arrange so dup sample is always second in list
  group_by(tempLabSampleID, AnalyteName) %>%
  mutate(n = n(),
        #LabSampleID    = ifelse(n>1, paste(tempLabSampleID, 'Field Rep', sep='-'), tempLabSampleID),                            # adds 'Field Rep' to both duplicates in LabSampleID Column
         LabSampleID = ifelse(n>1, paste(tempLabSampleID, paste0('Field Rep', row_number()), sep='-'), tempLabSampleID),         # use this code to add replicate numb e.g., Field Rep1, Field Rep2
        #SampleTypeCode = ifelse(n>1, paste(SampleTypeCode, 'Field Rep', sep=' ~ '), SampleTypeCode)
         SampleTypeCode = ifelse(n>1, paste(SampleTypeCode,  paste0('Field Rep', row_number()), sep=' ~ '), SampleTypeCode)) %>% # adds 'Field Rep' to both duplicates in SampleTypeCode Column
  ungroup %>%
  select(-tempLabSampleID, -n)                                                                                                   # remove temp col used for grouping & 'n' col used to identify duplicates
nrow(DRMP_new)
## [1] 1006
#OPT #1 - AVERAGE SAMPLE & DUPLICATE
temp1 <- DRMP_new %>%
  mutate(LabSampleID = ifelse(grepl('Field Rep[0-9]?', LabSampleID), sub('Field Rep[0-9]?', 'Field Rep Avg', LabSampleID), LabSampleID),
         SampleTypeCode = ifelse(grepl('Field Rep[0-9]?', SampleTypeCode), sub('Field Rep[0-9]?', 'Field Rep Avg', SampleTypeCode), SampleTypeCode)) %>%
  group_by(LabSampleID, AnalyteName) %>%
  mutate(Result = mean(Result)) %>%
  distinct(LabSampleID, AnalyteName, .keep_all=TRUE)
nrow(temp1)
## [1] 948
#OPT #2 - REMOVE DUPLICATES
temp2 <- DRMP_new %>%
  mutate(tempLabSampleID = sub('Rep[0-9]?', 'Rep', LabSampleID)) %>%  # Remove digit so LabSampleIDs are the same for the same Analyte 
  distinct(tempLabSampleID, AnalyteName, .keep_all=TRUE) %>%
  select(-tempLabSampleID)
nrow(temp2)
## [1] 948
  # Format Units Column - "ng/L", "mg/Kg"
unique(DRMP_new$Unit) #Identifies OLDNAMES
## [1] "ng/L"     "mg/Kg dw" "ug/Kg dw"
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column
DRMP_new <- DRMP_new %>%
  standardizeUnits
unique(DRMP_new$Unit) #New naming structure for Unit groupings
## [1] "ng/L"     "mg/Kg dw"
  # Format Date and Time Column #
# NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column
# THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS
DRMP_new <- DRMP_new %>%
  #rowise() %>%    # rowise is very slow - so used sapply to make this a rowise operation
  mutate(
    #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better
    SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))),
    SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(lubridate::ymd_hms(SampleTime), "%H:%M:%S")),
    #COMBINE DATE AND TIME INTO SampleDateTime COLUMN
    SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')),
    #FORMAT SampleDateTime COLUMN TO DATE FORMAT
    SampleDateTime = lubridate::ymd_hms(SampleDateTime)
  )


### REMOVE TEMPORARY COLUMNS ###
DRMP_new <- DRMP_new %>%
  select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed
#View(DRMP_new)

## SAVE FORMATTED DATA AS EXCEL FILE ##
writexl::write_xlsx(DRMP_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/DRMP_ceden_format.xlsx')

# In excel, to convert SampleDate column to Date format
# 1 - Select the date column.
# 2 - Go to the Data-tab and choose "Text to Columns".
# 3 - On the first screen, leave radio button on "delimited" and click Next.
# 4 - Unselect any delimiter boxes (everything blank) and click Next.
# 5 - Under column data format choose Date, select YMD
# 6 - Click Finish.

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=C                            
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## system code page: 65001
## 
## attached base packages:
## [1] grid      stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] mgcv_1.8-41        nlme_3.1-160       lubridate_1.8.0    plotly_4.10.0     
##  [5] readxl_1.4.1       actuar_3.3-0       NADA_1.6-1.1       forcats_0.5.2     
##  [9] stringr_1.4.1      dplyr_1.0.9        purrr_0.3.4        readr_2.1.2       
## [13] tidyr_1.2.0        tibble_3.1.8       ggplot2_3.3.6      tidyverse_1.3.2   
## [17] fitdistrplus_1.1-8 survival_3.4-0     MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] httr_1.4.4          jsonlite_1.8.0      viridisLite_0.4.1   splines_4.2.2      
##  [5] modelr_0.1.9        assertthat_0.2.1    highr_0.9           googlesheets4_1.0.1
##  [9] cellranger_1.1.0    yaml_2.3.5          pillar_1.8.1        backports_1.4.1    
## [13] lattice_0.20-45     glue_1.6.2          digest_0.6.29       rvest_1.0.3        
## [17] colorspace_2.0-3    htmltools_0.5.3     Matrix_1.5-1        pkgconfig_2.0.3    
## [21] broom_1.0.1         haven_2.5.1         webshot_0.5.3       scales_1.2.1       
## [25] tzdb_0.3.0          googledrive_2.0.0   generics_0.1.3      ellipsis_0.3.2     
## [29] withr_2.5.0         lazyeval_0.2.2      cli_3.3.0           magrittr_2.0.3     
## [33] crayon_1.5.1        evaluate_0.16       fs_1.5.2            fansi_1.0.3        
## [37] xml2_1.3.3          tools_4.2.2         data.table_1.14.2   hms_1.1.2          
## [41] expint_0.1-7        gargle_1.2.0        lifecycle_1.0.1     munsell_0.5.0      
## [45] reprex_2.0.2        writexl_1.4.0       compiler_4.2.2      rlang_1.0.5        
## [49] rstudioapi_0.14     htmlwidgets_1.5.4   crosstalk_1.2.0     rmarkdown_2.16     
## [53] gtable_0.3.1        DBI_1.1.3           R6_2.5.1            knitr_1.40         
## [57] fastmap_1.1.0       utf8_1.2.2          stringi_1.7.8       vctrs_0.4.1        
## [61] dbplyr_2.2.1        tidyselect_1.1.2    xfun_0.32
    Sys.time()
## [1] "2023-12-29 14:45:32 PST"