This report is automatically generated with the R package knitr (version 1.40) .

source("R Functions/functions_QA data.R")


#use activitytypecode to filter out QA - 1 field replicate
#there were a few samples with 'Sample-Composite Without Parents' in the ActivityTypeCode' column; 
      #I believe these are all in Lake Natmoas and therefore not in our official scope but I included them in this scope b/c of how the GIS layer is currently (all the way up to folsom)
#since there was no info on composite data for any samples, I put '1' in the composite number amount but these should be filtered out anyway
#(we didn't put NA_Character b/c we needed a number for weighted averaging. we put 1 to be safe)
#there was no 'mean' or 'MPN' in 'StatisticalBaseCode' column for WQp fish


### LOAD DATA ###
WQP_FISH <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='WQP FISH filtered 021519', guess_max = 30000)


WQP_FISH_STATION <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='station', guess_max = 30000)%>%
  select(MonitoringLocationIdentifier,MonitoringLocationName, HorizontalCoordinateReferenceSystemDatumName, LatitudeMeasure, LongitudeMeasure,
         MonitoringLocationTypeName)%>%
  rename(StationName = 'MonitoringLocationName')%>%
  rename(CoordSystem = 'HorizontalCoordinateReferenceSystemDatumName')%>%
  rename(TargetLatitude = 'LatitudeMeasure')%>%
  rename(TargetLongitude = 'LongitudeMeasure')%>%
  rename(StationCode = 'MonitoringLocationIdentifier')%>%
  rename(WBT = 'MonitoringLocationTypeName')


WQP_FISH_PROJECT <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='project', guess_max = 30000)%>%
  select(ProjectIdentifier,ProjectName)%>%
  rename(ProjectCode = ProjectIdentifier)

WQP_FISH_RES_DETECT <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='res-detect-qnt-limit', guess_max = 30000)%>%
  filter(DetectionQuantitationLimitTypeName == 'Method Detection Level' & `DetectionQuantitationLimitMeasure/MeasureUnitCode`=='ng/g')%>%
  select(ResultIdentifier, `DetectionQuantitationLimitMeasure/MeasureValue`)%>%
  rename(MDL = `DetectionQuantitationLimitMeasure/MeasureValue`,
         SampleID = ResultIdentifier)

WQP_FISH_COMMON_NAME <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP.xlsx', sheet='TissueSpecies', guess_max = 30000)%>%
  select(SubjectTaxonomicName, CommonName)%>%
  rename(TaxonomicName = SubjectTaxonomicName)

nrow(WQP_FISH) #number of rows
## [1] 132
### SELECT USED COLUMNS, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
keep_cols <- c('SourceID','SourceRow','ActivityStartDate','CharacteristicName',
               'ResultMeasureValue', 'DetectionQuantitationLimitMeasure/MeasureValue',
               'SampleTissueAnatomyName', 'SubjectTaxonomicName', 'ActivityStartTime/Time',
               'MonitoringLocationIdentifier','ResultIdentifier','ProjectIdentifier'
)

temp_cols <- c('ActivityTypeCode',
               'ActivityMediaName','ResultStatusIdentifier','ResultWeightBasisText',
               'DetectionQuantitationLimitMeasure/MeasureUnitCode',
               'DetectionQuantitationLimitTypeName', 'ResultAnalyticalMethod/MethodIdentifier',
               'ActivityCommentText','HydrologicCondition','HydrologicEvent','ResultMeasure/MeasureUnitCode',
               'ResultAnalyticalMethod/MethodName','MethodDescriptionText'
)


WQP_FISH_new <- WQP_FISH %>%
  select( c(keep_cols,temp_cols) ) %>% #DO NOT CHANGE - selects columns specified above
  rename(
    #Rename worksheet columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME'
    #DELETE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE WORKSHEET

  'StationCode' = 'MonitoringLocationIdentifier',
  'SampleDate' = 'ActivityStartDate',
  'Result' = 'ResultMeasureValue',
  'SampleID' = 'ResultIdentifier',
  'ProjectCode' = 'ProjectIdentifier',
  'TissueName' = 'SampleTissueAnatomyName',
  'RL' = 'DetectionQuantitationLimitMeasure/MeasureValue',
  TaxonomicName = SubjectTaxonomicName,
  'SampleTime' = 'ActivityStartTime/Time',
  'Analyte' = 'CharacteristicName' #total added later during standardizing
  ) %>%

   mutate(
    #Create Missing column or modify existing column here: CEDEN COLUMNNAME = 'SPECIFIED VALUE' or FUNCTION
    #DELTE COLUMN NAMES THAT DO NOT NEED TO BE CHANGED
    CitationCode = 'WQP_FISH_2018',
    CompositeRowID = NA_character_,
    CompositeID = NA_character_,
    `WeightAvg g` = NA_real_,
    `TLMin mm` = NA_real_,
    `TLMax mm` = NA_real_,
    `TLAvgLength mm` = NA_real_,
    ProgramName = NA_character_,
    ParentProjectName = NA_character_,
    QACode = NA_character_,
    BatchVerification = NA_character_,
    ComplianceCode = NA_character_,
    LabSubmissionCode = NA_character_,
    Method = paste(`ResultAnalyticalMethod/MethodIdentifier`, `ResultAnalyticalMethod/MethodName`, MethodDescriptionText, sep='; '), #can fix NA if necessary
      Method = gsub('NA', '', Method),     # fix issues of pasting instances with NAs
      Method = gsub('^(; )+|(; )+$', '', Method),

    ResultComments = paste(paste0('HydrologicalEvent: ', HydrologicEvent),
                    paste0('HydrologicCondition: ', HydrologicCondition),
                    paste0('ActivityCommentText: ', ActivityCommentText),
                    paste0('ActivityTypeCode: ', ActivityTypeCode),
                    paste0('ResultStatusIdentifier: ', ResultStatusIdentifier), sep=' ~ '), #can fix NA if necessary
    Unit = paste(`ResultMeasure/MeasureUnitCode`, ifelse(grepl('Dry', ResultWeightBasisText, ignore.case=TRUE),'dw','ww'), sep=' '),
    NumberFishPerComp = 1  #1 bc need number for weight of averaging
  )


nrow(WQP_FISH_new)
## [1] 132
#str(WQP_FISH_new) #just to check data class of different columns - e.g., is Date column in POSIX format?
#View(WQP_FISH_new)

### MATCH COLUMN PARAMETERS from other tabs in spreadsheet, these add columns###

#MDL
WQP_FISH_new <- WQP_FISH_new %>%
  left_join(., WQP_FISH_RES_DETECT, by = 'SampleID') # adds MDL column

#project name
WQP_FISH_new <- WQP_FISH_new %>%
  left_join(., WQP_FISH_PROJECT, by = 'ProjectCode')

#station name, latitude, longitude, WBT, coordinate system
WQP_FISH_new <- WQP_FISH_new %>%
  left_join(., WQP_FISH_STATION, by = 'StationCode')

#common name from taxonomic name
WQP_FISH_new <- WQP_FISH_new %>%
  left_join(., WQP_FISH_COMMON_NAME, by = 'TaxonomicName')

### filter locations in scope, biologic tissue and tissue in ActivityMediaName column, put dry or wet weight with unit)
### FORMAT COLUMN PARAMETERS ###

  # Standardize WBT (WaterBodyType) Groups - "River/Stream", "Estuary", Drain/Canal", "Wetland", "Spring", "Slough", 
  #                                          "Pond",  "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" #
unique(WQP_FISH_new$WBT) #Identifies OLDNAMES
## [1] "Stream"       "River/Stream"
#STANDARD CODE TO CHANGE GROUPING NAMES
WQP_FISH_new <- WQP_FISH_new %>%
  mutate(WBT = recode(WBT, "Stream" = "River/Stream"))
unique(WQP_FISH_new$WBT) #New naming structure for WBT Groupings
## [1] "River/Stream"
  # Standardize TissueName Groups - "Fillet" or "Whole Body" #
unique(WQP_FISH_new$TissueName)
## [1] "Fillet"                       "Organism, whole"             
## [3] "Fillet dorsal piece"          "Fish Fillet, Homog., Skin On"
WQP_FISH_new <- WQP_FISH_new %>%
  mutate(TissueName = recode(TissueName,
                             #"Fish Fillet, Homog., Skin On" = "Fillet", not allowable per LS 
                             #"Fillet dorsal piece" = "Fillet",not allowable per LS 
                             "Organism, whole" = "Whole Body"

                             )
         ) %>%
  filter(TissueName %in% c('Fillet','Whole Body'))
unique(WQP_FISH_new$TissueName)
## [1] "Fillet"     "Whole Body"
  # Standardize Analyte Groups - "Mercury, Total" (we consider Total Mercury and Methylmercury to be approx equal) #
unique(WQP_FISH_new$Analyte)
## [1] "Mercury"           "Methylmercury(1+)"
WQP_FISH_new <- WQP_FISH_new %>%
  mutate( Analyte = recode(Analyte,
                           "Mercury" = "Mercury, Total",
                           "Methylmercury(1+)" = "Mercury, Total"
                           ) )
unique(WQP_FISH_new$Analyte)
## [1] "Mercury, Total"
  # Format Result Column to Numeric#
  # Check column for text - based on text user needs to decide what to do
if(!is.numeric(WQP_FISH_new$Result) & any(grepl('<|[a-df-zA-DF-Z]', WQP_FISH_new$Result))){
  old <-WQP_FISH_new$Result
  new <-WQP_FISH_new$Result
  new[grepl('<|[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "~explain here~.\n"))
  WQP_FISH_new <- WQP_FISH_new %>%
    mutate( Result = ifelse(Result=="", NA_character_, Result),
      Result = as.numeric(new)
      )
} else {
  WQP_FISH_new$Result = as.numeric(WQP_FISH_new$Result)
  cat("'Result' column is in numeric format\n")}
## 'Result' column is in numeric format
  # Format MDL Column to Numeric#
  # Check column for text - based on text user needs to decide what to do
if(!is.numeric(WQP_FISH_new$MDL) & any(grepl('<|[a-df-zA-DF-Z]', WQP_FISH_new$MDL))){
  old <-WQP_FISH_new$MDL
  new <- WQP_FISH_new$MDL
  new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "~explain here~.\n"))
  #WQP_FISH_new <- WQP_FISH_new %>%
  #  mutate( #Do stuff to prep column to be converted to Numeric
  #    MDL = as.numeric(new)
  #  )
} else {
  WQP_FISH_new$MDL = as.numeric(WQP_FISH_new$MDL)
  cat("'MDL' column is in numeric format\n")}
## 'MDL' column is in numeric format
  # Format RL Column to Numeric#  
  # Check column for text - based on text user needs to decide what to do
if(!is.numeric(WQP_FISH_new$RL) & any(grepl('<|[a-df-zA-DF-Z]', WQP_FISH_new$RL))){
  old <-WQP_FISH_new$RL
  new <-WQP_FISH_new$RL
  new[grepl('[a-df-zA-DF-Z]', new)] <- NA #skip 'e' for exponential notation e.g., "8e-005"
  #Print what text was found and what is being done
  cat(paste0("'Result' column should be numeric but some cells contain ", grammaticList(setdiff(old, new)),
             ".\nACTIONS TAKEN:\n",
             "~explain here~.\n"))
  #WQP_FISH_new <- WQP_FISH_new %>%
  #  mutate( #Due stuff to prep column to be converted to Numeric
  #    RL = as.numeric(new)
  #  )
} else {
  WQP_FISH_new$RL = as.numeric(WQP_FISH_new$RL)
  cat("'RL' column is in numeric format\n")}
## 'RL' column is in numeric format
# Standardize ResultQualCode Groups - "ND", "DNQ", NA#
WQP_FISH_new <- WQP_FISH_new %>%
  rowwise()%>% #necessary to do the following between() as a single row comparison
  mutate(ResultQualCode = case_when(between(Result, MDL, RL) ~ "DNQ",
                                 Result < MDL ~ "ND",
                                 Result < RL ~ "ND",
                                 TRUE ~ "=")
  )
unique(WQP_FISH_new$ResultQualCode) #New naming structure for ResultQualCode Groupings
## [1] "="
  # Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information
nrow(WQP_FISH_new) #Number rows before
## [1] 99
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS#
WQP_FISH_new <- WQP_FISH_new %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- WQP_FISH_new %>% #Record rows where Result, MDL, & RL all equal <NA>
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
#View(na_results)
WQP_FISH_new <- anti_join(WQP_FISH_new, na_results, by='SourceRow') #returns rows from WQP_FISH_new not matching values in no_result
nrow(WQP_FISH_new) #Number rows after
## [1] 99
  # Format Units Column - "mg/Kg ww" or "mg/Kg dw"
unique(WQP_FISH_new$Unit) #Identifies OLDNAMES
## [1] "mg/kg dw" "ug/kg dw" "mg/kg ww"
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column
WQP_FISH_new <- WQP_FISH_new %>%
  standardizeUnits(pp='mass')
unique(WQP_FISH_new$Unit) #New naming structure for Unit Groupings
## [1] "mg/Kg dw" "mg/Kg ww"
  # Format Date and Time Column #
# THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SAME COLUMNS - IF TIME IS IN SEPERATE COLUMN LOOK AT AQ LINKAGE DATA TEMPLATE
WQP_FISH_new <- WQP_FISH_new %>%
  #rowise() %>%    # rowise is very slow - so used sapply to make this a rowise operation
  mutate(
    #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better
    SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))),
    SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(lubridate::ymd_hms(SampleTime), "%H:%M:%S")),
    #COMBINE DATE AND TIME INTO SampleDateTime COLUMN
    SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')),
    #FORMAT SampleDateTime COLUMN TO DATE FORMAT
    SampleDateTime = lubridate::ymd_hms(SampleDateTime)
  )


### REMOVE TEMPORARY COLUMNS ###
WQP_FISH_new <- WQP_FISH_new %>%
  select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed
#View(WQP_FISH_new)


## SAVE FORMATTED DATA AS EXCEL FILE ##
writexl::write_xlsx(WQP_FISH_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Fish/WQP_FISH_ceden_format.xlsx')
# In excel, to convert SampleDate column to Date format
# 1 - Select the date column.
# 2 - Go to the Data-tab and choose "Text to Columns".
# 3 - On the first screen, leave radio button on "delimited" and click Next.
# 4 - Unselect any delimiter boxes (everything blank) and click Next.
# 5 - Under column data format choose Date, select YMD
# 6 - Click Finish.

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        splines_4.2.2       googledrive_2.0.0  
## [21] htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1         compiler_4.2.2     
## [25] modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3     htmltools_0.5.3    
## [29] tidyselect_1.1.2    fansi_1.0.3         viridisLite_0.4.1   crayon_1.5.1       
## [33] tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0         grid_4.2.2         
## [37] jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1     DBI_1.1.3          
## [41] magrittr_2.0.3      scales_1.2.1        writexl_1.4.0       cli_3.3.0          
## [45] stringi_1.7.8       fs_1.5.2            xml2_1.3.3          ellipsis_0.3.2     
## [49] generics_0.1.3      vctrs_0.4.1         expint_0.1-7        tools_4.2.2        
## [53] glue_1.6.2          hms_1.1.2           fastmap_1.1.0       colorspace_2.0-3   
## [57] gargle_1.2.0        rvest_1.0.3         knitr_1.40          haven_2.5.1
    Sys.time()
## [1] "2024-01-05 11:42:04 PST"