This report is automatically generated with the R
package knitr
(version 1.40
)
.
source("R Functions/functions_QA data.R") ### LOAD DATA ### CEDENAqSed <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/CEDENAqSed.xlsx', sheet='ceden_data_20181024082759', guess_max = 30000) nrow(CEDENAqSed) #number of rows should match the Excel file (minus the header row)
## [1] 8942
#Load CEDEN StationCodes to look up CoordSystem by StationCode CedenCoordSys <- readxl::read_excel('Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/0_CEDEN_StationCode_CoordSystem lookup.xlsx', sheet='Worksheet', guess_max = 30000) CedenCoordSys <- CedenCoordSys %>% select(StationCode, Datum) %>% rename('CoordSystem' = 'Datum') ### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ### #Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns keep_cols <- c('SourceID','SourceRow', 'Project', 'StationName', 'StationCode', 'SampleDate', 'CollectionTime', 'LabBatch', 'LabSampleID', 'MatrixName', 'SWRCBWatTypeCode', 'MethodName', 'Analyte', 'Unit', 'Result', 'MDL', 'RL', 'ResultQualCode', 'SampleID', 'SampleComments', 'TargetLatitude', 'TargetLongitude', 'QACode', 'BatchVerification', 'ComplianceCode', 'CollectionComments', 'ResultsComments', 'BatchComments', 'SampleTypeCode') temp_cols <- c('Program', 'ParentProject') CEDENAqSed_new <- CEDENAqSed %>% select( c(keep_cols, temp_cols)) %>% #DO NOT CHANGE - selects columns specified above rename('SampleTime' = 'CollectionTime') %>% mutate( #Add user defined columns #COLUMNNAME = 'THE SPECIFIED VALUE' CitationCode = 'CEDENAqSed', Project = paste(paste0('Project: ', Project), paste0('ParentProject: ', ParentProject), paste0('Program: ', Program), sep=' ~ '), ) %>% left_join( ., CedenCoordSys, by='StationCode' #adds in CoordSystem column ) %>% #rename(#Rename columns to CEDEN standards) rename( 'WBT' = 'SWRCBWatTypeCode' ) nrow(CEDENAqSed_new)
## [1] 8942
#str(CEDENAqSed_new) #just to check data class of different columns - e.g., is Date column in POSIX format? #View(CEDENAqSed_new) ### FORMAT COLUMN PARAMETERS ### # Standardize MatrixName Groups - "Water", "Sediment", "Soil" # unique(CEDENAqSed_new$MatrixName) #Identifies OLDNAMES
## [1] "samplewater" "sediment" "sediment, <63 um"
#STANDARD CODE TO CHANGE GROUPING NAMES CEDENAqSed_new <- CEDENAqSed_new %>% mutate(MatrixName = recode(MatrixName, #COLUMN NAME WHERE CHANGES WILL BE MADE #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed "samplewater" = "Aqueous", "sediment" = "Sediment", "sediment, <63 um" = "Sediment" ) ) unique(CEDENAqSed_new$MatrixName) #New naming structure should now be listed
## [1] "Aqueous" "Sediment"
# Standardize WBT (WaterBodyType) Groups - "River/Stream", "Drain/Canal", "Wetland", "Spring", "Slough", # "Pond", "Lake/Reservoir", "Delta", "Forebay/Afterbay", "Not Recorded" # unique(CEDENAqSed_new$WBT) #Identifies OLDNAMES
## [1] "R_Un" "R_NW" "NR" "W_F" "L_F" "R" "R_W" "E" "B"
#Look for inconsistent group pairings between MatrixName and WBT (e.g., 'Soil; Stream') unique(paste(CEDENAqSed_new$MatrixName, CEDENAqSed_new$WBT, sep='; '))
## [1] "Aqueous; R_Un" "Sediment; R_Un" "Aqueous; R_NW" "Aqueous; NR" "Sediment; W_F" ## [6] "Aqueous; L_F" "Aqueous; W_F" "Aqueous; R" "Sediment; R_W" "Aqueous; E" ## [11] "Aqueous; B" "Sediment; B"
#If an incosistent grouping exists, add comment to 'CollectionComments' column using code: mutate(CollectionComments = case_when(MatrixName=='CONDITION' ~ 'COMMENT', TRUE ~ MatrixName)), #STANDARD CODE TO CHANGE GROUPING NAMES CEDENAqSed_new <- CEDENAqSed_new %>% mutate(WBT = recode(WBT, #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed "R" = "River/Stream", "R_Un" = "River/Stream", "R_NW" = "River/Stream", "R_W" = "River/Stream", "W_F" = "Wetland", "L_F" = "Lake/Reservoir", "NR" = "Not Recorded", "E" = "Bay/Estuary", "B" = "Bay/Estuary" )) unique(paste(CEDENAqSed_new$MatrixName, CEDENAqSed_new$WBT, sep='; ')) #New naming structure for Matrix Name & WBT Groupings
## [1] "Aqueous; River/Stream" "Sediment; River/Stream" "Aqueous; Not Recorded" ## [4] "Sediment; Wetland" "Aqueous; Lake/Reservoir" "Aqueous; Wetland" ## [7] "Aqueous; Bay/Estuary" "Sediment; Bay/Estuary"
# Standardize Analyte Groups - "Mercury, Total", "Mercury, Dissolved", "Mercury, Suspended", & same for Methylmercury # #Following code Added by Robin unique(CEDENAqSed_new$Analyte)
## [1] "Mercury, Total" "Mercury, Dissolved" ## [3] "Mercury, Methyl, Dissolved" "Mercury, Methyl, Total" ## [5] "Mercury, Particulate" "Mercury, Methyl, Particulate"
CEDENAqSed_new <- CEDENAqSed_new %>% mutate(Analyte = recode(Analyte, #"OLDNAME" = "NEWNAME" - create a new line for each OLDNAME that needs to be changed 'Mercury, Methyl, Dissolved' = 'Methylmercury, Dissolved', 'Mercury, Methyl, Total' = 'Methylmercury, Total', 'Mercury, Methyl, Particulate' = 'Methylmercury, Suspended', 'Mercury, Particulate' = 'Mercury, Suspended') ) unique(CEDENAqSed_new$Analyte)
## [1] "Mercury, Total" "Mercury, Dissolved" "Methylmercury, Dissolved" ## [4] "Methylmercury, Total" "Mercury, Suspended" "Methylmercury, Suspended"
# Standardize ResultQualCode Groups - "ND", "DNQ", NA# unique(CEDENAqSed_new$ResultQualCode) #Identifies OLDNAMES
## [1] "=" "ND" "DNQ" "NR"
#CEDENAqSed_new <- CEDENAqSed_new %>% #mutate(ResultQualCode = recode(ResultQualCode, #Not Needed CEDENAqSed_new <- CEDENAqSed_new %>% filter(ResultQualCode != "NR") unique(CEDENAqSed_new$ResultQualCode) #New naming structure for ResultQualCode Groupings
## [1] "=" "ND" "DNQ"
# Format Result Column to Numeric# # Check column for text - based on text user needs to decide what to do #If/Else not needed since 'Result', 'MDL', and 'RL' are numeric # Format MDL Column to Numeric# # Check column for text - based on text user needs to decide what to do # If/Else not needed since 'Result', 'MDL', and 'RL' are numeric # Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information nrow(CEDENAqSed_new) #Number rows before
## [1] 8939
CEDENAqSed_new <- CEDENAqSed_new %>% #Set 0 & negative values as blank mutate(Result = ifelse(Result <= 0, NA_real_, Result), MDL = ifelse(MDL <= 0, NA_real_, MDL), RL = ifelse(RL <= 0, NA_real_, RL)) na_results <- CEDENAqSed_new %>% #Record rows where Result, MDL, & RL all equal <NA> or 0 filter( is.na(Result) & is.na(MDL) & is.na(RL) ) nrow(na_results)
## [1] 87
CEDENAqSed_new <- anti_join(CEDENAqSed_new, na_results, by='SourceRow') #returns rows from CEDENAqSed_new not matching values in no_result nrow(CEDENAqSed_new) #Number rows after - if number rows is the same as before this section can be deleted
## [1] 8852
# Format Units Column - "ng/L", "mg/Kg" unique(CEDENAqSed_new$Unit) #Identifies OLDNAMES
## [1] "ng/L" "mg/Kg dw" "ug/L" "ug/Kg dw"
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column CEDENAqSed_new <- CEDENAqSed_new %>% standardizeUnits unique(CEDENAqSed_new$Unit) #New naming structure for ResultQualCode Groupings
## [1] "ng/L" "mg/Kg dw"
# Format Date and Time Column # # NEED TO TALK ABOUT HOW WE WANT TO DO THIS - To graph in R we need Date and Time in same column # THE EXAMPLE CODE BELOW ASSUMES DATE AND TIME ARE IN SEPERATE COLUMNS CEDENAqSed_new <- CEDENAqSed_new %>% #rowise() %>% # rowise is very slow - so used sapply to make this a rowise operation mutate( #If SampleDate & CollectioTIme are not in Character format by defualt, turn it into a character class so it exports better SampleDate = ifelse(sapply(SampleDate, is.character), SampleDate, as.character(as.Date(SampleDate))), SampleTime = ifelse(sapply(SampleTime, is.character), SampleTime, format(SampleTime, "%H:%M:%S")), #COMBINE DATE AND TIME INTO SampleDateTime COLUMN SampleDateTime = ifelse(!is.na(SampleTime), paste(SampleDate, SampleTime), paste(SampleDate, '00:00:00')), #FORMAT SampleDateTime COLUMN TO DATE FORMAT SampleDateTime = lubridate::ymd_hms(SampleDateTime) ) ### REMOVE TEMPORARY COLUMNS ### CEDENAqSed_new <- CEDENAqSed_new %>% select(-one_of(temp_cols)) #Remove temp columns since they are no longer needed #View(CEDENAqSed_new) ## SAVE FORMATTED DATA AS EXCEL FILE ## #Save Mercury data writexl::write_xlsx(CEDENAqSed_new, path='Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/CEDENAqSed_ceden_format.xlsx') # In excel, to convert SampleDate column to Date format # 1 - Select the date column. # 2 - Go to the Data-tab and choose "Text to Columns". # 3 - On the first screen, leave radio button on "delimited" and click Next. # 4 - Unselect any delimiter boxes (everything blank) and click Next. # 5 - Under column data format choose Date, select YMD # 6 - Click Finish.
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=C ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## system code page: 65001 ## ## attached base packages: ## [1] grid stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] mgcv_1.8-41 nlme_3.1-160 lubridate_1.8.0 plotly_4.10.0 ## [5] readxl_1.4.1 actuar_3.3-0 NADA_1.6-1.1 forcats_0.5.2 ## [9] stringr_1.4.1 dplyr_1.0.9 purrr_0.3.4 readr_2.1.2 ## [13] tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6 tidyverse_1.3.2 ## [17] fitdistrplus_1.1-8 survival_3.4-0 MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] httr_1.4.4 jsonlite_1.8.0 viridisLite_0.4.1 splines_4.2.2 ## [5] modelr_0.1.9 assertthat_0.2.1 highr_0.9 googlesheets4_1.0.1 ## [9] cellranger_1.1.0 yaml_2.3.5 pillar_1.8.1 backports_1.4.1 ## [13] lattice_0.20-45 glue_1.6.2 digest_0.6.29 rvest_1.0.3 ## [17] colorspace_2.0-3 htmltools_0.5.3 Matrix_1.5-1 pkgconfig_2.0.3 ## [21] broom_1.0.1 haven_2.5.1 webshot_0.5.3 scales_1.2.1 ## [25] tzdb_0.3.0 googledrive_2.0.0 generics_0.1.3 ellipsis_0.3.2 ## [29] withr_2.5.0 lazyeval_0.2.2 cli_3.3.0 magrittr_2.0.3 ## [33] crayon_1.5.1 evaluate_0.16 fs_1.5.2 fansi_1.0.3 ## [37] xml2_1.3.3 tools_4.2.2 data.table_1.14.2 hms_1.1.2 ## [41] expint_0.1-7 gargle_1.2.0 lifecycle_1.0.1 munsell_0.5.0 ## [45] reprex_2.0.2 writexl_1.4.0 compiler_4.2.2 rlang_1.0.5 ## [49] rstudioapi_0.14 htmlwidgets_1.5.4 crosstalk_1.2.0 rmarkdown_2.16 ## [53] gtable_0.3.1 DBI_1.1.3 R6_2.5.1 knitr_1.40 ## [57] fastmap_1.1.0 utf8_1.2.2 stringi_1.7.8 vctrs_0.4.1 ## [61] dbplyr_2.2.1 tidyselect_1.1.2 xfun_0.32
Sys.time()
## [1] "2023-12-29 14:09:03 PST"