This report is automatically generated with the R
package knitr
(version 1.40
)
.
## Random Sampling Function Script File ## # library(tidyverse) # functions to work with tibbles # library(lubridate) # makes working with dates easier source(paste0(rstudioapi::getActiveProject(),"/R Functions/function_predictionModels.R")) geomean = function(x, na.rm=TRUE){ exp(sum(log(x[x > 0]), na.rm=na.rm) / length(x)) } AqFishSamplePairing <- function(aqData, fishData, bylocationColumn=NULL, aqOverlapYrs=5, stdzFishLength=350){ # FUNCTION VARIABLES # aqData - dataframe of Aq data containing Result, SampleDate, Unit # fishData - dataframe of Fish data containing Result, TLAvgLength, SampleDate, Unit # bylocationColumn - column of location names. LOcation must be shared between Fish & Aq to be paired # aqStat - Name of statistic to summarize aq data; 'mean', 'geomean', or 'median' # aqOverlapYrs - number years prior Fish sample available to draw aq random sample # stdzFishLength - length to determine Standardized Fish [MeHg] # bylocationColumn <- quo(Subarea) - to test function line-wise locationColumn <- enquo(bylocationColumn) # FISH DATA PREP fishPool <- fishData %>% mutate(Year = year(SampleDate)) # AQ DATA PREP aqPool <- aqData %>% mutate(Year = year(SampleDate)) # START PAIRING Locations <- fishPool %>% distinct(!!locationColumn) %>% pull FishAqResults <- NULL for(location in Locations){ # location <- Locations[1] # - to test function line-wise fishPool_loc <- fishPool %>% filter(!!locationColumn == location) Years <- fishPool_loc %>% distinct(Year) %>% pull tempFishAqResults <- NULL for(yr in Years){ # yr <- Years[1] # - to test function line-wise ## Gather Fish Samples ## fishPool_locYr <- fishPool_loc %>% filter(Year == yr) #data check# if(nrow(fishPool_locYr) == 1){next} # Calc Standardized Fish [MeHg] #if prediction requires extrapolation, make log model first (so selected with distinct(). Under ideal/optimized eating conditions a linear model would be appropriate but assume less than ideal environmental consumption conditions such that MeHg accumulation rate decreases as fish gets bigger/older. tempFishConcs <- predictionModels(fishPool_locYr, .knownYcol=Result, .knownXcol=TLAvgLength, knownX=stdzFishLength, preferExtrapModel='log') %>% rename(!!paste0("fish_Stdz",stdzFishLength) := predictedY, fish_n = n) %>% filter(!grepl('gam', ModelName, ignore.case=T)) %>% # remove 'gam' because regression model can increase then decrease as length goes up, which is unlikely filter( if(all(Extrapolated)) ModelName == 'log' else ModelName != 'doesnt matter') %>% #if fish_Stdz requires extrapolation, assume log model. Under ideal/optimized eating conditions a linear model would be appropriate but assume less than ideal environmental consumption conditions such that MeHg accumulation rate decreases as fish gets bigger/older. filter(RMSE == min(RMSE)) %>% ungroup tempFishResult <- cbind(sampleLocation=location, fish_year=yr, tempFishConcs, fish_Unit=unique(fishData$Unit)) # Adds fish sample year & unit columns to StandFishConcs ## Gather Aq Samples ## aqYrSamp <- aqPool %>% filter(!!locationColumn == location, waterYear <= yr & waterYear > yr-aqOverlapYrs) # Includes year both fish & Aq are sampled plus aqOverlapYrs-1 previous years of Aq data #data check# # if(is.null(aqYrSamp) | nrow(aqYrSamp) < 1){next} # Calc Avg Aq Concs aqYrs <- paste(aqYrSamp %>% distinct(Year) %>% pull %>% sort, collapse=", ") # Collapse Aq Smple Years for storage in dataframe tempAqSamp <- tibble(aq_waterYears = aqYrs, aq_n = nrow(aqYrSamp), aq_Mean_pool = mean(aqYrSamp$Result), aq_Mean_wateryear = aqYrSamp %>% group_by(waterYear) %>% summarise(Avg= mean(Result)) %>% summarise(Mean=mean(Avg), .groups = 'drop') %>% pull(Mean), aq_Wt.Mean = aqYrSamp %>% group_by(waterYear) %>% summarise(Avg= mean(Result), n=n()) %>% summarise(WtMean=weighted.mean(Avg, n), .groups = 'drop') %>% pull(WtMean), aq_Geomean_pool = geomean(aqYrSamp$Result), aq_Geomean_wateryear = aqYrSamp %>% group_by(waterYear) %>% summarise(GeoAvg= geomean(Result)) %>% summarise(GeoMean=geomean(GeoAvg), .groups = 'drop') %>% pull(GeoMean), aq_Median_pool = median(aqYrSamp$Result), aq_Median_wateryear = aqYrSamp %>% group_by(waterYear) %>% summarise(Medi= median(Result)) %>% summarise(Median=median(Medi), .groups = 'drop') %>% pull(Median), aq_Unit = unique(aqData$Unit)) # Store Results tempFishAqResults <- rbind(tempFishAqResults, cbind(tempFishResult, tempAqSamp)) } FishAqResults <- rbind(FishAqResults, tempFishAqResults) } return(FishAqResults) }
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## other attached packages: ## [1] mgcv_1.8-41 nlme_3.1-160 lubridate_1.8.0 plotly_4.10.0 ## [5] readxl_1.4.1 actuar_3.3-0 NADA_1.6-1.1 forcats_0.5.2 ## [9] stringr_1.4.1 dplyr_1.0.9 purrr_0.3.4 readr_2.1.2 ## [13] tidyr_1.2.0 tibble_3.1.8 ggplot2_3.3.6 tidyverse_1.3.2 ## [17] fitdistrplus_1.1-8 survival_3.4-0 MASS_7.3-58.1 ## ## loaded via a namespace (and not attached): ## [1] lattice_0.20-45 assertthat_0.2.1 digest_0.6.29 utf8_1.2.2 ## [5] R6_2.5.1 cellranger_1.1.0 backports_1.4.1 reprex_2.0.2 ## [9] evaluate_0.16 highr_0.9 httr_1.4.4 pillar_1.8.1 ## [13] rlang_1.0.5 lazyeval_0.2.2 googlesheets4_1.0.1 rstudioapi_0.14 ## [17] data.table_1.14.2 Matrix_1.5-1 splines_4.2.2 googledrive_2.0.0 ## [21] htmlwidgets_1.5.4 munsell_0.5.0 broom_1.0.1 compiler_4.2.2 ## [25] modelr_0.1.9 xfun_0.32 pkgconfig_2.0.3 htmltools_0.5.3 ## [29] tidyselect_1.1.2 fansi_1.0.3 viridisLite_0.4.1 crayon_1.5.1 ## [33] tzdb_0.3.0 dbplyr_2.2.1 withr_2.5.0 grid_4.2.2 ## [37] jsonlite_1.8.0 gtable_0.3.1 lifecycle_1.0.1 DBI_1.1.3 ## [41] magrittr_2.0.3 scales_1.2.1 cli_3.3.0 stringi_1.7.8 ## [45] fs_1.5.2 xml2_1.3.3 ellipsis_0.3.2 generics_0.1.3 ## [49] vctrs_0.4.1 expint_0.1-7 tools_4.2.2 glue_1.6.2 ## [53] hms_1.1.2 fastmap_1.1.0 colorspace_2.0-3 gargle_1.2.0 ## [57] rvest_1.0.3 knitr_1.40 haven_2.5.1
Sys.time()
## [1] "2023-12-26 08:30:16 PST"