This report is automatically generated with the R package knitr (version 1.40) .

# Had issue trying to set WD with Shiny in R project, reset working directory of rproj
wd <- rstudioapi::getActiveProject()
setwd(wd)

source("R Functions/functions_estimate NDDNQ values.R")
source("R Functions/functions_QA data.R")

file <- function(file_name){
  paste0(wd, "/Reeval_Source_Analysis/Source Data/01_Tributaries/Flow/Incomplete Datasets/", file_name)
}

# Allows removing NAs from sample & sampling from a single value to get to the same value (as expected); base sample() has unpredictable behavior
sampleFrom <- function(x, size=length(x), na.rm=F, ...) {
  if (length(x) == 1L) {
    res <- rep(x, size)
  } else if(na.rm==T){
    res <- sample(x[!is.na(x)], size=size, ...)
  } else {
    res <- sample(x, size=size, ...)
  }
  res
}

# Read Data set
flow_data <- readxl::read_xlsx(file("Freemont Weir.xlsx"), guess_max = 300000, sheet = "water year cfs",
                               col_types = c("numeric", "text", rep("numeric", 12)))

years_of_data <- nrow(flow_data)

# Monthly Data  as Reported
allMonths_cfs_reported <- flow_data %>%
  select(-c(WaterYear, WYType))

# Replace NAs with Zeros for Dry & Critical Years
allMonths_cfs_CritDry <- flow_data %>%
  mutate(across(Oct:Sep, ~replace(.x, is.na(.x) & (WYType == "Critical" | WYType == "Dry"), 0))) %>%
  select(-c(WaterYear, WYType))


# Convert cubic feet per second to cubic feet
SecsPerDay <- 86400

# Create a dataframe of days in month the same number of rows as the flow data frame so it can be multiplied
month_days <- enframe(days_in_month(1:12)) %>%
  pivot_wider(names_from = name, values_from = value) %>%
  slice(rep(1, each=nrow(allMonths_cfs_reported))) %>%  # repeat days of month so it has the same # rows as data
  select(names(allMonths_cfs_reported)) # put days of month in same order as data


# Calculate Flow in Cubic Feet
allMonths_cf_reported <- allMonths_cfs_reported[names(allMonths_cfs_reported)] * month_days[names(month_days)] * SecsPerDay
months_cf_reported <- allMonths_cf_reported %>%
  select(which(colSums(., na.rm=T) > 0)) # remove columns that only have 0's or NAs

allMonths_cf_CritDry <- allMonths_cfs_CritDry[names(allMonths_cfs_CritDry)] * month_days[names(month_days)] * SecsPerDay
months_cf_CritDry <- allMonths_cf_CritDry %>%
  select(which(colSums(., na.rm=T) > 0)) # remove columns that only have 0's or NAs


# Randomly Select Month Volume & Sum Annual Volume Using different methods
samples <- 100000
bootstrap_annual_flow_reported <- vector()
bootstrap_annual_flow_CritDry <- vector()
annual_flow_median_reported <- vector()
annual_flow_mean_reported <- vector()
annual_flow_median_CritDry <- vector()
annual_flow_mean_CritDry <- vector()

set.seed(189)

for(n in 1:samples){

  ## Randomly impute NAs ##

  # Randomly Impute NAs Using Known Reported values
  random_fill_reported <- months_cf_reported # base dataset to replace unknown volumes
  # for each column
  for(j in 1:ncol(random_fill_reported)){
    month_sample_pool <- random_fill_reported[,j]

    # for each NA in Month (i.e., column)
    row_indices <- which(is.na(random_fill_reported[,j]))
    for(i in row_indices){
      random_fill_reported[i,j] <- sampleFrom(month_sample_pool, na.rm=T, size=1)
    }
  }
  # Sum each year and Calculate Median or Mean of years
  row_sums_reported <- rowSums(random_fill_reported)  # same as apply(random_fill_reported, 1, sum)

  annual_flow_median_reported[n] <- median(row_sums_reported)
  annual_flow_mean_reported[n]   <- mean(row_sums_reported)


  # Randomly impute NAs using Month Volumes where NAs in Critical & Dry Years were replaced by 0 
  random_fill_CritDry <- months_cf_CritDry # base dataset to replace unknown volumes
  # for each column
  for(j in 1:ncol(random_fill_CritDry)){
    month_sample_pool <- random_fill_CritDry[,j]

    # for each NA in Month (i.e., column)
    row_indices <- which(is.na(random_fill_CritDry[,j]))
    for(i in row_indices){
      random_fill_CritDry[i,j] <- sampleFrom(month_sample_pool, na.rm=T, size=1)
    }
  }
  # Sum each year and Calculate Median or Mean of years
  row_sums_CritDry <- rowSums(random_fill_CritDry)  # same as apply(random_fill_CritDry, 1, sum)

  annual_flow_median_CritDry[n] <- median(row_sums_CritDry)
  annual_flow_mean_CritDry[n]   <- mean(row_sums_CritDry)



  ## Bootstrap Monthly Volumes using known Volumes  ##
  bootstrap_annual_flow_reported[n] <- enframe(apply(months_cf_reported, 2, sampleFrom, size = 1, na.rm=T)) %>%
    pull(value) %>%
    sum(., na.rm=T)

  bootstrap_annual_flow_CritDry[n] <- enframe(apply(months_cf_CritDry, 2, sampleFrom, size = 1, na.rm=T)) %>%
    pull(value) %>%
    sum(., na.rm=T)
}


### GRAPHING RESULTS ###

# Median Annual Flow using Imputation of Reported Month Volumes
ggplot() +
  ggtitle("Annual Flow_Impute NAs using Reported Month Volumes") +
  xlab(paste0("Freemont Weir Predicted ",years_of_data,"-yr Median Annual Volume (cf/yr)")) +
  ylab("Frequency") +
  geom_histogram(aes(x=annual_flow_median_reported, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=quantile(annual_flow_median_reported, 0), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_median_reported, 0), y=samples*.12, hjust=1.02,
           label=paste('0th Percentile =', prettyNum(quantile(annual_flow_median_reported, 0), big.mark=",")), color='red', angle=0) +

  geom_vline(xintercept=quantile(annual_flow_median_reported, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(annual_flow_median_reported, .5), y=samples*.10, hjust=1.02,
           label=paste('50th Percentile =', prettyNum(quantile(annual_flow_median_reported, .5), big.mark=",")), color='red', angle=0)+

  geom_vline(xintercept=quantile(annual_flow_median_reported, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_median_reported, .95), y=samples*.08, hjust=1.02,
           label=paste('95th Percentile =', prettyNum(quantile(annual_flow_median_reported, .95), big.mark=",")), color='red', angle=0) +

  scale_y_continuous(expand=expansion(mult = c(0, .01))) +
  scale_x_continuous(expand=expansion(mult = c(0, 0)), limits=c(0,max(annual_flow_median_reported))) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
        axis.title=element_text(size=14, face="bold"),
        axis.text = element_text(size=14, angle = 0, hjust = 0.5),
        panel.grid.major.y=element_line(size=1),
        # panel.grid.minor=element_blank(),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14, face="bold"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
plot of chunk auto-report
ggsave(filename = file('Freemont Weir_1a.Imputed Volumes_Reported_annual median.png'),
       # plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 300)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
# Mean Annual Flow using Imputation of Reported Month Volumes
ggplot() +
  ggtitle("Annual Flow_Impute NAs using Reported Month Volumes") +
  xlab(paste0("Freemont Weir Predicted ",years_of_data,"-yr Mean Annual Volume (cf/yr)")) +
  ylab("Frequency") +
  geom_histogram(aes(x=annual_flow_mean_reported, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=quantile(annual_flow_mean_reported, 0), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_mean_reported, 0), y=samples*.12, hjust=1.02,
           label=paste('0th Percentile =', prettyNum(quantile(annual_flow_mean_reported, 0), big.mark=",")), color='red', angle=0) +

  geom_vline(xintercept=quantile(annual_flow_mean_reported, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(annual_flow_mean_reported, .5), y=samples*.10, hjust=1.02,
           label=paste('50th Percentile =', prettyNum(quantile(annual_flow_mean_reported, .5), big.mark=",")), color='red', angle=0)+

  geom_vline(xintercept=quantile(annual_flow_mean_reported, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_mean_reported, .95), y=samples*.08, hjust=1.02,
           label=paste('95th Percentile =', prettyNum(quantile(annual_flow_mean_reported, .95), big.mark=",")), color='red', angle=0) +

  scale_y_continuous(expand=expansion(mult = c(0, .01))) +
  scale_x_continuous(expand=expansion(mult = c(0, 0)), limits=c(0,max(annual_flow_mean_reported))) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
        axis.title=element_text(size=14, face="bold"),
        axis.text = element_text(size=14, angle = 0, hjust = 0.5),
        panel.grid.major.y=element_line(size=1),
        # panel.grid.minor=element_blank(),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14, face="bold"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
plot of chunk auto-report
ggsave(filename = file('Freemont Weir_1b.Imputed Volumes_Reported_annual mean.png'),
       # plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 300)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Warning: Removed 2 rows containing missing values (geom_bar).
# Median Annual Flow using Imputation Where Reported Crit/Dry Year NAs Replaced by 0
ggplot() +
  ggtitle("Annual Flow_Impute NAs using Crit&Dry Year NAs Replaced by 0") +
  xlab(paste0("Freemont Weir Predicted ",years_of_data,"-yr Median Annual Volume (cf/yr)")) +
  ylab("Frequency") +
  geom_histogram(aes(x=annual_flow_median_CritDry, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=quantile(annual_flow_median_CritDry, 0), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_median_CritDry, 0), y=samples*.08, hjust=-0.02,
           label=paste('0th Percentile =', prettyNum(quantile(annual_flow_median_CritDry, 0), big.mark=",")), color='red', angle=0) +

  geom_vline(xintercept=quantile(annual_flow_median_CritDry, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(annual_flow_median_CritDry, .5), y=samples*.06, hjust=-0.02,
           label=paste('50th Percentile =', prettyNum(quantile(annual_flow_median_CritDry, .5), big.mark=",")), color='red', angle=0)+

  geom_vline(xintercept=quantile(annual_flow_median_CritDry, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_median_CritDry, .95), y=samples*.04, hjust=-0.02,
           label=paste('95th Percentile =', prettyNum(quantile(annual_flow_median_CritDry, .95), big.mark=",")), color='red', angle=0) +

  scale_y_continuous(expand=expansion(mult = c(0, .01))) +
  # scale_x_continuous(expand=expansion(mult = c(0, 0)), limits=c(0,max(annual_flow_median_CritDry))) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
        axis.title=element_text(size=14, face="bold"),
        axis.text = element_text(size=14, angle = 0, hjust = 0.5),
        panel.grid.major.y=element_line(size=1),
        # panel.grid.minor=element_blank(),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14, face="bold"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plot of chunk auto-report
ggsave(filename = file('Freemont Weir_2a.Imputed Volumes_Crit_annual median.png'),
       # plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 300)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Mean Annual Flow using Imputation Where Reported Crit/Dry Year NAs Replaced by 0
ggplot() +
  ggtitle("Annual Flow_Impute NAs using Crit&Dry Year NAs Replaced by 0") +
  xlab(paste0("Freemont Weir Predicted ",years_of_data,"-yr Mean Annual Volume (cf/yr)")) +
  ylab("Frequency") +
  geom_histogram(aes(x=annual_flow_mean_CritDry, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=quantile(annual_flow_mean_CritDry, 0), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_mean_CritDry, 0), y=samples*.08, hjust=-0.02,
           label=paste('0th Percentile =', prettyNum(quantile(annual_flow_mean_CritDry, 0), big.mark=",")), color='red', angle=0) +

  geom_vline(xintercept=quantile(annual_flow_mean_CritDry, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(annual_flow_mean_CritDry, .5), y=samples*.06, hjust=-0.02,
           label=paste('50th Percentile =', prettyNum(quantile(annual_flow_mean_CritDry, .5), big.mark=",")), color='red', angle=0)+

  geom_vline(xintercept=quantile(annual_flow_mean_CritDry, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(annual_flow_mean_CritDry, .95), y=samples*.04, hjust=-0.02,
           label=paste('95th Percentile =', prettyNum(quantile(annual_flow_mean_CritDry, .95), big.mark=",")), color='red', angle=0) +

  scale_y_continuous(expand=expansion(mult = c(0, .01))) +
  # scale_x_continuous(expand=expansion(mult = c(0, 0)), limits=c(0,max(annual_flow_mean_CritDry))) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
        axis.title=element_text(size=14, face="bold"),
        axis.text = element_text(size=14, angle = 0, hjust = 0.5),
        panel.grid.major.y=element_line(size=1),
        # panel.grid.minor=element_blank(),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14, face="bold"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plot of chunk auto-report
ggsave(filename = file('Freemont Weir_2b.Imputed Volumes_Crit_annual mean.png'),
       # plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 300)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Bootstrap Annual Flow using Reported Month Volumes (Excludes NAs)
ggplot() +
  ggtitle("Bootstrap Annual Flow_Reported Month Volumes_Excludes NAs") +
  xlab("Freemont Weir Annual Volume (cf/yr)") +
  ylab("Frequency") +
  geom_histogram(aes(x=bootstrap_annual_flow_reported, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=quantile(bootstrap_annual_flow_reported, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(bootstrap_annual_flow_reported, .5), y=samples*.08, hjust=-0.02,
           label=paste('50th Percentile =', prettyNum(quantile(bootstrap_annual_flow_reported, .5), big.mark=",")), color='red', angle=0)+

  geom_vline(xintercept=quantile(bootstrap_annual_flow_reported, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(bootstrap_annual_flow_reported, .95), y=samples*.06, hjust=-0.02,
           label=paste('95th Percentile =', prettyNum(quantile(bootstrap_annual_flow_reported, .95), big.mark=",")), color='red', angle=0) +

  scale_y_continuous(expand=expansion(mult = c(0, .01))) +
  scale_x_continuous(expand=expansion(mult = c(0, 0))) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
        axis.title=element_text(size=14, face="bold"),
        axis.text = element_text(size=14, angle = 0, hjust = 0.5),
        panel.grid.major.y=element_line(size=1),
        # panel.grid.minor=element_blank(),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14, face="bold"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plot of chunk auto-report
ggsave(filename = file('Freemont Weir_3a.Bootstrap Annual Flow_Reported.png'), # 
       # plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 300)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
## Bootstrap Annual Flow with Crit & Dry Year NAs Replaced by 0 (Excludes NAs)
ggplot() +
  ggtitle("Bootstrap Annual Flow_Crit&Dry Year NAs Replaced by 0_Excludes Other NAs") +
  xlab("Freemont Weir Annual Volume (cf/yr)") +
  ylab("Frequency") +
  geom_histogram(aes(x=bootstrap_annual_flow_CritDry, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=quantile(bootstrap_annual_flow_CritDry, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(bootstrap_annual_flow_CritDry, .5), y=samples*.12, hjust=-0.02,
           label=paste('50th Percentile =', prettyNum(quantile(bootstrap_annual_flow_CritDry, .5), big.mark=",")), color='red', angle=0)+

  geom_vline(xintercept=quantile(bootstrap_annual_flow_CritDry, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(bootstrap_annual_flow_CritDry, .95), y=samples*.10, hjust=-0.02,
           label=paste('95th Percentile =', prettyNum(quantile(bootstrap_annual_flow_CritDry, .95), big.mark=",")), color='red', angle=0) +

  scale_y_continuous(expand=expansion(mult = c(0, .01))) +
  scale_x_continuous(expand=expansion(mult = c(0, 0))) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
        axis.title=element_text(size=14, face="bold"),
        axis.text = element_text(size=14, angle = 0, hjust = 0.5),
        panel.grid.major.y=element_line(size=1),
        # panel.grid.minor=element_blank(),
        legend.text = element_text(size = 14),
        legend.title = element_text(size = 14, face="bold"))
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
plot of chunk auto-report
ggsave(filename = file('Freemont Weir_3b.Bootstrap Annual Flow_CritDry.png'), # 
       # plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 300)
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        labeling_0.4.2      splines_4.2.2      
## [21] googledrive_2.0.0   htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1        
## [25] compiler_4.2.2      modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3    
## [29] htmltools_0.5.3     tidyselect_1.1.2    fansi_1.0.3         viridisLite_0.4.1  
## [33] crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0        
## [37] grid_4.2.2          jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1    
## [41] DBI_1.1.3           magrittr_2.0.3      scales_1.2.1        cli_3.3.0          
## [45] stringi_1.7.8       farver_2.1.1        fs_1.5.2            xml2_1.3.3         
## [49] ellipsis_0.3.2      generics_0.1.3      vctrs_0.4.1         expint_0.1-7       
## [53] tools_4.2.2         glue_1.6.2          hms_1.1.2           fastmap_1.1.0      
## [57] colorspace_2.0-3    gargle_1.2.0        rvest_1.0.3         knitr_1.40         
## [61] haven_2.5.1
    Sys.time()
## [1] "2024-01-29 15:36:51 PST"