This report is automatically generated with the R
package knitr
(version 1.39
)
.
--- title: "02_Southern CA" author: "Mercury Program and Basin Planning Unit" date: '2022-05-16' output: html_document: code_folding: show toc: TRUE toc_float: TRUE toc_depth: 3 runtime: shiny assets: css: - "http://fonts.googleapis.com/css?family=Raleway:300" - "http://fonts.googleapis.com/css?family=Oxygen" --- <style> body{ font-family: 'Oxygen', sans-serif; font-size: 16px; line-height: 24px; } h1,h2,h3,h4 { font-family: 'Raleway', sans-serif; } .container { width: 1250px; } h3 { background-color: #D4DAEC; text-indent: 50px; } h4 { text-indent: 75px; margin-top: 35px; margin-bottom: 5px; } </style> ```{r setup, echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5} knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5) ``` ```{r Libraries, echo=FALSE} # library(readxl) library(shiny) # Had issue trying to set WD with Shiny in R project, reset working directory of rproj wd <- rstudioapi::getActiveProject() source(paste0(wd, "/R Functions/functions_estimate NDDNQ values.R")) source(paste0(wd, "/R Functions/functions_QA data.R")) ``` # Load and Tidy Data Define some variables for repeat use later ```{r} # Columns to select columns_mehg_select <- c("SourceRow", "SampleDate", "StationName", "SimpleStationName", "ReferenceCode", "SourceID", "MDL", "RL", "Unit", "Result", "ResultQualCode", "Comments", "Month", "Year", "DaysInMonth", "WaterYear") ``` ## MeHg: Updated Linkage Data Load data ```{r} linkage_data <- readxl::read_xlsx(paste0(wd, "/Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/6a_AqMaster_noRepeats.xlsx"), sheet=1) %>% filter(grepl("Mendota Canal|State Water Project", StationName)) ``` Tidy data ```{r} dmc <- c("Delta Mendota Canal", "Delta-Mendota Mendota Canal at Byron-Bethany Road", "Delta Mendota Canal @ Tracy") swp <- c("State Water Project", "State Water Project @ Tracy") linkage_clean <- linkage_data %>% filter(Analyte == "Methylmercury, Total") %>% mutate(SourceRow = 1:nrow(.), Year = year(SampleDate), Month = month(SampleDate), DaysInMonth = days_in_month(month(SampleDate)), SimpleStationName = case_when(StationName %in% dmc ~ "Delta Mendota Canal", StationName %in% swp ~ "California Aqueduct"), Project = case_when(SourceID == "R5AQ" ~ Project, T ~ NA_character_)) %>% rename(ReferenceCode = Project, Comments = ResultsComments) %>% mutate(SampleDate = as.Date(SampleDate)) %>% mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1", # Wet season Oct-Dec - Start of WaterYear Month >=1 & Month <=4 ~ "Wet2", # Wet season Jan-Apr of Year Month >=5 & Month <=9 ~ "Dry"), # Dry season May-Sept of Year WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>% # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year dplyr::select(all_of(columns_mehg_select)) ``` Estimate ND/DNQ ```{r} mehgNDDNQs <- robinNDDNQ(linkage_clean, ResultQualCode) ``` View Fitted Distribution Graphs Remove comment marks (#'s) and run chunk to see fitted distribution graph options ```{r, echo=FALSE} ui <- fluidPage( selectInput('distName', label='Select Distribution', choices=names(mehgNDDNQs), selected=1), plotOutput('distPlot') ) server <- function(input, output, session) { output$distPlot <- renderPlot({ plot(mehgNDDNQs[[input$distName]]$fitmodel) # view plot fits title(input$distName, outer=T) }) } shinyApp(ui, server, options=list(height=500)) ``` Select Best Distribution Model ```{r} mehg_final <- mehgNDDNQs$gamma$fitted # Selected distribution based on plot showing best fit to data ``` ## Flow: Dayflow Load data ```{r} dayflow <- read_csv(paste0(wd, "/Reeval_Source_Analysis/Loss Data/02_Southern CA/dayflow-results-1997-2020.csv")) ``` Tidy data ```{r} # Central Valley Project data represents export at Delta Mendota Canal dayflow_clean <- dayflow %>% dplyr::select(c("Year", "Month", "Date", "SWP", "CVP")) dayflow_final <- dayflow_clean %>% # Add water year mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1", # Wet season Oct-Dec - Start of WaterYear Month >=1 & Month <=4 ~ "Wet2", # Wet season Jan-Apr of Year Month >=5 & Month <=9 ~ "Dry"), # Dry season May-Sept of Year WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>% # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year mutate(DaysInMonth = days_in_month(Month)) %>% # Add column for days in month pivot_longer(cols = c("SWP", "CVP"), names_to = "StationName", values_to = "Flow") %>% mutate(SimpleStationName = case_when(StationName == "CVP" ~ "Delta Mendota Canal", StationName == "SWP" ~ "California Aqueduct"), SampleDate = as.Date(Date, format = "%m/%d/%Y")) %>% filter(WaterYear >= 2000 & WaterYear < 2020) ``` # Visualize Data ```{r} colors_graphs <- RColorBrewer::brewer.pal(n = 11, name = 'RdYlBu') # [1] "#A50026" "#D73027" "#F46D43" "#FDAE61" "#FEE090" "#FFFFBF" "#E0F3F8" "#ABD9E9" "#74ADD1" "#4575B4" "#313695" colors_graphs_2 <- colors_graphs[c(9,3)] ``` ## MeHg ### By Month ```{r} monthly_mehg_plot <- ggplot(data=mehg_final, aes(x=month(Month, label=T, abbr=T), y=Result, color = SimpleStationName)) + geom_point(size = 3) + scale_y_continuous(expand = expansion(mult = c(0, .1))) + scale_color_manual(name = "Site", values = colors_graphs_2, labels = function(x) str_wrap(x, width = 15)) + ylab("MeHg Result (ng/L)") + xlab("Month") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 0, hjust = 0.5), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold"), legend.key.height=unit(1.5, "cm")) monthly_mehg_plot grob_plot <- ggplotGrob(monthly_mehg_plot) grob_plot grob_plot$widths ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.21 Monthly MeHg.jpg'), plot = monthly_mehg_plot, width = 9, height = 6, units = "in", dpi = 125) ``` ### By Water Year ```{r} yearly_mehg_plot <- ggplot(data=mehg_final, aes(x=WaterYear, y=Result, color = SimpleStationName)) + geom_point(size = 3) + scale_y_continuous(expand = expansion(mult = c(0, .1))) + scale_x_continuous(breaks = seq(2000, 2020, by = 1)) + scale_color_manual(name = "Site", values = colors_graphs_2, labels = function(x) str_wrap(x, width = 15)) + ylab("MeHg Result (ng/L)") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold"), legend.key.height=unit(1.5, "cm")) yearly_mehg_plot ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.20 Water Year MeHg.jpg'), plot = yearly_mehg_plot, width = 9, height = 6, units = "in", dpi = 125) ``` ## Flow ### By Month ```{r} monthly_flow_plot <- ggplot(data=dayflow_final, aes(x=month(Month, label=T, abbr=T), y=Flow, color = SimpleStationName)) + geom_point() + scale_color_manual(name = "Site", values = colors_graphs_2, labels = function(x) str_wrap(x, width = 15)) + ylab("Flow (cfs)") + xlab("Month") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 0, hjust = 0.5), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold"), legend.key.height=unit(1.5, "cm")) monthly_flow_plot ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.23 Mnthly Flow.jpg'), plot = monthly_flow_plot, width = 9, height = 6, units = "in", dpi = 125) ``` ### By Water Year ```{r} yearly_flow_plot <- ggplot(data=dayflow_final, aes(x=WaterYear, y=Flow, color = SimpleStationName)) + geom_point() + scale_x_continuous(breaks = seq(2000, 2019, by = 1)) + scale_color_manual(name = "Site", values = colors_graphs_2, labels = function(x) str_wrap(x, width = 15)) + ylab("Flow (cfs)") + theme_light() + theme( text = element_text(size=14, family="sans"), axis.title=element_text(size=14, face="bold"), axis.text.x = element_text(angle = 45, hjust = 1), panel.grid.major.y=element_line(size=1), panel.grid.minor=element_blank(), legend.text = element_text(size = 14), legend.title = element_text(size = 14, face="bold"), legend.key.height=unit(1.5, "cm")) yearly_flow_plot ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.22 Water Year Flow.jpg'), plot = yearly_flow_plot, width = 9, height = 6, units = "in", dpi = 125) ``` # Analyze data ## Save Data Used for Calculations ```{r} mehg_save <- mehg_final %>% select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored) dayflow_save <- dayflow_final %>% rename(Result=Flow) %>% mutate(SourceID="Dayflow", ReferenceCode = NA_character_, Unit="cfs", MDL=NA_real_, RL=NA_real_, ResultQualCode=NA_character_, Censored=NA) %>% select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored) SoCal_load_data <- mehg_save %>% bind_rows(., dayflow_save) %>% arrange(desc(Unit), SourceID, SampleDate) writexl::write_xlsx(list("6.3.4 South of Delta Exports" = SoCal_load_data), path=paste0(wd, "/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Appdx A_Data Compilation_Southern CA.xlsx")) # writexl::write_xlsx(list("MeHg" = mehg_final, "Flow" = dayflow_final), # path=paste0(wd, "/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Appdx A_Data Compilation_Southern CA.xlsx")) ``` ## Regression test ```{r} # Consistent with the 2010 TMDL Staff Report, joining mercury with daily flow when available & matching for the regression analysis mehg_and_flow <- mehg_final %>% left_join(dayflow_final, by = c("SampleDate", "SimpleStationName")) correlation <- cor(mehg_and_flow$Flow, mehg_and_flow$Result) log_model <- lm(mehg_and_flow$Result ~ log(mehg_and_flow$Flow)) summary(log_model) # No significant relationship between flow and methylmercury, consistent with 2010 TMDL Staff Report ``` ## MeHg First, get the median of each site's pooled methylmercury data ```{r} dmc_mehg <- mehg_final %>% filter(SimpleStationName == "Delta Mendota Canal") dmc_mehg_pooled_median <- median(dmc_mehg$Result) # 0.0788 swp_mehg <- mehg_final %>% filter(SimpleStationName == "California Aqueduct") swp_mehg_pooled_median <- median(swp_mehg$Result) # 0.0585 ``` ## Flow Then, get the annual median of flow data by adding flows for each year (2000-2019) and taking the median of those 20 annual flows ```{r} dmc_flow <- dayflow_final %>% filter(SimpleStationName == "Delta Mendota Canal") dmc_flow_annualsums <- dmc_flow %>% group_by(Year) %>% summarise(AnnualSum = sum(Flow)) %>% mutate(AnnualVolume = AnnualSum * 86400 * 28.317) # convert from cfs to L/year dmc_flow_pooled_median <- median(dmc_flow_annualsums$AnnualVolume) swp_flow <- dayflow_final %>% filter(SimpleStationName == "California Aqueduct") swp_flow_annualsums <- swp_flow %>% group_by(Year) %>% summarise(AnnualSum = sum(Flow)) %>% mutate(AnnualVolume = AnnualSum * 86400 * 28.317) # convert from cfs to L/year swp_flow_pooled_median <- median(swp_flow_annualsums$AnnualVolume) ``` ## Calculate Loads Finally, calculate loads by multiplying the pooled median concentration by the median annual volume (and conversion factor) ```{r} dmc_pooled_loss <- dmc_mehg_pooled_median * dmc_flow_pooled_median / 1000000000 dmc_pooled_loss swp_pooled_loss <- swp_mehg_pooled_median * swp_flow_pooled_median / 1000000000 swp_pooled_loss total_pooled_loss <- dmc_pooled_loss + swp_pooled_loss total_pooled_loss ``` ## Water Balance ```{r} # Convert from liters to million acre-feet dmc_water_balance <- dmc_flow_pooled_median * 0.000000810714 / 1000000 dmc_water_balance swp_water_balance <- swp_flow_pooled_median * 0.000000810714 / 1000000 swp_water_balance ``` ## Data to recreate table 6.16 ```{r} # Total number of samples dmc_n_samples <- nrow(dmc_mehg) dmc_n_samples # Minimum dmc_min <- min(dmc_mehg$Result) dmc_min # Pooled average dmc_ave <- mean(dmc_mehg$Result) dmc_ave # Maximum dmc_max <- max(dmc_mehg$Result) dmc_max # Pooled median dmc_median <- median(dmc_mehg$Result) dmc_median # Annual average dmc_avg_mehg_summary <- dmc_mehg %>% group_by(Month) %>% summarise(MeHgMean = mean(Result)) dmc_annual_ave <- mean(dmc_avg_mehg_summary$MeHgMean) dmc_annual_ave # Same for State Water Project swp_n_samples <- nrow(swp_mehg) swp_n_samples swp_min <- min(swp_mehg$Result) swp_min swp_ave <- mean(swp_mehg$Result) swp_ave swp_max <- max(swp_mehg$Result) swp_max swp_median <- median(swp_mehg$Result) swp_median swp_avg_mehg_summary <- swp_mehg %>% group_by(Month) %>% summarise(MeHgMean = mean(Result)) swp_annual_ave <- mean(swp_avg_mehg_summary$MeHgMean) swp_annual_ave ``` # Alternatives Considered ## 1. Monthly medians Summarize MeHg & Flow ```{r} # Group by month then calculate the median mehg_summary <- mehg_final %>% group_by(Month, SimpleStationName) %>% summarise(MeHgMedian = median(Result)) flow_summary <- dayflow_final %>% group_by(Month, SimpleStationName) %>% summarise(FlowMedian = median(Flow)) # Regression test mehg_and_flow_summary <- mehg_summary %>% full_join(flow_summary) ggplot(mehg_and_flow_summary, aes(x=FlowMedian, y=MeHgMedian)) + geom_point(size = 5) + ylab("Median Monthly MeHg Result (ng/L)") + xlab("Median Monthly Flow (cfs)") + stat_smooth() ``` Calculate Loads ```{r} dmc_monthly_losses <- mehg_and_flow_summary %>% filter(SimpleStationName == "Delta Mendota Canal") %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(VolumeMedian = FlowMedian * 86400 * DaysInMonth * 28.317) %>% mutate(`Load (g/month)` = VolumeMedian * MeHgMedian / 1000000000) dmc_annual_loss_mm <- sum(dmc_monthly_losses$`Load (g/month)`) swp_monthly_losses <- mehg_and_flow_summary %>% filter(SimpleStationName == "California Aqueduct") %>% mutate(DaysInMonth = days_in_month(Month)) %>% mutate(VolumeMedian = FlowMedian * 86400 * DaysInMonth * 28.317) %>% mutate(`Load (g/month)` = VolumeMedian * MeHgMedian / 1000000000) swp_annual_loss_mm <- sum(swp_monthly_losses$`Load (g/month)`) total_socal_loss_mm <- dmc_annual_loss_mm + swp_annual_loss_mm total_socal_loss_mm ``` ## 2. Averages ```{r} # calculate means mehg_average_summary <- mehg_final %>% group_by(Month, SimpleStationName) %>% summarise(MeHgMean = mean(Result)) flow_average_summary <- dayflow_final %>% group_by(Month, SimpleStationName) %>% summarise(FlowMean = mean(Flow)) avg_mehg_and_flow <- mehg_average_summary %>% full_join(flow_average_summary) # calculate loads with both means dmc_mehg_pooled_mean <- mean(dmc_mehg$Result) dmc_flow_pooled_mean <- mean(dmc_flow_annualsums$AnnualVolume) dmc_average_loss <- dmc_mehg_pooled_mean * dmc_flow_pooled_mean / 1000000000 swp_mehg_pooled_mean <- mean(swp_mehg$Result) swp_flow_pooled_mean <- mean(swp_flow_annualsums$AnnualVolume) swp_average_loss <- swp_mehg_pooled_mean * swp_flow_pooled_mean / 1000000000 total_average_loss <- dmc_average_loss + swp_average_loss total_average_loss # calculate loads with mean volume and median mercury dmc_flow_pooled_mean <- mean(dmc_flow_annualsums$AnnualVolume) dmc_mixed_loss <- dmc_mehg_pooled_median * dmc_flow_pooled_mean / 1000000000 swp_flow_pooled_mean <- mean(swp_flow_annualsums$AnnualVolume) swp_mixed_loss <- swp_mehg_pooled_median * swp_flow_pooled_mean / 1000000000 total_mixed_loss <- dmc_mixed_loss + swp_mixed_loss total_mixed_loss ```
## Error: <text>:17:1: unexpected '<' ## 16: --- ## 17: < ## ^
The R session information (including the OS info, R version and all packages used):
sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt) ## Platform: x86_64-w64-mingw32/x64 (64-bit) ## Running under: Windows 10 x64 (build 22621) ## ## Matrix products: default ## ## locale: ## [1] LC_COLLATE=English_United States.utf8 LC_CTYPE=English_United States.utf8 ## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C ## [5] LC_TIME=English_United States.utf8 ## ## attached base packages: ## [1] stats graphics grDevices utils datasets methods base ## ## loaded via a namespace (and not attached): ## [1] rstudioapi_0.13 knitr_1.39 magrittr_2.0.3 tidyselect_1.1.2 ## [5] R6_2.5.1 rlang_1.0.2 fastmap_1.1.0 fansi_1.0.3 ## [9] stringr_1.4.0 highr_0.9 dplyr_1.0.9 tools_4.2.2 ## [13] xfun_0.31 utf8_1.2.2 DBI_1.1.2 cli_3.3.0 ## [17] htmltools_0.5.2 ellipsis_0.3.2 assertthat_0.2.1 yaml_2.3.5 ## [21] readxl_1.4.0 digest_0.6.29 tibble_3.1.7 lifecycle_1.0.1 ## [25] crayon_1.5.1 RColorBrewer_1.1-3 purrr_0.3.4 vctrs_0.4.1 ## [29] glue_1.6.2 evaluate_0.15 rmarkdown_2.14 stringi_1.7.6 ## [33] compiler_4.2.2 pillar_1.7.0 cellranger_1.1.0 generics_0.1.2 ## [37] writexl_1.4.0 pkgconfig_2.0.3
Sys.time()
## [1] "2023-12-26 15:17:43 PST"