This report is automatically generated with the R package knitr (version 1.39) .

---
title: "02_Southern CA"
author: "Mercury Program and Basin Planning Unit"
date: '2022-05-16'
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---
<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```

```{r Libraries, echo=FALSE}
# library(readxl)
library(shiny)

# Had issue trying to set WD with Shiny in R project, reset working directory of rproj
wd <- rstudioapi::getActiveProject()
source(paste0(wd, "/R Functions/functions_estimate NDDNQ values.R"))
source(paste0(wd, "/R Functions/functions_QA data.R"))
```

# Load and Tidy Data
Define some variables for repeat use later
```{r}
# Columns to select
columns_mehg_select <- c("SourceRow", "SampleDate", "StationName", "SimpleStationName", "ReferenceCode", "SourceID", "MDL", "RL", "Unit", "Result", "ResultQualCode", "Comments", "Month", "Year", "DaysInMonth", "WaterYear")
```

## MeHg: Updated Linkage Data
Load data
```{r}
linkage_data <- readxl::read_xlsx(paste0(wd, "/Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/6a_AqMaster_noRepeats.xlsx"),
                           sheet=1) %>% 
  filter(grepl("Mendota Canal|State Water Project", StationName))
```

Tidy data 
```{r}
dmc <- c("Delta Mendota Canal", "Delta-Mendota Mendota Canal at Byron-Bethany Road", "Delta Mendota Canal @ Tracy")

swp <- c("State Water Project", "State Water Project @ Tracy")

linkage_clean <- linkage_data %>% 
  filter(Analyte == "Methylmercury, Total") %>% 
  mutate(SourceRow = 1:nrow(.),
         Year = year(SampleDate),
         Month = month(SampleDate),
         DaysInMonth = days_in_month(month(SampleDate)),
         SimpleStationName = case_when(StationName %in% dmc ~ "Delta Mendota Canal",
                                       StationName %in% swp ~ "California Aqueduct"),
         Project = case_when(SourceID == "R5AQ" ~ Project,
                             T ~ NA_character_)) %>% 
  rename(ReferenceCode = Project,
         Comments = ResultsComments) %>% 
  mutate(SampleDate = as.Date(SampleDate)) %>% 
  mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1",   # Wet season Oct-Dec - Start of WaterYear
                            Month >=1  & Month <=4  ~ "Wet2",   # Wet season Jan-Apr of Year
                            Month >=5  & Month <=9 ~ "Dry"),   # Dry season May-Sept of Year
         WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>%   # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
  dplyr::select(all_of(columns_mehg_select))
```

Estimate ND/DNQ
```{r}
mehgNDDNQs <- robinNDDNQ(linkage_clean, ResultQualCode)
```

View Fitted Distribution Graphs
Remove comment marks (#'s) and run chunk to see fitted distribution graph options
```{r, echo=FALSE}
ui <- fluidPage(
  selectInput('distName', label='Select Distribution', choices=names(mehgNDDNQs), selected=1),
  plotOutput('distPlot')
)

server <- function(input, output, session) {
  output$distPlot <- renderPlot({
  plot(mehgNDDNQs[[input$distName]]$fitmodel) # view plot fits
    title(input$distName, outer=T)
})
}

shinyApp(ui, server, options=list(height=500))
```

Select Best Distribution Model
```{r}
mehg_final <- mehgNDDNQs$gamma$fitted # Selected distribution based on plot showing best fit to data
```


## Flow: Dayflow
Load data
```{r}
dayflow <- read_csv(paste0(wd, "/Reeval_Source_Analysis/Loss Data/02_Southern CA/dayflow-results-1997-2020.csv"))
```

Tidy data
```{r}
# Central Valley Project data represents export at Delta Mendota Canal
dayflow_clean <- dayflow %>%
  dplyr::select(c("Year", "Month", "Date", "SWP", "CVP"))


dayflow_final <- dayflow_clean %>% 
  # Add water year
  mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1",   # Wet season Oct-Dec - Start of WaterYear
                            Month >=1  & Month <=4  ~ "Wet2",   # Wet season Jan-Apr of Year
                            Month >=5  & Month <=9 ~ "Dry"),   # Dry season May-Sept of Year
         WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>%   # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
  mutate(DaysInMonth = days_in_month(Month)) %>%  # Add column for days in month
  pivot_longer(cols = c("SWP", "CVP"),
               names_to = "StationName",
               values_to = "Flow") %>% 
  mutate(SimpleStationName = case_when(StationName == "CVP" ~ "Delta Mendota Canal",
                                       StationName == "SWP" ~ "California Aqueduct"),
         SampleDate = as.Date(Date, format = "%m/%d/%Y")) %>% 
  filter(WaterYear >= 2000 & WaterYear < 2020)
```


# Visualize Data
```{r}
colors_graphs <- RColorBrewer::brewer.pal(n = 11, name = 'RdYlBu')
# [1] "#A50026" "#D73027" "#F46D43" "#FDAE61" "#FEE090" "#FFFFBF" "#E0F3F8" "#ABD9E9" "#74ADD1" "#4575B4" "#313695"

colors_graphs_2 <- colors_graphs[c(9,3)]
```

## MeHg
### By Month
```{r}
monthly_mehg_plot <- ggplot(data=mehg_final, 
                            aes(x=month(Month, label=T, abbr=T), 
                                y=Result, 
                                color = SimpleStationName)) +
  geom_point(size = 3) + 
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  scale_color_manual(name = "Site",
                     values =  colors_graphs_2,
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("MeHg Result (ng/L)") +
  xlab("Month") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 0, hjust = 0.5),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"),
    legend.key.height=unit(1.5, "cm"))
monthly_mehg_plot

grob_plot <- ggplotGrob(monthly_mehg_plot)
grob_plot
grob_plot$widths

ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.21 Monthly MeHg.jpg'),
       plot = monthly_mehg_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```

### By Water Year
```{r}
yearly_mehg_plot <- ggplot(data=mehg_final, 
                           aes(x=WaterYear, 
                               y=Result, 
                               color = SimpleStationName)) +
  geom_point(size = 3) + 
  scale_y_continuous(expand = expansion(mult = c(0, .1))) +
  scale_x_continuous(breaks = seq(2000, 2020, by = 1)) +
  scale_color_manual(name = "Site",
                     values = colors_graphs_2,
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("MeHg Result (ng/L)") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 45, hjust = 1),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"),
    legend.key.height=unit(1.5, "cm"))
yearly_mehg_plot

ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.20 Water Year MeHg.jpg'),
       plot = yearly_mehg_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```



## Flow
### By Month
```{r}
monthly_flow_plot <- ggplot(data=dayflow_final, 
                            aes(x=month(Month, label=T, abbr=T), 
                                y=Flow, 
                                color = SimpleStationName)) +
  geom_point() + 
  scale_color_manual(name = "Site",
                     values = colors_graphs_2,
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("Flow (cfs)") +
  xlab("Month") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 0, hjust = 0.5),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"),
    legend.key.height=unit(1.5, "cm"))
monthly_flow_plot

ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.23 Mnthly Flow.jpg'),
       plot = monthly_flow_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```

### By Water Year
```{r}
yearly_flow_plot <- ggplot(data=dayflow_final, 
                           aes(x=WaterYear, 
                               y=Flow, 
                               color = SimpleStationName)) +
  geom_point() + 
  scale_x_continuous(breaks = seq(2000, 2019, by = 1)) +
  scale_color_manual(name = "Site",
                     values = colors_graphs_2,
                     labels = function(x) str_wrap(x, width = 15)) +
  ylab("Flow (cfs)") +
  theme_light() + 
  theme(
    text = element_text(size=14, family="sans"),
    axis.title=element_text(size=14, face="bold"), 
    axis.text.x = element_text(angle = 45, hjust = 1),     
    panel.grid.major.y=element_line(size=1),
    panel.grid.minor=element_blank(),
    legend.text = element_text(size = 14), 
    legend.title = element_text(size = 14, face="bold"),
    legend.key.height=unit(1.5, "cm")) 
yearly_flow_plot

ggsave(filename = paste0(wd, '/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Fig 6.22 Water Year Flow.jpg'),
       plot = yearly_flow_plot,
       width = 9,
       height = 6,
       units = "in",
       dpi = 125)
```



# Analyze data
## Save Data Used for Calculations
```{r}
mehg_save <- mehg_final %>% 
  select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored)

dayflow_save <- dayflow_final %>% 
  rename(Result=Flow) %>% 
  mutate(SourceID="Dayflow",
         ReferenceCode = NA_character_,
         Unit="cfs",
         MDL=NA_real_,
         RL=NA_real_,
         ResultQualCode=NA_character_,
         Censored=NA) %>% 
  select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored)

SoCal_load_data <- mehg_save %>% 
  bind_rows(., dayflow_save) %>% 
  arrange(desc(Unit), SourceID, SampleDate)


writexl::write_xlsx(list("6.3.4 South of Delta Exports" = SoCal_load_data),
                    path=paste0(wd, "/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Appdx A_Data Compilation_Southern CA.xlsx"))


# writexl::write_xlsx(list("MeHg" = mehg_final, "Flow" = dayflow_final),
#                     path=paste0(wd, "/Reeval_Source_Analysis/Loss Data/02_Southern CA/Output/Appdx A_Data Compilation_Southern CA.xlsx"))
```

## Regression test
```{r}
# Consistent with the 2010 TMDL Staff Report, joining mercury with daily flow when available & matching for the regression analysis
mehg_and_flow <- mehg_final %>% 
  left_join(dayflow_final, by = c("SampleDate", "SimpleStationName"))

correlation <- cor(mehg_and_flow$Flow, mehg_and_flow$Result)

log_model <- lm(mehg_and_flow$Result ~ log(mehg_and_flow$Flow))
summary(log_model)
# No significant relationship between flow and methylmercury, consistent with 2010 TMDL Staff Report
```

## MeHg
First, get the median of each site's pooled methylmercury data
```{r}
dmc_mehg <- mehg_final %>% 
  filter(SimpleStationName == "Delta Mendota Canal")

dmc_mehg_pooled_median <- median(dmc_mehg$Result)
# 0.0788

swp_mehg <- mehg_final %>% 
  filter(SimpleStationName == "California Aqueduct")

swp_mehg_pooled_median <- median(swp_mehg$Result)
# 0.0585
```

## Flow
Then, get the annual median of flow data by adding flows for each year (2000-2019) and taking the median of those 20 annual flows
```{r}
dmc_flow <- dayflow_final %>% 
  filter(SimpleStationName == "Delta Mendota Canal")

dmc_flow_annualsums <- dmc_flow %>% 
  group_by(Year) %>% 
  summarise(AnnualSum = sum(Flow)) %>% 
  mutate(AnnualVolume = AnnualSum * 86400 * 28.317) # convert from cfs to L/year

dmc_flow_pooled_median <- median(dmc_flow_annualsums$AnnualVolume)

swp_flow <- dayflow_final %>% 
  filter(SimpleStationName == "California Aqueduct")

swp_flow_annualsums <- swp_flow %>% 
  group_by(Year) %>% 
  summarise(AnnualSum = sum(Flow)) %>% 
  mutate(AnnualVolume = AnnualSum * 86400 * 28.317) # convert from cfs to L/year

swp_flow_pooled_median <- median(swp_flow_annualsums$AnnualVolume)
```

## Calculate Loads
Finally, calculate loads by multiplying the pooled median concentration by the median annual volume (and conversion factor)
```{r}
dmc_pooled_loss <- dmc_mehg_pooled_median * dmc_flow_pooled_median / 1000000000
dmc_pooled_loss

swp_pooled_loss <- swp_mehg_pooled_median * swp_flow_pooled_median / 1000000000
swp_pooled_loss

total_pooled_loss <- dmc_pooled_loss + swp_pooled_loss
total_pooled_loss
```

## Water Balance
```{r}
# Convert from liters to million acre-feet 
dmc_water_balance <- dmc_flow_pooled_median * 0.000000810714 / 1000000
dmc_water_balance

swp_water_balance <- swp_flow_pooled_median * 0.000000810714 / 1000000
swp_water_balance
```

## Data to recreate table 6.16
```{r}
# Total number of samples
dmc_n_samples <- nrow(dmc_mehg)
dmc_n_samples

# Minimum
dmc_min <- min(dmc_mehg$Result)
dmc_min

# Pooled average
dmc_ave <- mean(dmc_mehg$Result)
dmc_ave

# Maximum
dmc_max <- max(dmc_mehg$Result)
dmc_max

# Pooled median
dmc_median <- median(dmc_mehg$Result)
dmc_median

# Annual average
dmc_avg_mehg_summary <- dmc_mehg %>% 
  group_by(Month) %>% 
  summarise(MeHgMean = mean(Result))
dmc_annual_ave <- mean(dmc_avg_mehg_summary$MeHgMean)
dmc_annual_ave

# Same for State Water Project
swp_n_samples <- nrow(swp_mehg)
swp_n_samples

swp_min <- min(swp_mehg$Result)
swp_min

swp_ave <- mean(swp_mehg$Result)
swp_ave

swp_max <- max(swp_mehg$Result)
swp_max

swp_median <- median(swp_mehg$Result)
swp_median

swp_avg_mehg_summary <- swp_mehg %>% 
  group_by(Month) %>% 
  summarise(MeHgMean = mean(Result))
swp_annual_ave <- mean(swp_avg_mehg_summary$MeHgMean)
swp_annual_ave
```


# Alternatives Considered
## 1. Monthly medians
Summarize MeHg & Flow
```{r}
# Group by month then calculate the median
mehg_summary <- mehg_final %>% 
  group_by(Month, SimpleStationName) %>% 
  summarise(MeHgMedian = median(Result))

flow_summary <- dayflow_final %>% 
  group_by(Month, SimpleStationName) %>% 
  summarise(FlowMedian = median(Flow))

# Regression test
mehg_and_flow_summary <- mehg_summary %>% 
  full_join(flow_summary)

ggplot(mehg_and_flow_summary, aes(x=FlowMedian, y=MeHgMedian)) +
  geom_point(size = 5) + 
  ylab("Median Monthly MeHg Result (ng/L)") +
  xlab("Median Monthly Flow (cfs)") +
  stat_smooth()
```

 Calculate Loads
```{r}
dmc_monthly_losses <- mehg_and_flow_summary %>% 
  filter(SimpleStationName == "Delta Mendota Canal") %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(VolumeMedian = FlowMedian * 86400 * DaysInMonth * 28.317) %>%
  mutate(`Load (g/month)` = VolumeMedian * MeHgMedian / 1000000000)

dmc_annual_loss_mm <- sum(dmc_monthly_losses$`Load (g/month)`)

swp_monthly_losses <- mehg_and_flow_summary %>% 
  filter(SimpleStationName == "California Aqueduct") %>% 
  mutate(DaysInMonth = days_in_month(Month)) %>% 
  mutate(VolumeMedian = FlowMedian * 86400 * DaysInMonth * 28.317) %>%
  mutate(`Load (g/month)` = VolumeMedian * MeHgMedian / 1000000000)

swp_annual_loss_mm <- sum(swp_monthly_losses$`Load (g/month)`)

total_socal_loss_mm <- dmc_annual_loss_mm + swp_annual_loss_mm
total_socal_loss_mm
```


## 2. Averages
```{r}
# calculate means
mehg_average_summary <- mehg_final %>% 
  group_by(Month, SimpleStationName) %>% 
  summarise(MeHgMean = mean(Result))

flow_average_summary <- dayflow_final %>% 
  group_by(Month, SimpleStationName) %>% 
  summarise(FlowMean = mean(Flow))

avg_mehg_and_flow <- mehg_average_summary %>% 
  full_join(flow_average_summary)

# calculate loads with both means
dmc_mehg_pooled_mean <- mean(dmc_mehg$Result)
dmc_flow_pooled_mean <- mean(dmc_flow_annualsums$AnnualVolume)
dmc_average_loss <- dmc_mehg_pooled_mean * dmc_flow_pooled_mean / 1000000000

swp_mehg_pooled_mean <- mean(swp_mehg$Result)
swp_flow_pooled_mean <- mean(swp_flow_annualsums$AnnualVolume)
swp_average_loss <- swp_mehg_pooled_mean * swp_flow_pooled_mean / 1000000000

total_average_loss <- dmc_average_loss + swp_average_loss
total_average_loss

# calculate loads with mean volume and median mercury
dmc_flow_pooled_mean <- mean(dmc_flow_annualsums$AnnualVolume)
dmc_mixed_loss <- dmc_mehg_pooled_median * dmc_flow_pooled_mean / 1000000000

swp_flow_pooled_mean <- mean(swp_flow_annualsums$AnnualVolume)
swp_mixed_loss <- swp_mehg_pooled_median * swp_flow_pooled_mean / 1000000000

total_mixed_loss <- dmc_mixed_loss + swp_mixed_loss
total_mixed_loss
```
## Error: <text>:17:1: unexpected '<'
## 16: ---
## 17: <
##     ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## loaded via a namespace (and not attached):
##  [1] rstudioapi_0.13    knitr_1.39         magrittr_2.0.3     tidyselect_1.1.2  
##  [5] R6_2.5.1           rlang_1.0.2        fastmap_1.1.0      fansi_1.0.3       
##  [9] stringr_1.4.0      highr_0.9          dplyr_1.0.9        tools_4.2.2       
## [13] xfun_0.31          utf8_1.2.2         DBI_1.1.2          cli_3.3.0         
## [17] htmltools_0.5.2    ellipsis_0.3.2     assertthat_0.2.1   yaml_2.3.5        
## [21] readxl_1.4.0       digest_0.6.29      tibble_3.1.7       lifecycle_1.0.1   
## [25] crayon_1.5.1       RColorBrewer_1.1-3 purrr_0.3.4        vctrs_0.4.1       
## [29] glue_1.6.2         evaluate_0.15      rmarkdown_2.14     stringi_1.7.6     
## [33] compiler_4.2.2     pillar_1.7.0       cellranger_1.1.0   generics_0.1.2    
## [37] writexl_1.4.0      pkgconfig_2.0.3
    Sys.time()
## [1] "2023-12-26 15:17:43 PST"