This report is automatically generated with the R package knitr (version 1.39) .

---
title: "04_Agricultural Return Flows"
author: "Mercury Program and Basin Planning Unit"
date: "11/4/2021"
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---

<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```


```{r Libraries, echo=FALSE}
library(janitor) # for clean_names()
library(shiny)
library(kableExtra) # better formatting of tables

# Had issue trying to set WD with Shiny in R project, reset working directory of rproj
wd <- rstudioapi::getActiveProject()

source(paste0(wd, "/R Functions/functions_estimate NDDNQ values.R"))
source(paste0(wd, "/R Functions/functions_QA data.R"))
```


# Load and Tidy Data

## Methylmercury

Define columns variable for repeat use later
```{r}
# Columns to select
columns_select <- c("SampleDate", "StationName", "SourceID", "ReferenceCode", "MDL", "RL", "Unit", "Result", "ResultQualCode", "SiteType", "TargetLatitude", "TargetLongitude", "CropType", "Comments", "WetDryPeriod", "WaterYear", "SoilType")
```


### 2010 TMDL Staff Report Table 6.7 (Agricultural Drain Aqueous Methylmercury Sampling)
```{r}
# Read in data set
tmdlsr <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/2010 TMDL SR Tbl 6.7_Aq MeHg Data.xlsx"),
                    sheet = 2)
```

Cleaning up data
```{r}
# Clean column names and dates
tmdlsr_clean_format <- tmdlsr %>%
  clean_names() %>%
  mutate(sample_date = as.Date(sample_date, origin = "1899-12-30"))

# Rename columns
tmdlsr_clean1 <- tmdlsr_clean_format %>%
  dplyr::rename(
    StationName = site,
    SampleDate = sample_date,
    Result = me_hg_conc_ng_l,
    Comments = comments
  ) %>%
  add_column(
    ResultQualCode = "=",
    SiteType = "Drain",
    SourceID = "2010 TMDL Staff Report",
    ReferenceCode = NA,
    MDL = NA,
    RL = NA,
    Unit = "ng/L",
    CropType = NA,
    WetDryPeriod = NA,
    WaterYear = NA,
    SoilType = NA,
    TargetLatitude = NA,
    TargetLongitude = NA
  )

# Reorder columns
tmdlsr_final <- tmdlsr_clean1 %>%
  dplyr::select(all_of(columns_select))
```


### 2011 Central Valley Water Board Agricultural Drain Sampling
```{r}
# Read in data set
cvwb <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/2011 CVWB Drain Sampling_Aq MeHg Data.xlsx"),
                    sheet = 2)
```

Clean up data
```{r}
cvwb_final <- cvwb %>% 
  # Change date format
  mutate(Date_Colle = as.Date(Date_Colle, origin = "1899-12-30")) %>%
  # Change result format
  mutate(MMHg_ng_L = as.numeric(MMHg_ng_L)) %>%
  # Create SourceID
  mutate(SourceID = "CVRWQCB Sampling") %>%  # paste("2011 CVWB Drain Sampling lab number", Lab_Number)) %>% 
  # Rename columns
  dplyr::rename(
    StationName = Station_Na,
    Result = MMHg_ng_L,
    SampleDate = Date_Colle,
    TargetLatitude = Latitude,
    TargetLongitude = Longitude
  ) %>%
  # Add missing columns
  add_column(
    ReferenceCode = NA,
    WaterYear = NA,
    SiteType = "Drain",
    MDL = NA,
    RL = NA,
    ResultQualCode = "=",
    Unit = "ng/L",
    WetDryPeriod = NA,
    SoilType = NA,
    CropType = NA
  ) %>% 
  # Reorder columns to match other tables
  dplyr::select(all_of(columns_select))
```


### EPA Tetra Tech Control Study
```{r}
# Read in data set
epatt <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/EPA Tetra Tech_Aq MeHg Data.xlsx"),
                    sheet = 2)
```

Clean up data
```{r}
# View column names
colnames(epatt)

# Remove total mercury column "MLML, Total Hg, ng/l"
epatt_mehg <- epatt %>% dplyr::select(-`MLML, Total Hg, ng/l`)

# Remove rows where site type (inflow, outflow, or drain) is not reported
nrow(epatt_mehg)
epatt_sitetype_na <- epatt_mehg %>% 
  filter(is.na(Inflow) & is.na(Outflow) & is.na(Drain))
nrow(epatt_sitetype_na) #only 6 observations without a site type

epatt_sitetype_nona <- epatt_mehg %>%
  filter(!`Sample ID` %in% epatt_sitetype_na$`Sample ID`)
# Should be TRUE
nrow(epatt_sitetype_nona %>% filter(is.na(Inflow) & is.na(Outflow) & is.na(Drain))) == 0

# Pivot table from wide to long format
epatt_long <- epatt_sitetype_nona %>% 
  pivot_longer(cols = c(Inflow, Outflow, Drain),
               names_to = "site_type",
               values_to = "true_false") %>% 
  filter(!is.na(true_false)) %>% 
  dplyr::select(-true_false)

# Clean column names and dates
epatt_clean_format <- epatt_long %>% 
  clean_names() %>% 
  mutate(date_collected = as.Date(date_collected, origin = "1899-12-30"))

# Move ND values to their own columns - later to be one column called "ResultQualCode"
epatt_nd <- epatt_clean_format %>%
  filter(epa_me_hg_ng_l == "ND" | mlml_me_hg_ng_l == "ND") %>% 
  rename(epa_me_hg_ng_l_nd = epa_me_hg_ng_l, 
         mlml_me_hg_ng_l_nd = mlml_me_hg_ng_l)
  
# Join ND table back to main table, now ND are in their own columns and we can average the EPA/MLML samples
epatt_nd_separate <- epatt_clean_format %>%
  filter(epa_me_hg_ng_l != "ND" | mlml_me_hg_ng_l != "ND") %>% 
  full_join(epatt_nd) %>% 
  mutate(epa_me_hg_ng_l = as.numeric(unlist(epa_me_hg_ng_l))) %>% # convert results columns to numeric
  mutate(mlml_me_hg_ng_l = as.numeric(unlist(mlml_me_hg_ng_l)))


# This report had two labs test their samples for MeHg (EPA and MLML). Board staff decided to average the two results to end with one value per sample.
epatt_samples_avg <- 
  mutate(epatt_nd_separate,
         Result = rowMeans(dplyr::select(epatt_nd_separate, c(epa_me_hg_ng_l,mlml_me_hg_ng_l)), na.rm = TRUE))

# Use only "epa_me_hg_ng_l_nd" column for ND because it overlaps with all MLML ND; call the column "ResultQualCode"
# Rename columns
epatt_clean1 <- epatt_samples_avg %>%
  dplyr::rename(
    ResultQualCode = epa_me_hg_ng_l_nd,
    SampleDate = date_collected,
    StationName = site,
    SiteType = site_type,
    CropType = crop_type,
    WetDryPeriod = wet_dry_period,
    Comments = comments
  )

# Add columns for later (some empty for now)
epatt_clean2 <- add_column(
  epatt_clean1,
  SourceID = "EPA Tetra Tech", # paste("EPA Tetra Tech ", epatt_clean1$sample_id),
  ReferenceCode = NA,
  MDL = NA,
  RL = NA,
  Unit = "ng/L",
  TargetLatitude = NA,
  TargetLongitude = NA,
  WaterYear = NA,
  SoilType = NA
)

# Populate "=" in ResultQualCode where there is a value in "Result"
epatt_clean3 <- epatt_clean2 %>%
  mutate(ResultQualCode = case_when(ResultQualCode == 'ND' ~ 'ND',
                                    TRUE ~ '='))

# Rename "NaN" to NA in Result column
epatt_clean3$Result[is.nan(epatt_clean3$Result)]<-NA

# Delete ND columns where there is no given MDL (not reported)
epatt_clean4 <- epatt_clean3 %>% 
  filter(!(is.na(MDL) & is.na(Result)))

# Select columns
epatt_final <- epatt_clean4 %>% 
  dplyr::select(all_of(columns_select))
```


### Delta Farmed Island Study
```{r}
# Read in drain data set
dfis_drain <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/Delta Farmed Island Study_Aq MeHg Data.xlsx"),
                    sheet = 2)
```

Clean up data
```{r}
# Clean date format
dfis_drain_clean_format <- dfis_drain %>% 
  mutate(Date = as.Date(Date, origin = "1899-12-30"))

# Deal with "<MDL" value in M1 column to pivot the table
# Workaround: sub "<MDL" for 99, later sub 99 for NA
dfis_drain_clean_format$M1 <- gsub("<MDL", 99, dfis_drain_clean_format$M1)
 
# Reformat from short to long, clean names
dfis_drain_long <- dfis_drain_clean_format %>% 
  mutate(M1 = as.numeric(unlist(M1))) %>% 
  pivot_longer(cols = c(W1, N1, S1, M1, T2, J2, B2, St2),
               names_to = "StationName",
               values_to = "Result") %>% 
  filter(!is.na(Result)) %>% # Remove rows where not all drains were sampled on a given date
  #filter(StationName != "B2") %>% # Remove B2 (outliers?)
  clean_names() 

# Add MDL and ResultQualCode columns
# Where <MDL was detected at M1, make MDL = 0.02 and ResultQualCode = ND
dfis_drain_workaround1 <- dfis_drain_long %>%
  filter(result == 99) %>%
  add_column(MDL = 0.02,
             ResultQualCode = "ND") %>% 
  mutate(result = NA)

# Join fixed ND row back into main table
dfis_drain_joined <- dfis_drain_long %>% 
  filter(result != 99) %>% 
  full_join(dfis_drain_workaround1)

# Change format of water years
dfis_drain_clean1 <- dfis_drain_joined %>%
  mutate(water_year = case_when(water_year == "04-05" ~ 2005,
                                water_year == "05-06" ~ 2006,
                                water_year == "06-07" ~ 2007,
                                water_year == "07-08" ~ 2008)) %>% 
  # Add column for crop type, based on Table 1 in final report
  mutate(CropType = case_when(station_name == "W1" ~ "Wheat, Tomatoes, Corn, Safflower", 
                              station_name == "N1" ~ "Alfalfa, Grapes, Corn, Tomatoes",
                              station_name == "M1" ~ "Alfalfa, Grapes, Tomatoes, Wheat",
                              station_name == "S1" ~ "Alfalfa, Safflower, Beans, Tomatoes",
                              station_name == "St2" ~ "Corn, Wheat, Alfalfa",
                              station_name == "T2" ~ "Corn, Wheat, Alfalfa",
                              station_name == "B2" ~ "Corn, Rice, Tomatoes",
                              station_name == "J2" ~ "Corn")) %>% 
  # Add column for soil type, based on Table 1 in final report
  mutate(SoilType = case_when(station_name == "N1" ~ "Mineral",
                              station_name == "M1" ~ "Mineral",
                              station_name == "W1" ~ "Mineral",
                              station_name == "S1" ~ "Mineral",
                              TRUE ~ "Organic")) %>% 
  mutate(ResultQualCode = case_when(ResultQualCode == 'ND' ~ 'ND',
                                    TRUE ~ '='))

# Rename existing columns
dfis_drain_final <- dfis_drain_clean1 %>%
  dplyr::rename(
    StationName = station_name,
    SampleDate = date,
    Result = result,
    WaterYear = water_year
  ) %>%
  # Add missing columns
  add_column(
    ReferenceCode = NA,
    Comments = NA,
    SiteType = "Drain",
    SourceID = "Delta Farmed Island Study Drain Data",
    RL = NA,
    Unit = "ng/L",
    TargetLatitude = NA,
    TargetLongitude = NA,
    WetDryPeriod = NA
  ) %>% 
  # Reorder columns to match other tables
  dplyr::select(all_of(columns_select))
```


### Alpers et al 2014
```{r}
# Read in data set
alpers <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/Alpers et al 2014_Aq MeHg Data.xlsx"),
                    sheet = 2)
```

```{r}
# Clean names and column types
alpers_clean1 <- alpers %>% 
  clean_names() %>% 
  dplyr::select(station_id, sample_date, time_pst, id, unit, days_flooded, me_hg_u_ng_l_1, mdl, site_type) %>% 
  mutate(sample_date = as.Date(sample_date, origin = "1899-12-30"))%>% 
  mutate(time_pst = case_when(time_pst == "nd" ~ NA_character_,
                              TRUE ~ time_pst),
         days_flooded = case_when(days_flooded == "nd" ~ NA_character_,
                              TRUE ~ days_flooded),
         me_hg_u_ng_l_1 = case_when(me_hg_u_ng_l_1 == "nd" ~ NA_character_,
                              TRUE ~ me_hg_u_ng_l_1)) %>% 
  mutate(time_pst = hm(time_pst),
         days_flooded = as.numeric(days_flooded),
         me_hg_u_ng_l_1 = as.numeric(me_hg_u_ng_l_1)) %>% 
  mutate(ResultQualCode = case_when(is.na(me_hg_u_ng_l_1) ~ "ND",
                                    !is.na(me_hg_u_ng_l_1) ~ "="))

# Add missing columns and rename columns
# Rename columns
alpers_final <- alpers_clean1 %>%
  add_column(
    SourceID = "Alpers et al 2014",
    ReferenceCode = NA,
    RL = NA,
    Unit = "ng/L",
    TargetLatitude = NA,
    TargetLongitude = NA,
    WaterYear = NA,
    SoilType = NA,
    CropType = NA,
    Comments = NA,
    WetDryPeriod = NA
  ) %>% 
  dplyr::rename(
    Result = me_hg_u_ng_l_1,
    SampleDate = sample_date,
    StationName = station_id,
    SiteType = site_type,
    MDL = mdl
    ) %>% 
  dplyr::select(all_of(columns_select))
```


### Eagles-Smith et al 2014
```{r}
# Read in data set
eaglessmith <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/Eagles-Smith et al 2014_Aq MeHg Data.xlsx"),
                    sheet = 2)
```

Cleaning up data
```{r}
eaglessmith_clean <- eaglessmith %>% 
  # Change date format
  mutate(Date = as.Date(Date, origin = "1899-12-30")) %>%
  # Change result format
  mutate(AQ_uMeHg = as.numeric(AQ_uMeHg)) %>%
  # Filter only for standard harvest fields
  filter(Treatment == "Std harvest") %>% 
  clean_names() %>% 
  # Rename inlet and outlet to match data standard
  mutate(field_location = case_when(field_location == "inlet" ~ "Inflow",
                                    field_location == "outlet" ~ "Outflow"))

eaglessmith_final <- eaglessmith_clean %>% 
  # Rename columns
  dplyr::rename(
    StationName = site,
    Result = aq_u_me_hg,
    SampleDate = date,
    SiteType = field_location
  ) %>%
  # Add missing columns
  add_column(
    ReferenceCode = NA,
    WaterYear = NA,
    MDL = NA,
    RL = NA,
    ResultQualCode = "=",
    Unit = "ng/L",
    WetDryPeriod = NA,
    SoilType = NA,
    CropType = NA,
    TargetLatitude = NA,
    TargetLongitude = NA,
    SourceID = "Eagles-Smith et al 2014",
    Comments = NA
  ) %>%
  # Reorder columns to match other tables
  dplyr::select(all_of(columns_select))
```


### DMCP Review Data Compilation
```{r}
# Read in data set
datacomp <- readxl::read_xlsx(paste0(wd, "/Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/6a_AqMaster_noRepeats.xlsx"))
```

Cleaning up drain data
```{r}
datacomp_years_stations <- datacomp %>%
  # Change date format
  mutate(SampleDate = as.Date(SampleDate, origin = "1899-12-30")) %>% 
  # Filter for stations, consistent with those used in 2010 TMDL Staff Report (see map in subfolder "Final Linkage Aq Data Sampling Locations")
  filter(
    StationName %in% c(
      "Freeport",
      "River Mile 44",
      "Sacramento R @ Freeport",
      "SACRAMENTO R A FREEPORT CA",
      "Sacramento River @ Freeport",
      "Sacramento River @ Greene's Landing",
      "Sacramento River at Freeport",
      "Sacramento River at Freeport (SVWQC)",
      "Sacramento River at Greene's Landing",
      "Sacramento River at River Mile 44",
      "State Water Project",
      "State Water Project @ Tracy"
    )) %>% 
  # Filter for years that overlap with other MeHg data
  filter(year(SampleDate) %in% c(2000, 2003, 2005, 2006, 2007, 2008, 2011, 2014, 2015)) %>% 
  # Filter for months between April and September (consistent with Delta Farmed Island Study)
  filter(month(SampleDate) %in% c(4, 5, 6, 7, 8, 9)) %>%
  # Filter for MeHg samples
  filter(Analyte %in% c("Methylmercury, Total")) %>% 
  # Merge all comments into one Comments column
  mutate(Comments = paste(SampleComments, ResultsComments, BatchComments, sep = " | "),
         SourceID = case_when(SourceID == "CEDENAqSed" ~ "CEDEN",
                              T ~ SourceID),
         Project = case_when(SourceID == "R5AQ" ~ Project,
                             T ~ NA_character_)) %>%
  # Add columns to match other tables
  add_column(SiteType = "Source",
             CropType = NA,
             SoilType = NA) %>%
  # Rename columns to match other tables
  rename(ReferenceCode = Project,
         WaterYear = waterYear,
         WetDryPeriod = Season)

datacomp_final <- datacomp_years_stations %>%
  # Select columns to match other tables
  dplyr::select(all_of(columns_select))
```



### JOIN ALL TABLES
```{r}
mehg_joined <- epatt_final %>%
  full_join(tmdlsr_final) %>%
  full_join(dfis_drain_final) %>%
  full_join(datacomp_final) %>%
  full_join(cvwb_final) %>% 
  full_join(alpers_final) %>% 
  full_join(eaglessmith_final)

# This should be true
nrow(mehg_joined) == nrow(epatt_final) + nrow(tmdlsr_final) + nrow(dfis_drain_final) + nrow(datacomp_final) + nrow(cvwb_final) + nrow(alpers_final) + nrow(eaglessmith_final)
```


#### Estimate ND/DNQ
```{r}
mehgNDDNQs <- robinNDDNQ(mehg_joined, ResultQualCode)
```

View Fitted Distribution Graphs
Remove comment marks (#'s) and run chunk to see fitted distribution graph options
```{r, echo=FALSE}
# ui <- fluidPage(
#   selectInput('distName', label='Select Distribution', choices=names(mehgNDDNQs), selected=1),
#   plotOutput('distPlot')
# )
# 
# server <- function(input, output, session) {
#   output$distPlot <- renderPlot({
#   plot(mehgNDDNQs[[input$distName]]$fitmodel) # view plot fits
#     title(input$distName, outer=T)
# })
# }
# 
# shinyApp(ui, server, options=list(height=500))
```

Select Best Distribution Model
```{r}
mehg_final <- mehgNDDNQs$burr$fitted %>% # Selected distribution based on plot showing best fit to data
  mutate(SourceRow = 1:nrow(.)) %>%
  # Add water year
  mutate(Year = year(SampleDate),
         Month = month(SampleDate),
         WetDryPeriod = case_when(month(SampleDate) >=11 & month(SampleDate) <=12 ~ "Wet1",   # Wet season Nov-Dec - Start of WaterYear
                            month(SampleDate) >=1  & month(SampleDate) <=4  ~ "Wet2",   # Wet season Jan-Apr of Year
                            month(SampleDate) >=5  & month(SampleDate) <=10 ~ "Dry"),   # Dry season May-Oct of Year
         WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year))  # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
```



## Flow
Delta Island Consumptive Use (DICU) flow data, in cubic feet per second (cfs)

### Diversions
```{r}
dicu_diversions <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/Ag diversion and drainage flow_DICU.xlsx"),
                              sheet = 1)
```

Select column with total diversion flows from all islands
```{r}
DICU <- subset(dicu_diversions, select = c(`DICU-HIST+NODE...256`, Date))

#Rename
DICU <- rename(DICU, c( "Diversions"="DICU-HIST+NODE...256"))

# Change to numeric data type
DICU$Diversions <- as.numeric(DICU$Diversions)
```


### Seepage
```{r}
dicu_seepage <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/Ag diversion and drainage flow_DICU.xlsx"),
                              sheet = 3)
```

Select column with seepage onto all islands
```{r}
DICU3 <- dicu_seepage %>% 
  dplyr::select(c("DICU-HIST+NODE...255", "...1")) %>% 
  dplyr::rename(Seepage = `DICU-HIST+NODE...255`,
                Date = `...1`) %>% 
  mutate(Seepage = as.numeric(Seepage))
```


### Drainage
```{r}
dicu_drainage <- readxl::read_xlsx(paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Data/Ag diversion and drainage flow_DICU.xlsx"),
                              sheet = 2)
```

Select column with total drainage flows from all islands
```{r}
DICU2 <- subset(dicu_drainage, select = c(`DICU-HIST+NODE...200`, Date))

#Rename
DICU2 <- rename(DICU2, c("Drainage" = "DICU-HIST+NODE...200"))

#Change to numeric data type
DICU2$Drainage <- as.numeric(DICU2$Drainage)
```


### Merge Diversion, Seepage, and Drainage data
```{r}
DICU_Dataset_final <- DICU %>% right_join(DICU2, by= "Date") %>%
  right_join(DICU3, by= "Date") %>%
  mutate(
    Year=year(Date), # extract parts
    Month=month(Date),
    Day=day(Date),
    # Add Seepage to Diversions, consistent with methods of 2010 TMDL Staff Report
    `Diversions + Seepage` = Diversions + Seepage
  ) %>% 
  filter(Year >= 2000 & Year <= 2019) %>% 
  dplyr::select(c(`Diversions + Seepage`, Diversions, Seepage, Drainage, Date, Year, Month, Day))
```


### TEST: Recreate 2010 TMDL Staff Report Table 6.8
```{r}
recreate_flow <- DICU %>% 
  right_join(DICU2, by= "Date") %>%
  right_join(DICU3, by= "Date") %>%
  mutate(
    Year=year(Date),
    Month=month(Date),
    Day=day(Date),
    `Diversions + Seepage` = Diversions + Seepage) %>% 
  dplyr::select(c(`Diversions + Seepage`, Drainage, Date, Year, Month, Day)) %>% 
  mutate(MonthYear = paste(Month, Year)) %>%
  mutate(DaysInMonth = days_in_month(Month)) %>% # Add column for days in month
  filter(Date >= "1998-10-01" & Date <= "1999-09-30") %>% 
  dplyr::select(MonthYear, `Diversions + Seepage`, Drainage, DaysInMonth) %>%
  dplyr::rename(Period = MonthYear) %>% 
  # Convert from cfs to acre feet/month
  mutate(`Diversions + Seepage` = `Diversions + Seepage` * 1.98 * DaysInMonth,
         Drainage = Drainage * 1.98 * DaysInMonth) %>%
  mutate(NetChannelDepletion = `Diversions + Seepage` - Drainage)
# Recreation matches closely with Table 6.8, with slight difference due to variation in model output
```



# Analyze Data
## Flow
```{r}
# First, select flow month/year combinations of flow that coincide with methylmercury sampling data
mehg_dates <- paste(month(mehg_final$SampleDate), year(mehg_final$SampleDate)) %>%
  unique()

flow_correct_dates <- DICU_Dataset_final %>% 
  mutate(MonthYear = paste(Month, Year)) %>%
  filter(MonthYear %in% mehg_dates) %>%
  # Add water year
  mutate(WetDryPeriod = case_when(Month >=10 & Month <=12 ~ "Wet1",   # Wet season Oct-Dec - Start of WaterYear
                            Month >=1  & Month <=4  ~ "Wet2",   # Wet season Jan-Apr of Year
                            Month >=5  & Month <=9 ~ "Dry"),   # Dry season May-Sept of Year
         WaterYear = if_else(WetDryPeriod == 'Wet1', Year+1, Year)) %>%   # Set WaterYear for "Wet1" to next Year, Set WaterYear for Wet2 & Dry same as Year
  mutate(DaysInMonth = days_in_month(Month)) # Add column for days in month

```


### Visualize Flow Data
```{r}
hist(flow_correct_dates$Drainage, breaks = 20)
```

```{r}
hist(flow_correct_dates$`Diversions + Seepage`, breaks = 20)
```

### Median Flow Data
```{r}
flow_final <- flow_correct_dates %>% 
  group_by(Month) %>% 
  # Convert from cfs to acre feet/month
  summarise(MonthlyDrainFlow = (median(Drainage) * 1.98 * DaysInMonth),
         MonthlyDiversionFlow = (median(`Diversions + Seepage`) * 1.98 * DaysInMonth)) %>%
  ungroup() %>% 
  distinct()
```


## MeHg
### Visualize MeHg Data
```{r}
drain_mehg <- mehg_final %>% 
  filter(SiteType == "Outflow" | SiteType == "Drain")
hist(drain_mehg$Result, breaks = 20)
```

```{r}
source_mehg <- mehg_final %>% 
  filter(SiteType == "Inflow" | SiteType == "Source")
hist(source_mehg$Result, breaks = 20)
```

### Median MeHg Concentrations
```{r}
# Drain
drain_mehg_monthly <- drain_mehg %>% 
  group_by(Month) %>% 
  summarise(MonthlyDrainMedian = median(Result))

# Source
source_mehg_monthly <- source_mehg %>% 
  group_by(Month) %>% 
  summarise(MonthlySourceMedian = median(Result))
```

## Save Data used for Calculations
```{r}
mehg_save <- mehg_final %>% 
  select(SourceID, ReferenceCode, StationName, SampleDate, Result, Unit, MDL, RL, ResultQualCode, Censored) %>% 
  
  mutate(Diversions=NA_real_,
         Seepage = NA_real_,
         Drainage = NA_real_,
         .before="Unit")

flow_save <- flow_correct_dates %>% 
  rename(SampleDate=Date) %>% 
  mutate(SourceID="DICU",
         ReferenceCode = NA,
         StationName=NA,
         Result=NA,
         Unit="cfs",
         MDL=NA,
         RL=NA,
         ResultQualCode=NA,
         Censored=NA) %>% 
  select(SourceID, ReferenceCode, StationName, SampleDate, Result, Diversions, Seepage, Drainage, Unit, MDL, RL, ResultQualCode, Censored)

ag_load_data <- mehg_save %>% 
  bind_rows(., flow_save) %>% 
  arrange(desc(Unit), SourceID, SampleDate)

writexl::write_xlsx(list("6.2.5 Ag Return Waters" = ag_load_data),
                    path=paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Output/Appdx A_Data Compilation_Ag Return Waters.xlsx"))
```

## Load calculations
```{r}
annual_median <- median(source_mehg_monthly$MonthlySourceMedian)
no_irrig <- source_mehg_monthly %>%
  filter(Month %in% c(1, 2, 10, 11, 12))
no_irrig_median <- median(no_irrig$MonthlySourceMedian)

ag_loads <- flow_final %>%
  full_join(drain_mehg_monthly) %>%
  full_join(source_mehg_monthly) %>%
  mutate(MonthlySourceMedian = case_when(Month == 3 ~ no_irrig_median,
                                         TRUE ~ MonthlySourceMedian)) %>%
  mutate(MonthlyDrainLoad = MonthlyDrainFlow * MonthlyDrainMedian,
         MonthlySourceLoad = MonthlyDiversionFlow * MonthlySourceMedian) %>%
  # Adjust units so af/L cancels out and results in ng/month
  mutate(MonthlyLoad = (MonthlyDrainLoad - MonthlySourceLoad) * 1233000) %>% 
  # Divide by 1000000000 to get g/month
  mutate(MonthlyLoadGMonth = MonthlyLoad/1000000000)

# Calculate annual load by adding together all 12 monthly loads, result is in g/yr
annual_load <- sum(ag_loads$MonthlyLoadGMonth)
```



# Recreate Tables (redesigned to better show updated data & analyses)
## Farmed Delta Island MeHg Conc.
```{r}
Farmed_Delta_Islands <- ag_loads %>%
  mutate(`Source Water MeHg` = round(MonthlySourceMedian, digits = 3),
         `Return Water MeHg` = round(MonthlyDrainMedian, digits = 3),
         Month = month.abb[Month]) %>% 
  dplyr::select(Month, `Source Water MeHg`, `Return Water MeHg`)


Farmed_Delta_Islands %>% 
  kbl(align='lcc', caption="Farmed Delta Island MeHg Conc.") %>%
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)

writexl::write_xlsx(Farmed_Delta_Islands, paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Output/Farmed Delta Island Monthly MeHg Conc.xlsx"))
```

## DICU Estimates (2010 Staff Report Table 6.8)
```{r}
# In acre feet/month, presented as monthly medians
DICU_est <- ag_loads %>% 
  dplyr::select("Month", "MonthlyDiversionFlow", "MonthlyDrainFlow") %>% 
  mutate(`Net Channel Depletion` = MonthlyDiversionFlow - MonthlyDrainFlow,
         Month = month.abb[Month]) %>% 
  rename(`Diversions + Seepage` = MonthlyDiversionFlow,
         `Return Flow` = MonthlyDrainFlow) %>% 
  adorn_totals(where = "row", name = "Annual Totals")


DICU_est %>% 
  kbl(align='lcc', caption="DICU Estimates") %>%
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)

writexl::write_xlsx(DICU_est, paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Output/DICU Estimates_monthly net channel depletion.xlsx"))
```

## Monthly Gross Ag Returns
```{r}
Ag_Loads <- ag_loads %>% 
  dplyr::select("Month", "MonthlySourceLoad", "MonthlyDrainLoad", "MonthlyLoadGMonth") %>% 
  mutate(Month = month.abb[Month],
         MonthlySourceLoad = round((MonthlySourceLoad * 1233000/1000000000), digits = 3),
         MonthlyDrainLoad = round((MonthlyDrainLoad * 1233000/1000000000), digits = 3),
         MonthlyLoadGMonth = round(MonthlyLoadGMonth, digits = 3)) %>% 
  adorn_totals(fill = " ") %>% 
  rename(`Source Water Load (g/month)` = MonthlySourceLoad,
         `Return Water Load (g/month)` = MonthlyDrainLoad,
         `Net Load (g/month)` = MonthlyLoadGMonth)


Ag_Loads %>% 
  kbl(align='lcc', caption="Monthly Ag Loads") %>%
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)

writexl::write_xlsx(Ag_Loads, paste0(wd, "/Reeval_Source_Analysis/Source Data/04_Ag/Output/Monthly Ag Loads within Delta Boundary.xlsx"))
```



*****CODE BELOW NOT USED IN FINAL REPORT, COMMENT OUT IF YOU DON'T WANT IT RUN*****
## TEST: Means instead of medians
```{r}
test_flow_final <- flow_correct_dates %>% 
  group_by(Month) %>% 
  # Convert from cfs to acre feet/month
  summarise(MonthlyDrainFlow = (mean(Drainage) * 1.98 * DaysInMonth),
         MonthlyDiversionFlow = (mean(`Diversions + Seepage`) * 1.98 * DaysInMonth)) %>%
  ungroup() %>% 
  distinct()

# mean volumes in acre feet/year
test_mean_volume_drain <- sum(test_flow_final$MonthlyDrainFlow)
test_mean_volume_drain

test_mean_volume_diversion <- sum(test_flow_final$MonthlyDiversionFlow)
test_mean_volume_diversion

# test mean volume with median concentration
mixed_ag_loads <- test_flow_final %>%
  full_join(drain_mehg_monthly) %>%
  full_join(source_mehg_monthly) %>%
  mutate(MonthlySourceMedian = case_when(Month == 3 ~ no_irrig_median,
                                         TRUE ~ MonthlySourceMedian)) %>%
  mutate(MonthlyDrainLoad = MonthlyDrainFlow * MonthlyDrainMedian,
         MonthlySourceLoad = MonthlyDiversionFlow * MonthlySourceMedian) %>%
  # Adjust units so af/L cancels out and results in ng/month
  mutate(MonthlyLoad = (MonthlyDrainLoad - MonthlySourceLoad) * 1233000) %>% 
  # Divide by 1000000000 to get g/month
  mutate(MonthlyLoadGMonth = MonthlyLoad/1000000000)
# Calculate annual load by adding together all 12 monthly loads, result is in g/yr
mixed_annual_load <- sum(mixed_ag_loads$MonthlyLoadGMonth)

### Average MeHg Concentrations
# Drain
test_drain_mehg_monthly <- drain_mehg %>% 
  group_by(Month) %>% 
  summarise(MonthlyDrainMean = mean(Result))

# Source
test_source_mehg_monthly <- source_mehg %>% 
  group_by(Month) %>% 
  summarise(MonthlySourceMean = mean(Result))


## Load calculations
test_annual_mean <- mean(test_source_mehg_monthly$MonthlySourceMean)
test_no_irrig <- test_source_mehg_monthly %>%
  filter(Month %in% c(1, 2, 10, 11, 12))
test_no_irrig_mean <- mean(test_no_irrig$MonthlySourceMean)

test_ag_loads <- test_flow_final %>%
  full_join(test_drain_mehg_monthly) %>%
  full_join(test_source_mehg_monthly) %>%
  mutate(MonthlySourceMean = case_when(Month == 3 ~ test_no_irrig_mean,
                                         TRUE ~ MonthlySourceMean)) %>%
  mutate(MonthlyDrainLoad = MonthlyDrainFlow * MonthlyDrainMean,
         MonthlySourceLoad = MonthlyDiversionFlow * MonthlySourceMean) %>%
  # Adjust units so af/L cancels out and results in ng/month
  mutate(MonthlyLoad = (MonthlyDrainLoad - MonthlySourceLoad) * 1233000) %>% 
  # Divide by 1000000000 to get g/month
  mutate(MonthlyLoadGMonth = MonthlyLoad/1000000000)
# Calculate annual load by adding together all 12 monthly loads, result is in g/yr
test_annual_load <- sum(test_ag_loads$MonthlyLoadGMonth)

## Estimate Load North of Legal Delta Boundary
test_area_delta <- 485182.025
test_area_north_boundary <- 13013.971

# Below is in g/acre/year
test_annual_rate <- test_annual_load/test_area_delta
test_north_load <- test_annual_rate * test_area_north_boundary


# Get rates in ng/m2/day (for mean/median comparison table)
area_delta_m2 <- test_area_delta * 4047

median_rate <- annual_load/area_delta_m2/365 * 1000000000
  
mean_rate <- test_annual_load/area_delta_m2/365 * 1000000000
```
## Error: <text>:18:1: unexpected '<'
## 17: 
## 18: <
##     ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.0       actuar_3.2-2      
##  [5] NADA_1.6-1.1       forcats_0.5.1      stringr_1.4.0      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.7      
## [13] ggplot2_3.3.6      tidyverse_1.3.1    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45    assertthat_0.2.1   digest_0.6.29      utf8_1.2.2        
##  [5] R6_2.5.1           cellranger_1.1.0   backports_1.4.1    reprex_2.0.1      
##  [9] evaluate_0.15      httr_1.4.3         highr_0.9          pillar_1.7.0      
## [13] rlang_1.0.2        lazyeval_0.2.2     rstudioapi_0.13    data.table_1.14.2 
## [17] Matrix_1.5-1       rmarkdown_2.14     labeling_0.4.2     splines_4.2.2     
## [21] htmlwidgets_1.5.4  munsell_0.5.0      broom_0.8.0        compiler_4.2.2    
## [25] modelr_0.1.8       xfun_0.31          pkgconfig_2.0.3    htmltools_0.5.2   
## [29] tidyselect_1.1.2   viridisLite_0.4.0  fansi_1.0.3        crayon_1.5.1      
## [33] tzdb_0.3.0         dbplyr_2.2.0       withr_2.5.0        grid_4.2.2        
## [37] jsonlite_1.8.0     gtable_0.3.0       lifecycle_1.0.1    DBI_1.1.2         
## [41] magrittr_2.0.3     scales_1.2.0       writexl_1.4.0      cli_3.3.0         
## [45] stringi_1.7.6      farver_2.1.0       fs_1.5.2           xml2_1.3.3        
## [49] ellipsis_0.3.2     generics_0.1.2     vctrs_0.4.1        expint_0.1-7      
## [53] RColorBrewer_1.1-3 tools_4.2.2        glue_1.6.2         hms_1.1.1         
## [57] yaml_2.3.5         fastmap_1.1.0      colorspace_2.0-3   rvest_1.0.2       
## [61] knitr_1.39         haven_2.5.0
    Sys.time()
## [1] "2023-12-27 10:28:04 PST"