This report is automatically generated with the R package knitr (version 1.40) .

---
title: 'Delta Hg TMDL Review'
subtitle: 'Linkage Analysis - Aq MeHg vs. Black Bass THg'
author: "Mercury Program and Basin Planning Unit"
date: "8/4/2021"
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---

<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```

```{r Libraries, include=FALSE, message=FALSE}
library(knitr)      # generate html tables for this report
library(kableExtra) # better formatting of tables
library(DT)         # creates sortable tables
library(shiny)
library(matrixStats) # weightedMedian

# Having issue with runtime:shiny resetting the WD of rproj so need to use setwd()
wd <- rstudioapi::getActiveProject()
setwd(wd)

source("R Functions/functions_estimate NDDNQ values.R") # Load first so select() isn't masked from dplyr
source("R Functions/functions_QA data.R")
source("R Functions/function_predictionModels.R")
source("R Functions/function_predictionModelPlot.R")
source("R Functions/function_AqSampleFishModelPairing.R")


# Function to round values in a dataframe
signif_df <- function(x, digits) {
  # round numeric columns
  # x = data frame 
  # digits = number of digits to round
  numeric_columns <- sapply(x, mode) == 'numeric'
  x[numeric_columns] <-  signif(x[numeric_columns], digits)
  x
}
```

## Load Data
```{r}
setwd(wd)

# Load Data from "eval1_TLG Eval & BB Impl Goal"
load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/blackBass_2000.RData') # loads "blackBass_2000" ~ Final Black Bass raw data
load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/BlkBassStd350_FinalModels_2000.RData') # loads "BlkBassStd350_FinalModels_2000" ~ Final Black Bass Std 350 [Hg] Models for each Year
load('Reeval_Impl_Goals_Linkage_Analysis/eval1_TLG Eval & BB Impl Goal/Output/BBImpGoal.RData') # loads "BBImpGoal" loads final Black Bass Implementation Goal (calculated using Black Bass data weighted average by year for each subarea)


# Load Aqueous Data
aqdf <- loadData("Reeval_Impl_Goals_Linkage_Analysis/Data/Aqueous/6a_AqMaster_noRepeats.xlsx")

# Convert columns that are character but should be numeric or date
aqdf <- chara_to_NumDate(aqdf)
```



# Aqueous Data QC

**This markdown, shows the finalized results from "Test Different Linkage Analysis Aq & BB Year Pairings" markdown, which evaluated data year range selection and which statistic to use for the linkage model.**

## View Unique Factors
```{r}
aqdf %>%
  mutate(AnalyteUnitMatrix = paste(Analyte, '-', Unit, '-', MatrixName)) %>% 
  unique_factors(SourceID, Subarea, AnalyteUnitMatrix, WBT)
```

## Change X2 Subarea to West Delta
```{r}
aqData1 <- aqdf %>% 
  mutate(Subarea = recode(Subarea, "X2" = "West Delta"), # Change X2 to West Delta, consistent with the 2010 TMDL Staff Report
         Type = 'Aqueous',
         Linkage = case_when(SampleDate >= "2000-03-01" & SampleDate < "2000-11-01" & grepl("Delta Mendota Canal @ Tracy|Freeport|Mokelumne-Co|Mokelumne River d/s I-5|Greene's Landing|Vernalis|State Water Project|X2", StationName) ~ '2010 Staff Report',
         T ~ 'Updated')) # Add column to distinguish Updated Linkage data from 2010 Staff Report Linkage data
```

## Select "Methylmercury, Total", "ng/L", "Aqueous"
```{r}
# Select only aqueous MeHg as in 2010 TMDL Staff Report
aqData2 <- aqData1 %>% 
  filter(Analyte     == 'Methylmercury, Total',
         Unit        == 'ng/L',
         MatrixName  == 'Aqueous')
```


## View Factors for Final Data Selection
```{r}
aqData2 %>%
  mutate(AnalyteUnitMatrix = paste(Analyte, '-', Unit, '-', MatrixName)) %>% 
  unique_factors(SourceID, Subarea, AnalyteUnitMatrix, WBT)
```


## Estimate ND & DNQ Values
```{r}
aqNDDNQs <- robinNDDNQ(aqData2, ResultQualCode)
```


### View Fitted Distribution Graphs
```{r, echo=FALSE}
ui <- fluidPage(
  selectInput('distName', label='Select Distribution', choices=names(aqNDDNQs), selected=1),
  plotOutput('distPlot')
)

server <- function(input, output, session) {
  output$distPlot <- renderPlot({
  plot(aqNDDNQs[[input$distName]]$fitmodel) # view plot fits
    title(input$distName, outer=T)
})
}

shinyApp(ui, server, options=list(height=500))
```


### Select Best Distribution Model
```{r}
aqData3 <- aqNDDNQs$llogis$fitted  # Selected distribution - primarily based on CDF & P-P plot showing best fit to data
```

## Check for Outliers
e.g., orders of magnitude maybe issues with unit conversion
```{r}
# View Data for Entire Delta
result_time_Plotly(aqData3 %>% filter(Censored == F),
                   groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F)
```

```{r, fig.height=8.5}
# View Data for Each Subarea
result_time_Plotly(aqData3 %>% filter(Censored == F),
                   groupByCol=Subarea, showMean=T, interactive=T, logscale=T, showLegend=F)
```

```{r}
# View NDs & DNQs
result_time_Plotly(aqData3 %>% filter(Censored == T),
                   groupByCol=TMDL, showMean=F, interactive=T, logscale=T, showLegend=F)
```

## Finalize QC & Data Selection
```{r}
# Select DRMP Data
aqDRMP_data <- aqData3 %>%
  filter(grepl('DRMP', SourceID, ignore.case=T),
               !grepl('YB', Subarea, ignore.case=T)) %>% # remove Yolo Bypass (north removed from Black Bass during TLG Eval). South Yolo Bypass was originally included in year range and statistic evaluations but removed as it appears to be an outlier attributed to different hydrologic conditions and resulting fish Hg exposure compared to the other subareas (which contain fish year round).
  arrange(Subarea, SampleDate)
```

### Black Bass Data Selection
```{r}
# Add Linkage Designation to raw data
bbData_2000_2019 <- blackBass_2000 %>%
   mutate(Linkage = case_when(SampleDate >= as.Date("2000-08-01") & SampleDate < as.Date("2000-10-01") & grepl("544ST0909|Mokelumne River/d/s Cosumnes River|544ST0934|Isleton|510ST1312|Vernalis|541ST1481|510ST1666|510ST1304", StationName) ~ '2010 Staff Report',
         T ~ 'Updated'), # Add column to distinguish Updated Linkage data from 2010 Staff Report Linkage data
         Type = case_when(Linkage == "2010 Staff Report" ~ "Largemouth Bass",
                          T ~ "Black Bass")) 

bbData_2016_2019 <- bbData_2000_2019 %>% 
  filter(Year >= 2016)

# model result data
BlkBassStd350_FinalModels_2000 <- BlkBassStd350_FinalModels_2000 %>% 
  filter(Year >= 2016)
```


### Aq & BB Data Sig Fig Count
```{r}
# Function to Find # of Sig Figs - does not count trailing 0's but those don't exist from excel anyway
sigdigs <- function(x){
  orig_scipen <- getOption("scipen")
  options(scipen = 999)
  on.exit(options(scipen = orig_scipen))
  
  x <- as.character(x)
  x <- sub("\\.", "", x)
  x <- gsub("(^0+|0+$)", "", x)
  nchar(x)
}

aqData_decimals <- aqDRMP_data %>%
  mutate(SigFigs = map_dbl(aqDRMP_data$Result, sigdigs),
         Type = 'Aqueous') %>% 
  select(SigFigs, Type)

blackBass_decimals <- bbData_2016_2019 %>%
  mutate(SigFigs = map_dbl(bbData_2016_2019$Result, sigdigs),
         Type = 'Black Bass') %>% 
  select(SigFigs, Type)



aqData_decimals %>% 
  bind_rows(blackBass_decimals) %>% 
  count(Type, SigFigs) %>% 
  rename(Occurences = n) %>% 
  kbl(align='lcc', caption="Significant Figure Count - Does not count trailing 0's") %>%
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)

# Aqueous data predominantly at least 2 sig figs, so round Aq Impl goal to 2 sig figs (15 sig figs are estimated ND/DNQ values)
# Black data predominantly 3 sig figs, so round BB Impl goal to 3 sig figs

sigdigs(BBImpGoal)
```


### Save Linkage Model Aq & BB Data Sets & Sampling Locations
```{r}
setwd(wd)

aqDRMP_data_save <- aqDRMP_data %>% 
  select(Subarea, SourceID, Project, StationName, SampleDate, Analyte, Unit, Result, MDL, RL, ResultQualCode, Censored) %>%
  rename(ReferenceCode=Project) %>% 
  mutate(SourceID = case_when(SourceID == "CEDENAqSed" ~ "CEDEN",
                              T ~ SourceID),
         ReferenceCode = case_when(SourceID == "R5AQ" ~ ReferenceCode,
                                   T ~ NA_character_)) %>% 
  arrange(Subarea, SampleDate, StationName)

bbData_2016_2019_save <- bbData_2016_2019 %>% 
  select(Subarea, SourceID, ProjectCode, StationName, SampleDate, Analyte, Unit, Result, MDL, RL, ResultQualCode, Censored, CommonName, TLAvgLength, TissueName, NumberFishPerComp) %>%
  rename(ReferenceCode=ProjectCode,
         TLAvgLength_mm = TLAvgLength) %>% 
  mutate(SourceID = case_when(SourceID == "CEDENFish" ~ "CEDEN",
                              T ~ SourceID),
         ReferenceCode = case_when(SourceID == "R5MF" ~ ReferenceCode,
                                   T ~ NA_character_)) %>% 
  arrange(Subarea, SampleDate, StationName)

# Save Linkage Model Data Sets
writexl::write_xlsx(list("Linkage Model Aq Data" = aqDRMP_data_save,
                         "Linkage Model BB Data"=bbData_2016_2019_save),
                    path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Appdx A_Data Compilation_Linkage Model.xlsx')


# Save Linkage Model Sampling Locations
aq_SampLoc <- aqDRMP_data %>% # Type & Linkage columns previously added when creating "aqData2"
  mutate(uniqName = paste(StationName, Linkage, round(TargetLatitude,4), round(TargetLongitude,4))) %>%  # argetLat & TargetLong needs to be the same code used in Step 3 Script when creating variable "AqMaster" in mutate(uniqName = ...)
  distinct(uniqName, .keep_all=T) %>%
  select(Subarea, StationName, Type, Linkage, TargetLatitude, TargetLongitude, CoordSystem, uniqName)

bb_SampLoc <- bbData_2016_2019 %>%
  mutate(uniqName = paste(StationName, Linkage, round(TargetLatitude,4), round(TargetLongitude,4))) %>%  # TargetLat & TargetLong needs to be the same code used in Step 3 Script when creating variable "AqMaster" in mutate(uniqName = ...)
  distinct(uniqName, .keep_all=T) %>%
  select(Subarea, StationName, Type, Linkage, TargetLatitude, TargetLongitude, CoordSystem, uniqName)


SampLoc <- aq_SampLoc %>%
  bind_rows(., bb_SampLoc) %>%         # combine BB & Aqueous sampling locations
  arrange(Type, Subarea, Linkage)

writexl::write_xlsx(SampLoc, path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Aq&BB_SampleLocations.xlsx')
```



# UPDATED LINKAGE GRAPH 

## 5yr Aq DRMP Data Overlap (2016-2019) vs. DRMP Black Bass (2016-2019) by Subarea
```{r}
Linkage.DRMPAq.DRMPBByr <- AqSampleFishModelPairing(aqData=aqDRMP_data,
                                                   fishModels=BlkBassStd350_FinalModels_2000,
                                                   bylocationColumn=Subarea,
                                                   aqYearColumn=seasonYear,
                                                   aqOverlapYrs=5)

Linkage.DRMPAq.DRMPBByr.Subarea <- Linkage.DRMPAq.DRMPBByr %>% 
  group_by(Subarea) %>%
  mutate(sampleYears = aq_sampleYears[which.max(aq_n)]) %>%
  ungroup() %>% 
  group_by(Subarea, sampleYears) %>% 
  summarize(n = n(),
            Std350_Median             = median(Stdz350),
            Aq_Median_ofPooledMedians = median(aq_Median_pool),
            UsedInLinkage = 'Yes',
            .groups = 'drop')


updated_linkage <- predictionModels(Linkage.DRMPAq.DRMPBByr.Subarea,
                                    .knownYcol=Aq_Median_ofPooledMedians,
                                    .knownXcol=Std350_Median,
                                    knownX=BBImpGoal) %>% 
  rename(aqMeHgSafeConc = predictedY) %>%
  mutate(aqMeHgSafeConc = signif(aqMeHgSafeConc, 2)) %>% 
  ungroup %>% 
  head(1)   # select best model. in this case it is log
```

### Linkage Graph
```{r}
title  <- ""
xTitle <- 'Median Hg Conc. of Standardized 350 mm Black Bass (mg/Kg)'
yTitle <- 'Median of Pooled Medians of Unfiltered Aqueous MeHg (ng/L)'
    
  # extract data and model
  dataframe <- updated_linkage$data[[1]]

  dfKnownX  <- updated_linkage$knownX
  
  
  # Regression Model 
  model     <- updated_linkage$Model[[1]]
  modelName <- updated_linkage$ModelName
  
  model_predict <- tibble(Std350_Median = seq(floor(  min(dataframe %>% pull(Std350_Median), dfKnownX)*95)/100,
                                               ceiling(max(dataframe %>% pull(Std350_Median), dfKnownX)*105)/100,
                                               length.out=500))
  model_predictUpdate <- model_predict %>%
    mutate(Aq_Median_ofPooledMedians = case_when(modelName == 'pwr' | modelName == 'exp' ~ exp(predict(model, model_predict %>% select(Std350_Median))),
                                      T ~ predict(model, model_predict %>% select(Std350_Median))))
  
  
  #ggplot shapes & colors
  Subareas <- sort(unique(BlkBassStd350_FinalModels_2000$Subarea))
  mySubareashapes <- c(15, 16, 17, 18, 8, 22, 21, 23, 24, 4)
  Subareashapes   <- setNames(mySubareashapes[1:length(Subareas)], Subareas)
  
  # color safe palette - https://personal.sron.nl/~pault/#sec:qualitative
  mySubareacolors <- c('#0077BB','#33BBEE','#009988','#EE7733','#CC3311','#EE3377', '#919191') # Changed '#BBBBBB' light gray to '#919191' darker gray
  Subareacolors   <- setNames(mySubareacolors[1:length(Subareas)], Subareas)
  
  used_colors_orig <- Subareacolors[unique(dataframe %>% pull(Subarea))]
  used_shapes_orig <- Subareashapes[unique(dataframe %>% pull(Subarea))]
  
  # Graph Limits
  xMax <- ceiling(max(model_predictUpdate[[1]])*10)/10   
  yMax <- ceiling(max(model_predictUpdate[[2]], max(dataframe %>% select(Aq_Median_ofPooledMedians)))*10)/10
  

  #Plot Data Points & Regression Line
  regressionGraph <- ggplot() +
    ggtitle(title) +
    xlab(xTitle) +
    ylab(yTitle) +
  
    # Data Points
    geom_point(data=dataframe, aes(x=Std350_Median, y=Aq_Median_ofPooledMedians, color=Subarea, shape=Subarea),
               alpha=1,
               size=4, stroke=1.5,
               show.legend=T) +
    # Regression Line
    geom_point(data=model_predictUpdate, aes(x=Std350_Median, y=Aq_Median_ofPooledMedians),
               color='grey20',
               size=.3,
               show.legend=F) +
    # Text - Regression Equation
    annotate("text", x=1.05, hjust=0, y=.1+.004,
             label=paste0("italic(y)==", round(model$coefficients['(Intercept)'],3), "~'+'~", round(model$coefficients['log(Std350_Median)'],4), "~'*'~'ln('*italic(x)*')'"), parse=T) +
    
    # Text - Regression Model SER (Standard Error of Regression)
    annotate("text", x=1.05, hjust=0, y=.1-.003, # updated_linkage %>% pull(aqMeHgSafeConc)*1.9,
             label=paste0("italic(~SER) == ", sigfig(updated_linkage$SER,3)), parse=T) +
    
    #Scale & Theme
    scale_x_continuous(expand = c(0, 0), limits = c(0, xMax)) +
    scale_y_continuous(expand = c(0, 0), limits = c(0, yMax)) +
    scale_color_manual(values = used_colors_orig) +
    scale_shape_manual(values = used_shapes_orig) +
    theme_light() +
    theme(text = element_text(size=14, family="sans"),
          axis.title=element_text(size=12, face="bold"),
          axis.text.x = element_text(size=10.5, angle = 0, hjust = 0.5),
          panel.grid.major.y=element_line(size=1),
          panel.grid.minor=element_blank(),
          legend.text = element_text(size = 14),
          legend.title = element_text(size = 14, face="bold"))

  regressionGraph
  
  #Add Prediction Line
  predictionGraph <- regressionGraph +
    #Vertical Line
    annotate(geom="segment", size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), x=updated_linkage$knownX, xend=updated_linkage$knownX, y=0, yend=updated_linkage %>% pull(aqMeHgSafeConc), color='grey20') +
    #Vertical Line Text - Known X
    annotate("text", x=0.27, hjust=0, y=0.02,
             label=paste0("italic(x)== ", updated_linkage$knownX, "[~Black~Bass~Implementation~Goal]"), parse=T, color='grey20') +
    
    #Horizontal Line
    annotate(geom="segment", size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), x=updated_linkage$knownX, xend=0, y=updated_linkage %>% pull(aqMeHgSafeConc), yend=updated_linkage %>% pull(aqMeHgSafeConc), color='grey20') +
    #Horizontal Line Text - Predicted Y
    annotate("text", x=.027, hjust=0, y=updated_linkage %>% pull(aqMeHgSafeConc) + .004, label=paste0('italic(y)== ', sigfig(updated_linkage %>% pull(aqMeHgSafeConc), 3)), parse=T, color='grey20')
  
  predictionGraph
  
  ggsave(filename = paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Fig 5.2 Updated Linkage Analysis.png'),
       plot = predictionGraph,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 125)

```

### Linkage Data Table
```{r}
Linkage.DRMPAq.DRMPBByr.Subarea %>% 
  kbl(align='lccc', caption='Linkage Data by Subarea') %>%
  kable_paper('hover', full_width=T) %>%
  kable_styling(fixed_thead = T)


Linkage.DRMPAq.DRMPBByr %>% 
  select(Subarea, Year, n, Stdz350, aq_sampleYears, aq_n, aq_Median_pool) %>% 
  kbl(align='lcccccc', caption='Linkage Data used to find Medians by Subarea') %>%
  kable_paper('hover', full_width=T) %>%
  kable_styling(fixed_thead = T)
```



# UPDATED LINKAGE vs. 2010 Staff Report LINKAGE
## Prepare 2010 Staff Report Linkage Data
```{r}
LMBgoal <- 0.24

# Summarize Aq data to Recreate Orig Linkage
Linkage2010_aqData <- aqData3 %>% 
  filter(Linkage == "2010 Staff Report") %>% 
  substituteNDDNQ(ResultQualCode, MDL.RL.fraction=.5)

aq_origLinkage1 <- Linkage2010_aqData %>%
  group_by(Subarea, Unit, month(SampleDate)) %>% 
  summarise(n = n(),
            Avg = mean(Result),
            .groups='drop')
aq_origLinkage <- aq_origLinkage1 %>%
  group_by(Subarea, Unit) %>% 
  summarise(Aq_n = sum(n),
            Aq_Mean = mean(Avg),
            Aq_Median = median(Avg),
            .groups='drop') %>% 
  rename(Aq_Unit = Unit)



# Summarize Fish Data to Recreate Orig Linkage
Linkage2010_lmbData <- bbData_2000_2019 %>% filter (Linkage == "2010 Staff Report") %>%
  substituteNDDNQ(ResultQualCode, MDL.RL.fraction=.5)

Linkage2010_lmbData_350models <- Linkage2010_lmbData %>%
  add_count(Subarea) %>%
  group_by(Subarea, Analyte, Unit) %>%
  nest %>%
  mutate(PwrReg = purrr::map(.x = data, .f = ~lm(log(Result) ~ log(TLAvgLength), data=.))) %>%     # helpful link showing power regression in R (equivalent to power regression used in Excel) https://stackoverflow.com/questions/18305852/power-regression-in-r-similar-to-excel
  mutate(Stand350_pwrReg = purrr::map_dbl(.x = PwrReg, .f = ~signif(exp(summary(.x)$coefficients[1]), 1) * 350^summary(.x)$coefficients[2])) %>%
  ####
  mutate(ExpoReg = purrr::map(.x = data, .f = ~lm(log(Result) ~ TLAvgLength, data=.))) %>% 
  mutate(Stand350_expoReg = purrr::map_dbl(.x = ExpoReg, .f = ~signif(exp(summary(.x)$coefficients[1]), 3) * exp(signif(summary(.x)$coefficients[2], 2) * 350) )) %>%
  ####
  mutate(Rsq_pwrReg = purrr::map_dbl(.x = PwrReg, .f = ~summary(.x)$r.squared)) %>%
  mutate(Rsq_expoReg = purrr::map_dbl(.x = ExpoReg, .f = ~summary(.x)$r.squared)) %>%
  select(-PwrReg, -ExpoReg) %>%
  unnest(cols = Stand350_pwrReg) %>%
  ungroup() %>% 
  arrange(Subarea)

Linkage2010_lmbData_350 <- Linkage2010_lmbData_350models %>% 
  select(-starts_with("Rsq")) %>% 
  pivot_longer(
    cols = Stand350_pwrReg:Stand350_expoReg,
    names_to = "Model",
    names_prefix = "Stand350_",
    values_to = "Stand350"
  ) %>% 
  arrange(Subarea, Model)
Linkage2010_lmbData_rsq <- Linkage2010_lmbData_350models %>% 
  select(-starts_with("Stand350")) %>% 
  pivot_longer(
    cols = Rsq_pwrReg:Rsq_expoReg,
    names_to = "Model",
    names_prefix = "Rsq_",
    values_to = "Rsq"
  ) %>% 
  arrange(Subarea, Model)
Linkage2010_lmbData_Stand350 <- Linkage2010_lmbData_350 %>% 
  mutate(Rsq = Linkage2010_lmbData_rsq$Rsq) %>%  
  group_by(Subarea) %>% 
  filter(Rsq == max(Rsq)) %>%
  select(Subarea, lmb_data=data, lmb_Analyte=Analyte, lmb_Unit=Unit, lmb_350=Stand350, lmb_model=Model, lmb_rsq=Rsq)

# Merge 2010 Staff Report Aq & LMB Summary Data for Linkage Graph
orig_linkageData <- aq_origLinkage %>% 
  left_join(., Linkage2010_lmbData_Stand350, "Subarea") %>%
  drop_na() %>% 
  mutate(Aq_Mean   = round(Aq_Mean, 3),
         Aq_Median = round(Aq_Median, 3),
         lmb_350  = round(lmb_350, 2))
```


## View Updated & 2010 Staff Report Linkage Graph
```{r}
# 2010 Staff Report Linkage Data Model Prediction Line
origModel_predictRange <- tibble(lmb_350=seq(floor(min(LMBgoal, BBImpGoal, orig_linkageData$lmb_350, Linkage.DRMPAq.DRMPBByr.Subarea$Std350_Median) * 95) /100,
                                         ceiling(max(orig_linkageData$lmb_350, Linkage.DRMPAq.DRMPBByr.Subarea$Std350_Median) * 105) /100,
                                         length.out=500))

orig_linkageModels <- predictionModels(orig_linkageData, .knownYcol=Aq_Mean, .knownXcol=lmb_350, knownX=LMBgoal) %>% 
  rename(aqMeHgSafeConc = predictedY)

orig_linkage <- orig_linkageModels %>% 
  filter(ModelName == 'pwr') %>% 
  mutate(aqMeHgSafeConc = signif(aqMeHgSafeConc, 2))

orig_linkage_model <- orig_linkage$Model[[1]]

model_predictOrig <- origModel_predictRange %>%
  mutate(Aq_Mean = case_when(orig_linkage$ModelName == 'pwr' | orig_linkage$ModelName == 'exp' ~ exp(predict(orig_linkage_model, origModel_predictRange %>% select(lmb_350))),
                                      T ~ predict(orig_linkage_model, origModel_predictRange %>% select(lmb_350))) )

# PLOT DATA POINTS & REGRESSION MODELS
DMCP_vs_Orig_LinkageGraph <- predictionGraph +
  # ggtitle("2010 Staff Report Linkage Data [2000] & Updated Linkage [DRMP 2016-2019]") +
  xlab('Hg Conc. of Standardized 350mm Largemouth or Black Bass (mg/Kg)') +
  ylab('Unfiltered Aqueous MeHg (ng/L)') +
  
  #2010 Staff Report Linkage Data Points
  geom_point(data=orig_linkageData, aes(x=lmb_350, y=Aq_Mean, shape=Subarea),
             alpha=1,
             size=3.5, stroke=1.5,
             color='grey46',
             show.legend=F) +
  #2010 Staff Report Linkage Model Regression Line
  geom_point(data=model_predictOrig, aes(x=lmb_350, y=Aq_Mean),
             color='grey46',
             size=.3,
             show.legend=F) +
  
  # Text - Regression Equation
    annotate("text", x=1.05, hjust=0, y=.185+.005,
             label=paste0("italic(y)==", round(exp(orig_linkage_model$coefficients['(Intercept)']),3), "~'*'~ italic(x) ^", round(orig_linkage_model$coefficients['log(lmb_350)'],3)), parse=T, color='grey46') +
    
    # Text - Regression Model SER (Standard Error of Regression)
    annotate("text", x=1.05, hjust=0, y=.185-.004, # updated_linkage %>% pull(aqMeHgSafeConc)*1.9,
             label=paste0("italic(~SER) == ", sigfig(orig_linkage$SER,3)), parse=T, color='grey46')
  
DMCP_vs_Orig_LinkageGraph


# ADD MODEL PREDICTION VALUES
DMCP_vs_Orig_LinkageGraph_prediction <- DMCP_vs_Orig_LinkageGraph +
  
  #2010 Staff Report Linkage Vertical Line
  geom_segment(aes(x=LMBgoal, xend=LMBgoal, y=0, yend=orig_linkage$aqMeHgSafeConc), 
               size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), color='grey46') +
  #2010 Staff Report Linkage Vertical Line Text - Linkage LMB Goal
  annotate("text", x=.27, hjust=0, y=0.03, 
           label=paste0("italic(x)== ", LMBgoal, "[~Largemouth~Bass~Implementation~Goal]"), parse=T, color='grey46') +
           # label=paste0('LMB<sub>goal</sub>=',LMBgoal,' mg/Kg '), color='grey56') +
  
  #2010 Staff Report Linkage Horizontal Line
  geom_segment(aes(x=LMBgoal, xend=0, y=orig_linkage$aqMeHgSafeConc, yend=orig_linkage$aqMeHgSafeConc),
               size=0.8, arrow=arrow(type="open", length=unit(0.02, "npc")), color='grey46') +
  #2010 Staff Report Linkage Horizontal Line Text - Predicted Aqueous Concentration
  annotate("text", x=.027, hjust=0, y=orig_linkage$aqMeHgSafeConc + .004,
           label=paste0('italic(y)== ', sigfig(orig_linkage$aqMeHgSafeConc,2)), parse=T,
           color='grey46')

DMCP_vs_Orig_LinkageGraph_prediction

ggsave(filename = paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Fig 5.3 Linkage Analysis Comparison.png'),
       plot = DMCP_vs_Orig_LinkageGraph_prediction,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 125)
```


## Effect of Switching Axes for 2010 Staff Report Linkage Analysis
2010 Staff Report Linkage plotted Largemouth Bass data (Y-axis) vs. Aqueous data (X-axis). The resulting regression equation requires solving for X to determine the Aqueous [Hg] Implementation Goal. Because R's predict() solves for Y and not X, test to see if switching coordinates has an effect on calculating the Aq [Hg] Implementation Goal
```{r}
origLinakge_predictAq <- function(lmb_goal){
  signif((lmb_goal/exp(PwrReg$coefficients[1]))^(1/PwrReg$coefficients[2]), 3)
}

# Calculate Aq Implementation Goal using LMB 350mm vs. Aq (Method for Linkage used in 2010 TMDL Staff Report)
PwrReg <- lm(log(lmb_350) ~ log(Aq_Mean), data=orig_linkageData)
aqMeHgSafeConc_calc <- origLinakge_predictAq(LMBgoal)
aqMeHgSafeConc_pred <- NA  # axes are in wrong order & can't be done in R
Rsq <- signif(summary(PwrReg)$r.square, 2)
RMSE <- signif(sqrt(mean((origLinakge_predictAq(orig_linkageData$lmb_350) - orig_linkageData$Aq_Mean)^2)), 3)


# Calculate Aq Implementation Goal using Aq vs. LMB 350 (New Method)

PwrReg_flip <- lm(log(Aq_Mean) ~ log(lmb_350), data=orig_linkageData)
aqMeHgSafeConc_flip_calc <- signif(exp(PwrReg_flip$coefficients[1])*LMBgoal^PwrReg_flip$coefficients[2], 2)
aqMeHgSafeConc_flip_pred <- signif(exp(predict(PwrReg_flip, tibble(lmb_350=LMBgoal))), 2)
aqMeHgSafeConc_flip_Rsq  <- signif(summary(PwrReg_flip)$r.square, 2)
aqMeHgSafeConc_flip_RMSE <- sqrt(mean((exp(predict(PwrReg_flip)) - orig_linkageData$Aq_Mean)^2))

coordFlipResultSummary <- data.frame('_'              = c('Aq Hg Goal (Calc)', 'Aq Hg Goal (R Predict)', 'R^2', 'RMSE'),
                                     OrigLinkage      = c(aqMeHgSafeConc_calc, aqMeHgSafeConc_pred,  Rsq, RMSE),
                                     OrigLinkageCoordFlip = c(aqMeHgSafeConc_flip_calc, aqMeHgSafeConc_flip_pred, aqMeHgSafeConc_flip_Rsq, aqMeHgSafeConc_flip_RMSE))

coordFlipResultSummary %>% 
   signif_df(., 3) %>% 
  kbl(align='lcc', caption='Comparison of Aq Hg Goal After Flipping Coords') %>%
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)

# Switching axes for 2010 Staff Report linkage results in different Aq [Hg] Implementation Goal. However, Board staff decided to update the linkage graphs as Aqueous data (Y-axis) vs. Largemouth Bass data (X-axis). This is because solving for Y given X is (a) a more standard mathematical approach, and (b) is easily implemented in R using the predict() function. In addition, the difference from switching coordinates (0.066 for the 2010 Staff Report graph vs 0.0689 with coordinates switched) is not considerable.
```


# UPDATED LINKAGE: Margin of Safety Analysis

## View Probability Histogram of Aqueous Safe Concentrations
```{r}
setwd(wd)
load('Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/randomSampled_FINAL_median_5yr DRMP Aq & BB 2016-2019.RData')  # loads randomSampled_AqSafeConc ~ random sampling results

ggplot() +
  geom_histogram(aes(x=randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, y=after_stat(ncount)),
                 alpha = 0.5, position = 'identity') +
  geom_vline(xintercept=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .5) - .0005, y=.5, label='Median', color='red', angle=90)+
  
  geom_vline(xintercept=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .05), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .05) - .0005, y=.5, label='5th Percentile', color='red', angle=90)+
  
  geom_vline(xintercept=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .95), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=quantile(randomSampled_AqSafeConc$aqMeHgSafeConcs$SER, .95) - .0005, y=.5, label='95th Percentile', color='red', angle=90)+
  
  theme_light()
```

## View Histogram of Aqueous Safe Conc Linkage Model Errors
```{r}
wt.avg5yr_BestErrors <- randomSampled_AqSafeConc$aqMeHgSafeConcs %>% 
  filter(SER <= quantile(SER, 1)) # quantile(SER, 1) uses all the models

wg_hist <- ggplot() + 
  xlab("Predicted Protective Aqueous MeHg Concentration (ng/L)") +
  ylab("Frequency") +
  geom_histogram(aes(x=wt.avg5yr_BestErrors$aqMeHgSafeConc, y=after_stat(count)),
                 alpha = 0.5, position = 'identity') +

  geom_vline(xintercept=updated_linkage$aqMeHgSafeConc, color='black', size=1, linetype='dashed') +
  annotate(geom='text', x=updated_linkage$aqMeHgSafeConc - .0006, y=505, hjust=0,
           label=paste('DMCP Review Linkage Model =',updated_linkage$aqMeHgSafeConc), color='black', angle=90)+
  
  geom_vline(xintercept=quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5),  color='red', size=1) +
  annotate(geom='text', x=quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5) - .0006, y=505, hjust=0,
           label=paste('Median =',signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5),3)), color='red', angle=90)+
  
  geom_vline(xintercept=signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2), color='red', size=1.2, linetype='dashed') +
  annotate(geom='text', x=signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2) - .0006, y=505, hjust=0,
           label=paste('5th Percentile =',signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2)), color='red', angle=90) +
  
  scale_y_continuous(expand=c(0,0), limits = c(0, 1500)) +
  theme_light() +
  theme(text = element_text(size=14, family="sans"),
          axis.title=element_text(size=14, face="bold"),
          axis.text = element_text(size=14, angle = 0, hjust = 0.5),
          panel.grid.major.y=element_line(size=1),
          # panel.grid.minor=element_blank(),
          legend.text = element_text(size = 14),
          legend.title = element_text(size = 14, face="bold"))

wg_hist

ggsave(filename = paste0(wd, '/Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Fig 5.4 Probability Distribution of Aqueous Protective Concs.png'),
       plot = wg_hist,
       width = 9,
       height = 5.75,
       units = "in",
       dpi = 125)

signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .5), 4) # Median value
updated_linkage$aqMeHgSafeConc
(aq_HgGoal <- signif(quantile(wt.avg5yr_BestErrors$aqMeHgSafeConc, .05), 2)) # Aq water goal w/ Margin of Safety
# Calculate Margin of Safety
MarginSafety <- signif((1 - aq_HgGoal/updated_linkage$aqMeHgSafeConc) * 100, 2)
names(MarginSafety) <- 'Margin of Safety %'
MarginSafety
```

# Aqueous Percent Reduction Needed
```{r}
# Get median MeHg for Subareas not used in the Linkage Analysis using same methods as AqSampleFishModelPairing() - then calc median of pooled median

CCSB <- aqData3 %>% 
  filter(Subarea == 'Cache Creek Settling Basin')

YoloBypassNorth_CCSB <- aqData3 %>% 
  filter(Subarea == 'YB-North') %>%
  bind_rows(CCSB) %>% 
  mutate(Subarea = "YB-North w/ CCSB")

YoloBypass_CCSB <- aqData3 %>% 
  filter(grepl("yolo", Subarea, ignore.case=T)) %>%  # filter out subareas used in linkage
  bind_rows(CCSB) %>%
  mutate(Subarea = "YB N & S w/ CCSB")


dataNotInLinkage <- aqData3 %>% 
  filter(Subarea %are not% unique(Linkage.DRMPAq.DRMPBByr.Subarea$Subarea)) %>%
  bind_rows(CCSB, YoloBypassNorth_CCSB, YoloBypass_CCSB)


AqResults_NotInLinkage <- NULL
AqResultSummary_NotInLinkage <- NULL
subareasNotInLinkage <- dataNotInLinkage %>% distinct(Subarea) %>% pull
for(subarea in subareasNotInLinkage){
  ## Gather Aq Samples ##
      aqSubarea <- dataNotInLinkage %>% 
        filter(Subarea == subarea)
      
      sampleYears_temp <- aqSubarea %>% filter(seasonYear > 2010) %>% distinct(seasonYear) %>% arrange(seasonYear) %>%  pull # only using data after 2010
      sampleYears <- sampleYears_temp[ifelse(length(sampleYears_temp)>=5, length(sampleYears_temp)-4, 1):length(sampleYears_temp)] # Get 5 Years of most recent data

      for (k in 1:length(sampleYears)) {
        aqSampleYr <- aqSubarea %>%
          filter(seasonYear %in% sampleYears[1:k]) # Include earliest year and then consecutive subsequent years - as what was done in AqSampleFishModelPairing() 

        # Calc Median Aq Concs of Pooled data
        aqSampleYrs <- paste(aqSampleYr %>% distinct(seasonYear) %>% pull %>% sort, collapse=", ") # Collapse Aq Smple Years for storage in dataframe
        
        tempAqSummary <- tibble(Subarea = subarea,
                             aq_sampleYears = aqSampleYrs,
                             aq_n = nrow(aqSampleYr),
                             aq_Median_pool = median(aqSampleYr$Result))
      
        # Store Results
        AqResults_NotInLinkage <- bind_rows(AqResults_NotInLinkage, aqSampleYr)
        
        AqResultSummary_NotInLinkage <- bind_rows(AqResultSummary_NotInLinkage, tempAqSummary)
      }
}

AqResultSummary_NotInLinkage %>% 
  kbl(align='lccc', caption='Pooled Median Aq [MeHg] for Subareas Not Used in Linkage Analysis') %>% 
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)


AqResults_SubareasNotInLinkage <- AqResultSummary_NotInLinkage %>%
  group_by(Subarea) %>%
  mutate(sampleYears = aq_sampleYears[which.max(aq_n)]) %>%
  ungroup() %>%
  group_by(Subarea, sampleYears) %>%
  summarize(n = n(),
            Std350_Median             = NA_real_,
            Aq_Median_ofPooledMedians = median(aq_Median_pool),
            modelAqConc               = NA_real_,
            UsedInLinkage = 'No',
            .groups = 'drop')


subarea_percArea <- tibble('Central Delta'=.31,'Moke/Cos Rivers'=.01,'Marsh Creek'=.02,'Sacramento River'=.25,'San Joaquin River'=.16,'West Delta'=.07,'YB-North w/ CCSB'=.05,'YB-South'=.13) %>% # only include values for suabreas to be used to calculate weightedMedian below
  pivot_longer(cols=everything(), names_to='Subarea', values_to='perc_Area')

SubareaPercReduction <- Linkage.DRMPAq.DRMPBByr.Subarea  %>% 
  group_by(Subarea) %>% 
  mutate(modelAqConc = purrr::map_dbl(.x=updated_linkage$Model[1], .f = ~predict(.x, tibble(Std350_Median = cur_data()$Std350_Median)))) %>% 
  ungroup %>% 
  select(Subarea, Std350_Median, Aq_Median_ofPooledMedians, modelAqConc, UsedInLinkage, n, sampleYears) %>% 
  bind_rows(AqResults_SubareasNotInLinkage) %>% 
  left_join(., subarea_percArea, by='Subarea') %>% # adds perc_Area column
  mutate(
    # Std350_Median = signif(Std350_Median, 3),
    # Aq_Median_ofPooledMedians = signif(Aq_Median_ofPooledMedians, 2),
    # `2010 Method % Reduction` = round((Aq_Median_ofPooledMedians - aq_HgGoal)/Aq_Median_ofPooledMedians * 100, 1),
    `2010 Method % Reduction` = (Aq_Median_ofPooledMedians - aq_HgGoal)/Aq_Median_ofPooledMedians * 100,
    # `Model Aq Conc Method % Reduction` = round((modelAqConc - aq_HgGoal)/modelAqConc * 100, 1)
    `Model Aq Conc Method % Reduction` = (modelAqConc - aq_HgGoal)/modelAqConc * 100
    ) %>%
  add_row(Subarea = "Median Weighted by perc_Area", summarize(., across(contains(c('Pooled','Final')), ~ weightedMedian(.x, w=perc_Area, na.rm=T)))) %>%  # doesn't include subareas with a perc_Area of NA
  mutate(`Final Aq Conc Method % Reduction` = ifelse(is.na(modelAqConc), (Aq_Median_ofPooledMedians - aq_HgGoal)/Aq_Median_ofPooledMedians * 100, (modelAqConc - aq_HgGoal)/modelAqConc * 100)) %>% 
  select(Subarea, perc_Area, everything())

SubareaPercReduction %>% 
  kbl(align='lccc', caption='Percent Aq [MeHg] Reductions Needed for each Subarea') %>% 
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)
```

## Save Data Tables
```{r}
setwd(wd)
writexl::write_xlsx(SubareaPercReduction, path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Table 5.1_Aq Percent Reduction Needed for Each Subarea.xlsx')

writexl::write_xlsx(Linkage.DRMPAq.DRMPBByr, path='Reeval_Impl_Goals_Linkage_Analysis/eval2_Linkage & Aq Impl Goal/Output/Table 5._LinkageDataUsedToFindFinalMediansBySubarea.xlsx')
```
## Error: <text>:19:1: unexpected '<'
## 18: 
## 19: <
##     ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=C                            
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## system code page: 65001
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.1       actuar_3.3-0      
##  [5] NADA_1.6-1.1       forcats_0.5.2      stringr_1.4.1      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.8      
## [13] ggplot2_3.3.6      tidyverse_1.3.2    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45     assertthat_0.2.1    digest_0.6.29       utf8_1.2.2         
##  [5] R6_2.5.1            cellranger_1.1.0    backports_1.4.1     reprex_2.0.2       
##  [9] evaluate_0.16       highr_0.9           httr_1.4.4          pillar_1.8.1       
## [13] rlang_1.0.5         lazyeval_0.2.2      googlesheets4_1.0.1 rstudioapi_0.14    
## [17] data.table_1.14.2   Matrix_1.5-1        rmarkdown_2.16      splines_4.2.2      
## [21] googledrive_2.0.0   htmlwidgets_1.5.4   munsell_0.5.0       broom_1.0.1        
## [25] compiler_4.2.2      modelr_0.1.9        xfun_0.32           pkgconfig_2.0.3    
## [29] htmltools_0.5.3     tidyselect_1.1.2    fansi_1.0.3         viridisLite_0.4.1  
## [33] crayon_1.5.1        tzdb_0.3.0          dbplyr_2.2.1        withr_2.5.0        
## [37] grid_4.2.2          jsonlite_1.8.0      gtable_0.3.1        lifecycle_1.0.1    
## [41] DBI_1.1.3           magrittr_2.0.3      scales_1.2.1        writexl_1.4.0      
## [45] cli_3.3.0           stringi_1.7.8       fs_1.5.2            xml2_1.3.3         
## [49] ellipsis_0.3.2      generics_0.1.3      vctrs_0.4.1         expint_0.1-7       
## [53] tools_4.2.2         glue_1.6.2          hms_1.1.2           yaml_2.3.5         
## [57] fastmap_1.1.0       colorspace_2.0-3    gargle_1.2.0        rvest_1.0.3        
## [61] knitr_1.40          haven_2.5.1
    Sys.time()
## [1] "2024-01-05 15:39:39 PST"