This report is automatically generated with the R package knitr (version 1.39) .

---
title: '03_Wastewater Treatment Plants'
subtitle: 'Aqueous MeHg Data Evaluation'
author: "Mercury Program and Basin Planning Unit"
date: "3/1/2022"
output:
  html_document:
    code_folding: show
    toc: TRUE
    toc_float: TRUE
    toc_depth: 3
runtime: shiny
assets:
  css:
    - "http://fonts.googleapis.com/css?family=Raleway:300"
    - "http://fonts.googleapis.com/css?family=Oxygen"
---
---

<style>
body{
  font-family: 'Oxygen', sans-serif;
  font-size: 16px;
  line-height: 24px;
}

h1,h2,h3,h4 {
  font-family: 'Raleway', sans-serif;
}

.container { width: 1250px; }
h3 {
  background-color: #D4DAEC;
  text-indent: 50px; 
}
h4 {
  text-indent: 75px;
  margin-top: 35px;
  margin-bottom: 5px;
}
</style>

```{r setup, include=FALSE}
knitr::opts_chunk$set(echo=TRUE, warning=FALSE, message=FALSE, fig.width=9.5)
```


## Load Packages, Functions, & Data
```{r Libraries, echo=FALSE}
library(janitor) # for clean_names()
library(kableExtra) # better formatting of tables
library(shiny)

# Had issue trying to set WD with Shiny in R project, reset working directory of rproj
wd <- rstudioapi::getActiveProject()
setwd(wd)

source("R Functions/functions_estimate NDDNQ values.R")
source("R Functions/functions_QA data.R")


# Read MeHg Data set
esmr <- readxl::read_xlsx("Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES_eSMR MeHg Data.xlsx", guess_max = 300000, sheet = "OIMA eSMR FixErrors-AddLathrop")  %>% 
 clean_names()


facilityInfo <- readxl::read_xlsx("Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_NPDES Facility Info.xlsx", guess_max = 300000, sheet = "Final Table") %>%
  select(Discharger, Facility, eSMR_Name, GraphName, FacilityType, Treatment, Subarea, Status, Permit, Receiving_water) %>% 
  filter(Status %in% c("Active", "Closing", "New")) %>% 
  distinct(eSMR_Name, .keep_all=T)
```

## Clean Data
### Fix Column Names
```{r}
### LIST COLUMNS TO BE USED, ADD USER DEFINED COLUMNS, & RENAME COLUMNS TO CEDEN STANDARDS ###
#Use 1.READ ME.xlsx, 'ColumnsForR' to list & identify columns that match corresponding CEDEN Standard columns
keep_cols <-
  c(
    'source_row',
    'source_id',
    'station_name',
    'location_place_type',
    'location',
    'parameter',
    'sample_date',
    'unit',
    'result',
    'mdl',
    'rl',
    'result_qual_code',
    'sampling_time',
    'analytical_method',
    'lattitude',
    'longitude',
    'location_desc'
  )

temp_cols <- c('facility_place_id', 'location_place_id', 'calculated_method', 'ml', 'analysis_date', 'analysis_time', 'qa_codes', 'comments', 'analytical_method_code', 'report_name', 'smr_document_id', 'review_priority_indicator', 'receiving_water_body') #Include columns that do not match CEDEN standards but may be useful (e.g., Unit columns for MDL & RL)
#temp_cols are removed before the data is merged with other datasets

esmr_correct_columns <- esmr %>% 
  dplyr::select(keep_cols,temp_cols) %>% #DO NOT CHANGE - selects columns specified above
  rename(
    #Rename worksheet columns to CEDEN format here: CEDEN 'COLUMNNAME' = WORKSHEET 'COLUMNNAME'
    #DELTE COLUMN NAMES THAT DO NOT HAVE AN EQUIVALENT COLUMN IN THE WORKSHEET
    SourceRow = source_row,
    SourceID = source_id,
    StationName = station_name,
    StationCode = location_place_type,
    SampleTypeCode = location,
    Analyte = parameter,
    SampleDate = sample_date,
    Unit = unit,
    Result = result,
    MDL = mdl,
    RL = rl,
    ResultQualCode = result_qual_code,
    SampleTime = sampling_time,
    MethodName = analytical_method,
    TargetLatitude = lattitude,
    TargetLongitude = longitude,
    CollectionComments = location_desc
  )
```

### Format Column Parameters
```{r}
esmr_correct_columns <- chara_to_NumDate(esmr_correct_columns)

  # Check STATIONCODE for unexpected parameter variances
esmr_correct_columns %>% 
  unique_factors(StationCode)

  # Filter for Effluent Monitoring
esmr_eff <- esmr_correct_columns %>% 
  filter(StationCode == 'Effluent Monitoring')

  # Check RESULTQUALCODE for unexpected parameter variances
esmr_eff %>% 
  unique_factors(ResultQualCode)

  # Change "<" to "ND"
esmr_qualcode <- esmr_eff %>%
   mutate(ResultQualCode = recode(ResultQualCode,
                                  "<" = "ND"))
  # Check if Result, MDL, & RL Columns all equal <NA> or 0 - these rows have no useful information
nrow(esmr_qualcode) #Number rows before
#CODE BELOW REQUIRES USER TROUBLESHOOTING DEPENDING ON AVAILABLE COLUMNS AND SPREADSHEET SPECIFIC CONDITIONS #
esmr_qualcode <- esmr_qualcode %>%
  #Set 0 & negative values as blank
  mutate(Result = ifelse(Result <= 0, NA_real_, Result),
         MDL = ifelse(MDL <= 0, NA_real_, MDL),
         RL = ifelse(RL <= 0, NA_real_, RL))
na_results <- esmr_qualcode %>% #Record rows where Result, MDL, & RL all equal <NA>
  filter( is.na(Result) & is.na(MDL) & is.na(RL) )
nrow(na_results)
esmr_qualcode <- anti_join(esmr_qualcode, na_results) #returns rows from esmr_qualcode not matching values in no_result
nrow(esmr_qualcode) #Number rows after


  # Format Units Column - "ng/L", "mg/Kg"
unique(esmr_qualcode$Unit) #Identifies OLDNAMES
# If more than 1 unit colmn exists (e.g., for RL and MDL columns) see WQP script for example on merging into 1 column
esmr_units <- esmr_qualcode %>%
  filter(Unit %in% c("ng/L", "ug/L", "mg/L")) %>%
  standardizeUnits()
unique(esmr_qualcode$Unit) #New naming structure for Unit groupings



# Add Discharger, Facility, GraphName, FacilityType, Treatment, Subarea, Permit, & Receiving_water columns
esmr_facility_info <- esmr_units %>% 
  left_join(., facilityInfo, by=c("StationName"="eSMR_Name"))


### REMOVE TEMPORARY COLUMNS ###
esmr_formatted <- esmr_facility_info %>%
  dplyr::select(-one_of(temp_cols)) %>%  #Remove temp columns since they are no longer needed
  arrange(StationName, SampleDate) %>% 
  chara_to_NumDate                       #custom function to format numbers & dates from chr
```



### Graph Data to see if any Order of Magnitude errors
```{r}
esmr_formatted_plot <- esmr_formatted %>%
  # add columns so plotting function works
  mutate(Shape = NA)
result_time_Plotly(esmr_formatted_plot, SourceID, logscale=F)
```


## Compare Data to CVCWA Control Study

### Data Summary Appendix B (pdf p265)
```{r}
esmr_formatted_subND <- substituteNDDNQ(esmr_formatted, ResultQualCode, MDL.RL.fraction=0.5)


esmr_cvcwa_summary <- esmr_formatted_subND %>%
  group_by(StationName) %>% 
  filter(SampleDate >= "2014-10-01" & SampleDate <= "2017-09-30") %>% 
  summarize(NDs = nrow(cur_data() %>% filter(ResultQualCode == 'ND')), # this operation needs to go 1st for some reason, maybe an issue with cur_data()??
            Avg = mean(Result),
            Count = n(),
            Percent_NDs = NDs/Count * 100,
            minDate = min(SampleDate),
            maxDate = max(SampleDate)) %>% 
  select(StationName, Avg, Count, NDs, Percent_NDs, minDate, maxDate) 

esmr_cvcwa_summary %>% 
  kbl(align='lccc', caption='For Comparison to CVCWA Control Study Appendix B (pdf p265)') %>% 
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)

# writexl::write_xlsx(esmr_cvcwa_summary, paste0(wd, "Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03_eSMR Data Summary_Reproduce 10-2014_09-2017 CVCWA.xlsx")
```



## Final Data Eval & Save

### Estimate ND & DNQ Values
```{r}
esmr_formatted_distND <- robinNDDNQ(esmr_formatted_plot, ResultQualCode)
```


### View Fitted Distribution Graphs
```{r, echo=FALSE}
ui <- fluidPage(
  selectInput('distName', label='Select Distribution', choices=names(esmr_formatted_distND), selected=1),
  plotOutput('distPlot')
)

server <- function(input, output, session) {
  output$distPlot <- renderPlot({
  plot(esmr_formatted_distND[[input$distName]]$fitmodel) # view plot fits
    title(input$distName, outer=T)
})
}

shinyApp(ui, server, options=list(height=500))
```

### Set Final ND/DNQ Estimation
```{r}
esmr_final <- esmr_formatted_distND$weibull$fitted  # Selected distribution - primarily based on CDF & P-P plot showing best fit to data
```

### Graph Individual WWTPs - Compare w/ Appendix C (pdf p218-244)
```{r, echo=FALSE}
ui <- fluidPage(
  selectInput('wwtp', label='Select WWTP', choices=unique(esmr_final$StationName), selected=1),
  plotlyOutput('wwtpPlot')
)

server <- function(input, output, session) {
  data1 <- reactive({
    
    esmr_final %>% 
      # add columns so plotting function works
      mutate( Shape = NA) %>% 
      # filter for selected wwtp
      filter(StationName == input$wwtp)
  })
  
  
   output$wwtpPlot <- renderPlotly({
    req(input$wwtp)
     
    result_time_Plotly(data1(), StationName, logscale=F)
  })
}

shinyApp(ui, server, options=list(height=500))
```

### Final Data Summary
```{r}
esmr_final %>%
  group_by(Discharger, Facility) %>% 
  summarize(NDDNQs = nrow(cur_data() %>% filter(ResultQualCode != '=')), # this operation needs to go 1st for some reason, maybe an issue with cur_data()??
            Min = min(Result),
            Max = max(Result),
            Median = median(Result),
            Avg = mean(Result),
            Count = n(),
            Percent_NDDNQs = NDDNQs/Count * 100,
            minDate = min(SampleDate),
            maxDate = max(SampleDate)) %>% 
  # select(StationName, Min, Max, Median, Avg, Count, NDDNQs, Percent_NDDNQs, minDate, maxDate) %>% 
  kbl(align='lccc', caption='NPDES MeHg Data Summary') %>% 
  kable_paper('hover', full_width=T) %>% 
  kable_styling(fixed_thead = T)
```


# Export to excel
```{r}
writexl::write_xlsx(esmr_final, paste0(wd, "/Reeval_Source_Analysis/Source Data/03a_Municipal WWTPs (NPDES)/Data/03.1_WWTP MeHg Data Prep_Clean_", today(), ".xlsx"))
```
## Error: <text>:20:1: unexpected '<'
## 19: 
## 20: <
##     ^

The R session information (including the OS info, R version and all packages used):

    sessionInfo()
## R version 4.2.2 (2022-10-31 ucrt)
## Platform: x86_64-w64-mingw32/x64 (64-bit)
## Running under: Windows 10 x64 (build 22621)
## 
## Matrix products: default
## 
## locale:
## [1] LC_COLLATE=English_United States.utf8  LC_CTYPE=English_United States.utf8   
## [3] LC_MONETARY=English_United States.utf8 LC_NUMERIC=C                          
## [5] LC_TIME=English_United States.utf8    
## 
## attached base packages:
## [1] stats     graphics  grDevices utils     datasets  methods   base     
## 
## other attached packages:
##  [1] lubridate_1.8.0    plotly_4.10.0      readxl_1.4.0       actuar_3.2-2      
##  [5] NADA_1.6-1.1       forcats_0.5.1      stringr_1.4.0      dplyr_1.0.9       
##  [9] purrr_0.3.4        readr_2.1.2        tidyr_1.2.0        tibble_3.1.7      
## [13] ggplot2_3.3.6      tidyverse_1.3.1    fitdistrplus_1.1-8 survival_3.4-0    
## [17] MASS_7.3-58.1     
## 
## loaded via a namespace (and not attached):
##  [1] lattice_0.20-45    assertthat_0.2.1   digest_0.6.29      utf8_1.2.2        
##  [5] R6_2.5.1           cellranger_1.1.0   backports_1.4.1    reprex_2.0.1      
##  [9] evaluate_0.15      httr_1.4.3         highr_0.9          pillar_1.7.0      
## [13] rlang_1.0.2        lazyeval_0.2.2     rstudioapi_0.13    data.table_1.14.2 
## [17] Matrix_1.5-1       rmarkdown_2.14     labeling_0.4.2     splines_4.2.2     
## [21] htmlwidgets_1.5.4  munsell_0.5.0      broom_0.8.0        compiler_4.2.2    
## [25] modelr_0.1.8       xfun_0.31          pkgconfig_2.0.3    htmltools_0.5.2   
## [29] tidyselect_1.1.2   viridisLite_0.4.0  fansi_1.0.3        crayon_1.5.1      
## [33] tzdb_0.3.0         dbplyr_2.2.0       withr_2.5.0        grid_4.2.2        
## [37] jsonlite_1.8.0     gtable_0.3.0       lifecycle_1.0.1    DBI_1.1.2         
## [41] magrittr_2.0.3     scales_1.2.0       writexl_1.4.0      cli_3.3.0         
## [45] stringi_1.7.6      farver_2.1.0       fs_1.5.2           xml2_1.3.3        
## [49] ellipsis_0.3.2     generics_0.1.2     vctrs_0.4.1        expint_0.1-7      
## [53] RColorBrewer_1.1-3 tools_4.2.2        glue_1.6.2         hms_1.1.1         
## [57] yaml_2.3.5         fastmap_1.1.0      colorspace_2.0-3   rvest_1.0.2       
## [61] knitr_1.39         haven_2.5.0
    Sys.time()
## [1] "2023-12-27 10:14:32 PST"