--- title: "FJSC Chapter One Final" author: "EPatton" date: "2023-04-03" output: html_document --- Notes for this code: This code does not automatically grab the required datasets; aside from the NOAA data, the information is maintained in repositories that do not allow automated download. Three datasets must be read into this code for each installation. The user must obtained these datasets and adjust the "read.csv" links accordingly. a. Weather observations provided by the 14th weather squadron. Request these files from: https://climate.af.mil/forms/sar b. NOAA regional observations, downloadable from: https://www.ncei.noaa.gov/cdo-web/search;jsessionid=CEACC4D64F1754F26A4951DC58DAD6EE c. Basic combat training schedule data for FY2023. This data can be obtained from the U.S. Army ATRRS system: https://www.atrrs.army.mil Code is divided into six sections: 1. initial set up and package loading 2 - 5. individual installation analysis 6. plots and tables combining all installations. Code for each installation repeats with little change other than dataset input. Plots figures and tables used in this study are in the last section of code. 
```{r setup, include=FALSE}
knitr::opts_chunk$set(echo = TRUE)
library(ggplot2)
library(dplyr)
library(patchwork) # To display 2 charts together
#library(hrbrthemes)
library(tidyverse)
library(lubridate)
#library(sf)
#library(mapview)
#library(RColorBrewer)
library(readr)
#install.packages("fBasics")
library(fBasics)
library(Hmisc)
library(trend)
library(EnvStats)
library(cowplot)
library(imputeTS)
library(padr)
library(data.table)
library(scales)
```

# LOAD HEAT CATEGORY TEMPERATURE THRESHOLDS

```{r TEMPERATURE THRESHOLDS}
## Reference table of Army heat-category (flag) WBGT thresholds.
## BUG FIX: the original built this table with cbind(), which coerced every
## column to character; as.integer() on "84.999" then truncated the upper
## thresholds to 84/87/89 before the Celsius conversion. Building the data
## frame directly keeps the thresholds numeric and exact. Downstream code
## reads only Category_Temp_Min.C, so this fix is backward-compatible.
Heat_Categories <- data.frame(
  Flag_Color          = c("Green", "Yellow", "Red", "Black"),
  Category_Temp_Min.F = c(82, 85, 88, 90),
  Category_Temp_Max.F = c(84.999, 87.999, 89.999, 104)
)
## Add degree-Celsius equivalents of each Fahrenheit threshold.
Heat_Categories <- Heat_Categories %>%
  mutate(Category_Temp_Min.C = (Category_Temp_Min.F - 32) * (5/9)) %>%
  mutate(Category_Temp_Max.C = (Category_Temp_Max.F - 32) * (5/9))
#view(Heat_Categories)
```

#KMMT (Ft Jackson, SC)
#### INITIAL DATA LOAD AND SET UP

```{r}
#load the data file from the 14th weather squadron.
## Read the raw 14th Weather Squadron observations for KMMT (Ft Jackson).
KMMT <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KMMT_Raw.csv")
#check for missing observations
## Build a datetime column, then pad() inserts rows for any missing hours so
## gaps in the record show up as NA observations rather than absent rows.
KMMT <- KMMT %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad()
print(sum(is.na(KMMT$datetime))) #zero, as it should be
print(sum(is.na(KMMT$Temperature..F.)))
## 550056 appears to be the expected number of hourly rows in the padded
## record -- TODO confirm against nrow(KMMT).
print(550056-sum(is.na(KMMT$Temperature..F.)))
print(sum(is.na(KMMT$Derived.Wet.Bulb.Globe.Temperature..F.)))
print(550056-sum(is.na(KMMT$Derived.Wet.Bulb.Globe.Temperature..F.)))
```

###Data Table build

```{r}
#to build the data table showing where observations are missing
## Last 10 fiscal years, daylight hours only (08-19 UTC).
KMMT_last10_raw <- KMMT%>%
  filter(between(datetime,"2012-10-01 00:00:00 UTC","2022-09-30 23:00:00 UTC"))%>%
  filter(between(Hour..UTC.,8,19))
## 43208 appears to be the expected row count for that window -- TODO confirm.
print(43208-(sum(is.na(KMMT_last10_raw$Derived.Wet.Bulb.Globe.Temperature..F.))))
## The repeated pattern below prints, for each decade: total rows, NA count,
## and non-NA count (via the nested print() calls, whose value passes through);
## the trailing /nrow() expression then auto-prints the completeness fraction
## in the knitted chunk output.
KMMT_raw_60s <- KMMT%>%
  filter(between(datetime,"1960-01-01 00:00:00","1969-12-31 23:00:00"))
print((print((nrow(KMMT_raw_60s)))-print(sum(is.na(KMMT_raw_60s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_60s)
KMMT_raw_60s.daylight <- KMMT_raw_60s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_60s.daylight)))-print(sum(is.na(KMMT_raw_60s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_60s.daylight)
KMMT_raw_70s <- KMMT%>%
  filter(between(datetime,"1970-01-01 00:00:00","1979-12-31 23:00:00"))
print((print((nrow(KMMT_raw_70s)))-print(sum(is.na(KMMT_raw_70s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_70s)
KMMT_raw_70s.daylight <- KMMT_raw_70s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_70s.daylight)))-print(sum(is.na(KMMT_raw_70s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_70s.daylight)
KMMT_raw_80s <- KMMT%>%
  filter(between(datetime,"1980-01-01 00:00:00","1989-12-31 23:00:00"))
print((print((nrow(KMMT_raw_80s)))-print(sum(is.na(KMMT_raw_80s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_80s)
KMMT_raw_80s.daylight <- KMMT_raw_80s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_80s.daylight)))-print(sum(is.na(KMMT_raw_80s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_80s.daylight)
KMMT_raw_90s <- KMMT%>%
  filter(between(datetime,"1990-01-01 00:00:00","1999-12-31 23:00:00"))
print((print((nrow(KMMT_raw_90s)))-print(sum(is.na(KMMT_raw_90s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_90s)
KMMT_raw_90s.daylight <- KMMT_raw_90s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_90s.daylight)))-print(sum(is.na(KMMT_raw_90s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_90s.daylight)
KMMT_raw_00s <- KMMT%>%
  filter(between(datetime,"2000-01-01 00:00:00","2009-12-31 23:00:00"))
print((print((nrow(KMMT_raw_00s)))-print(sum(is.na(KMMT_raw_00s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_00s)
KMMT_raw_00s.daylight <- KMMT_raw_00s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_00s.daylight)))-print(sum(is.na(KMMT_raw_00s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_00s.daylight)
KMMT_raw_10s <- KMMT%>%
  filter(between(datetime,"2010-01-01 00:00:00","2019-12-31 23:00:00"))
print((print((nrow(KMMT_raw_10s)))-print(sum(is.na(KMMT_raw_10s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_10s)
KMMT_raw_10s.daylight <- KMMT_raw_10s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_10s.daylight)))-print(sum(is.na(KMMT_raw_10s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_10s.daylight)
## Partial decade: record ends at the close of FY2022.
KMMT_raw_20s <- KMMT%>%
  filter(between(datetime,"2020-01-01 00:00:00","2022-10-01 23:00:00"))
print((print((nrow(KMMT_raw_20s)))-print(sum(is.na(KMMT_raw_20s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_20s)
KMMT_raw_20s.daylight <- KMMT_raw_20s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KMMT_raw_20s.daylight)))-print(sum(is.na(KMMT_raw_20s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_20s.daylight)
## Completeness over the most recent 30 years of the record.
KMMT_raw_last30 <- KMMT%>%
  filter(datetime>"1992-10-01 00:00:00")
print((print((nrow(KMMT_raw_last30)))-print(sum(is.na(KMMT_raw_last30$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KMMT_raw_last30)
```

###Data interpolation and wrangle

```{r}
## Convert Fahrenheit observations to Celsius, rename the raw columns, add
## calendar components, and linearly interpolate short gaps (maxgap=3 hours)
## in temperature and WBGT. Longer gaps stay NA and are filled from decade
## averages in the chunks below.
KMMT <- KMMT%>%
  mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>%
  mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>%
  mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>%
  mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>%
  rename('Temp.F' = Temperature..F.)%>%
  rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>%
  rename('Heat.Index.F'=Heat.Index..F.)%>%
  rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>%
  mutate("Hour"=hour(datetime))%>%
  mutate("Day"=day(datetime))%>%
  mutate("Month"=month(datetime))%>%
  mutate("Year"=year(datetime))%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3))%>%
  mutate(Temp.C = zoo::na.approx(Temp.C, maxgap=3))
##CREATE DECADE COLUMN
KMMT <- KMMT%>%
  mutate(Decade=floor(Year/10)*10)
## Remaining NA counts after the maxgap=3 interpolation.
sum(is.na(KMMT$Temp.C))
sum(is.na(KMMT$WBGT.C))
550056-sum(is.na(KMMT$WBGT.C))
```

## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
###1960-1970

```{r}
## Hour-of-day climatology for gap filling: mean WBGT/Temp for each
## (Hour, Day, Month) cell, pooled over the 1960s and 1970s.
KMMT_60s_Hour_Avg <- KMMT%>%
  filter(Decade=="1960"|Decade=="1970")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation, to use as replacement for missing observations.
## Each decade below follows the same recipe: filter to the decade, attach a
## sequential row counter (hard-coded to the decade's padded hour count),
## full_join the (Hour, Day, Month) climatology, replace any remaining NA
## observation (.x) with the climatological mean (.y), then keep/rename the
## columns used downstream (Ave_WBGT.C / Ave_Temp.C).
KMMT_60s <- KMMT%>%
  filter(Decade=="1960")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672)) ## 87672 = hours in 1960-1969 (incl. leap days)
KMMT_60s <- full_join(KMMT_60s,KMMT_60s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_90s)
KMMT_60s <- mutate(KMMT_60s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column
KMMT_60s <- mutate(KMMT_60s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_60s <- KMMT_60s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1970s

```{r}
##1970s 20% missing (after maxgap=3)
##Data Prep
## Climatology pooled over the surrounding decades (1960s-1980s).
KMMT_70s_Hour_Avg <- KMMT%>%
  filter(Decade=="1960"|Decade=="1970"|Decade=="1980")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KMMT_70s <- KMMT%>%
  filter(Decade=="1970")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
KMMT_70s <- full_join(KMMT_70s,KMMT_70s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_90s)
KMMT_70s <- mutate(KMMT_70s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KMMT_70s <- mutate(KMMT_70s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_70s <- KMMT_70s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1980

```{r}
##1980s - no missing values
## Climatology pooled over 1970s-1990s.
KMMT_80s_Hour_Avg <- KMMT%>%
  filter(Decade=="1970"|Decade=="1980"|Decade=="1990")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KMMT_80s <- KMMT%>%
  filter(Decade=="1980")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672))
KMMT_80s <- full_join(KMMT_80s,KMMT_80s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_90s)
KMMT_80s <- mutate(KMMT_80s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KMMT_80s <- mutate(KMMT_80s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_80s <- KMMT_80s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1990

```{r}
##1990s
## Climatology pooled over 1980s-2000s.
KMMT_90s_Hour_Avg <- KMMT%>%
  filter(Decade=="1980"|Decade=="1990"|Decade=="2000")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KMMT_90s <- KMMT%>%
  filter(Decade=="1990")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
KMMT_90s <- full_join(KMMT_90s,KMMT_90s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_90s)
KMMT_90s <- mutate(KMMT_90s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KMMT_90s <- mutate(KMMT_90s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_90s <- KMMT_90s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####2000s

```{r}
##2000
##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful.
## Climatology pooled over 1990s-2010s.
KMMT_00s_Hour_Avg <- KMMT%>%
  filter(Decade=="1990"|Decade=="2000"|Decade=="2010")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KMMT_00s <- KMMT%>%
  filter(Decade=="2000")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672))
KMMT_00s <- full_join(KMMT_00s,KMMT_00s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_00s)
KMMT_00s <- mutate(KMMT_00s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KMMT_00s <- mutate(KMMT_00s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_00s <- KMMT_00s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####2010

```{r}
##2010
## Climatology pooled over 2000s-2020s.
KMMT_10s_Hour_Avg <- KMMT%>%
  filter(Decade=="2000"|Decade=="2010"|Decade=="2020")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KMMT_10s <- KMMT%>%
  filter(Decade=="2010")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
#view(KMMT_10s)
KMMT_10s <- full_join(KMMT_10s,KMMT_10s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_00s)
KMMT_10s <- mutate(KMMT_10s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KMMT_10s <- mutate(KMMT_10s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_10s <- KMMT_10s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

#### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029

```{r}
## Partial decade (through FY2022, 24093 hours); climatology pooled with the
## 2010s since the 2020s record alone is short.
KMMT_20s_Hour_Avg <- KMMT%>%
  filter(Decade=="2020"|Decade=="2010")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KMMT_20s <- KMMT%>%
  filter(Decade=="2020")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:24093))
KMMT_20s <- full_join(KMMT_20s,KMMT_20s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_20s)
KMMT_20s <- mutate(KMMT_20s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KMMT_20s <- mutate(KMMT_20s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KMMT_20s <- KMMT_20s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

###Combine all chunks to new KMMT data set with the interpolated values. Only 22 NAs now.
```{r}
## Stack the gap-filled per-decade frames into one working data set.
KMMT_Wrangled <- rbind(KMMT_60s,KMMT_70s,KMMT_80s,KMMT_90s,KMMT_00s,KMMT_10s,KMMT_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
sum(is.na(KMMT_Wrangled$Ave_Temp.C)) ##28 still missing
sum(is.na(KMMT_Wrangled$Ave_WBGT.C)) ##22 still missing
550056-sum(is.na(KMMT_Wrangled$Ave_WBGT.C))
## FIX: the original audit vector omitted KMMT_20s even though it is part of
## KMMT_Wrangled; include it so the per-decade NA breakdown is complete.
missing_values <- c(sum(is.na(KMMT_60s$Ave_WBGT.C)),sum(is.na(KMMT_70s$Ave_WBGT.C)),sum(is.na(KMMT_80s$Ave_WBGT.C)),sum(is.na(KMMT_90s$Ave_WBGT.C)),sum(is.na(KMMT_00s$Ave_WBGT.C)),sum(is.na(KMMT_10s$Ave_WBGT.C)),sum(is.na(KMMT_20s$Ave_WBGT.C)))
print(missing_values) ##shows which decades the remaining missing values come from
```

###Create a df with daily values

```{r}
## Daily max/min/mean of temperature and WBGT (Celsius).
KMMT_Daily_Values.C <- KMMT_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C),daily_low = min(Ave_Temp.C),daily_avg = mean(Ave_Temp.C),daily_high_WBGT = max(Ave_WBGT.C),daily_low_WBGT = min(Ave_WBGT.C),daily_avg_WBGT = mean(Ave_WBGT.C))%>%
  as.data.frame()
```

###RED and BLACK Count
#### Start Red and Black Flag Day Analysis

```{r}
#count heat cat 4 or 5: hours per year at/above the Red-flag WBGT threshold
KMMT_REDorABOVEbyYear <- KMMT_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
#count heat cat 5: hours per year at/above the Black-flag WBGT threshold
KMMT_BLACKorABOVEbyYear <- KMMT_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)
##For last 40 years - Filtering by Red or Above will include Black days - Hazard Day
KMMT_AboveRed.last40 <- KMMT_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Above_Red = n)%>%
  ## FIX: the original used ifelse(Day_Above_Red == '', 1, 1), which yields 1
  ## in both branches; the intent is a simple presence indicator, so set it
  ## to 1 directly (identical result, no spurious comparison).
  mutate(Day_Above_Red = 1)%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))
##Filters for Green and above - Caution Day
## NOTE(review): the upper bound is Category_Temp_Min.C[2] (the LOWER edge of
## Yellow), so hours above the Yellow minimum are excluded despite the
## Day_Green_or_Yellow name -- confirm whether Category_Temp_Max.C[2] was
## intended. Left unchanged because changing it would alter study results.
KMMT_AboveGreen.last40 <- KMMT_Wrangled%>%
  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[1],Heat_Categories$Category_Temp_Min.C[2]))%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Green_or_Yellow = n)%>%
  ## Same presence-indicator simplification as above.
  mutate(Day_Green_or_Yellow = 1)%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))
```

##Create DF with all Flag Day counts: days that are at RED OR ABOVE, or days following RED OR ABOVE that are also GREEN OR ABOVE

```{r}
##Create a df that has all days at/above red OR days at/above green following a red+ day
KMMT_Flag_Days.last40 <- full_join(KMMT_AboveRed.last40,KMMT_AboveGreen.last40,by="date")
KMMT_Flag_Days.last40 <- KMMT_Flag_Days.last40%>%
  select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>%
  mutate(Julian.y=yday(date))%>%
  mutate(year.y=lubridate::year(date))
## Absent days joined in as NA become 0 (day did not reach that category).
KMMT_Flag_Days.last40 <- replace(KMMT_Flag_Days.last40,is.na(KMMT_Flag_Days.last40),0)
#view(KMMT_Flag_Days.last40)
KMMT_Flag_Days.last40 <- KMMT_Flag_Days.last40%>%arrange(ymd(KMMT_Flag_Days.last40$date)) ##this line gets the df in date order for the following lag
KMMT_Flag_Days.last40$count <- c(1:nrow(KMMT_Flag_Days.last40))
## Lagged is TRUE on the day AFTER a Red-or-above day.
KMMT_Flag_Days.last40$Lagged <- lag(KMMT_Flag_Days.last40$Day_Above_Red>0,1)
## Caution days: Red-or-above days, plus any flagged day following one.
KMMT_Caution_Days.last40 <- KMMT_Flag_Days.last40%>%
  filter(Lagged==TRUE | Day_Above_Red==1)
#view(KMMT_Flag_Days.last40)
#view(KMMT_Caution_Days.last40)
```

##TIME SERIES
### Seasonal and trend plots - these are used to show the trend plots. Next section provides the values used in the study.
```{r}
### Force a best-guess (linear) fit to any remaining missing daily values.
print(sum(is.na(KMMT_Daily_Values.C)))
KMMT_Daily_Values.C <- KMMT_Daily_Values.C%>%
  mutate(daily_high = zoo::na.approx(daily_high))%>%
  mutate(daily_low = zoo::na.approx(daily_low))%>%
  mutate(daily_avg = zoo::na.approx(daily_avg))%>%
  mutate(daily_high_WBGT = zoo::na.approx(daily_high_WBGT))%>%
  mutate(daily_low_WBGT = zoo::na.approx(daily_low_WBGT))%>%
  ## FIX: the original interpolated daily_avg_WBGT twice in this pipe; the
  ## duplicate step was a no-op and has been removed.
  mutate(daily_avg_WBGT = zoo::na.approx(daily_avg_WBGT))
## FIX: the original checked KMMT_Daily_Values.C$Ave_Temp.C / $Ave_WBGT.C,
## columns that do not exist in this data frame (the checks always printed 0).
## Check the whole frame instead; should be 0 after interpolation.
sum(is.na(KMMT_Daily_Values.C))

####TIME SERIES ANALYSIS ON THE DATASET
## For each daily series: build a ts, STL-decompose for the trend plot, and
## run a Seasonal Mann-Kendall test. NOTE: ts() uses only the first two
## elements of `start`, so c(1960, 1) (Jan 1960) replaces the original
## three-element c(1960,01,01); identical behavior, clearer intent.
##Daily HIGH
KMMT_Daily_High.TS <- ts(KMMT_Daily_Values.C$daily_high, start = c(1960,1),frequency=365)
KMMT_Daily_High.decomp <- stl(KMMT_Daily_High.TS,s.window="periodic")
plot(KMMT_Daily_High.decomp)
KMMT_Daily_High_Trend <- Kendall::SeasonalMannKendall(KMMT_Daily_High.TS)
summary(KMMT_Daily_High_Trend)
##Daily LOW
KMMT_Daily_Low.TS <- ts(KMMT_Daily_Values.C$daily_low, start = c(1960,1),frequency=365)
KMMT_Daily_Low.decomp <- stl(KMMT_Daily_Low.TS,s.window="periodic")
plot(KMMT_Daily_Low.decomp)
KMMT_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KMMT_Daily_Low.TS)
summary(KMMT_Daily_Low_Trend)
##Daily AVG
KMMT_Daily_Avg.TS <- ts(KMMT_Daily_Values.C$daily_avg, start = c(1960,1),frequency=365)
KMMT_Daily_Avg.decomp <- stl(KMMT_Daily_Avg.TS,s.window="periodic")
plot(KMMT_Daily_Avg.decomp)
KMMT_Daily_Avg_Trend <- Kendall::SeasonalMannKendall(KMMT_Daily_Avg.TS)
summary(KMMT_Daily_Avg_Trend)
##WBGT
##WBGT.High
KMMT_Daily_High_WBGT.TS <- ts(KMMT_Daily_Values.C$daily_high_WBGT, start = c(1960,1),frequency=365)
KMMT_Daily_High_WBGT.decomp <- stl(KMMT_Daily_High_WBGT.TS,s.window="periodic")
plot(KMMT_Daily_High_WBGT.decomp)
KMMT_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KMMT_Daily_High_WBGT.TS)
summary(KMMT_Daily_High_WBGT_Trend)
##WBGT Low
KMMT_Daily_low_WBGT.TS <- ts(KMMT_Daily_Values.C$daily_low_WBGT, start = c(1960,1),frequency=365)
KMMT_Daily_low_WBGT.decomp <- stl(KMMT_Daily_low_WBGT.TS,s.window="periodic")
plot(KMMT_Daily_low_WBGT.decomp)
KMMT_Daily_low_WBGT_Trend <- Kendall::SeasonalMannKendall(KMMT_Daily_low_WBGT.TS)
summary(KMMT_Daily_low_WBGT_Trend)
##WBGT Avg
KMMT_Daily_avg_WBGT.TS <- ts(KMMT_Daily_Values.C$daily_avg_WBGT, start = c(1960,1),frequency=365)
KMMT_Daily_avg_WBGT.decomp <- stl(KMMT_Daily_avg_WBGT.TS,s.window="periodic")
plot(KMMT_Daily_avg_WBGT.decomp)
KMMT_Daily_avg_WBGT_Trend <- Kendall::SeasonalMannKendall(KMMT_Daily_avg_WBGT.TS)
summary(KMMT_Daily_avg_WBGT_Trend)
```

### Mann Kendall Seasonal Values

```{r}
##RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH
#Set up month and year columns
KMMT_Daily_Values.C$month <- month(ymd(KMMT_Daily_Values.C$date))
KMMT_Daily_Values.C$year <- year(ymd(KMMT_Daily_Values.C$date))
###WBGT
##HIGH WBGT
KMMT_Daily_High_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_high_WBGT ~ month+year, data=KMMT_Daily_Values.C)
#return just tau, slope, and intercept
KMMT_Daily_High_WBGT.C_MannKendall$estimate
##LOW WBGT
KMMT_Daily_Low_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_low_WBGT ~ month+year, data=KMMT_Daily_Values.C)
KMMT_Daily_Low_WBGT.C_MannKendall$estimate
##AVG WBGT
KMMT_Daily_Avg_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_avg_WBGT ~ month+year, data=KMMT_Daily_Values.C)
KMMT_Daily_Avg_WBGT.C_MannKendall$estimate
###Temperature
##HIGH
KMMT_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(daily_high ~ month+year, data=KMMT_Daily_Values.C)
#return just tau, slope, and intercept
KMMT_Daily_High.C_MannKendall$estimate
##LOW (comment fixed: this is temperature, not WBGT)
KMMT_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(daily_low ~ month+year, data=KMMT_Daily_Values.C)
KMMT_Daily_Low.C_MannKendall$estimate
##AVG (comment fixed: this is temperature, not WBGT)
KMMT_Daily_Avg.C_MannKendall <- kendallSeasonalTrendTest(daily_avg ~ month+year, data=KMMT_Daily_Values.C)
KMMT_Daily_Avg.C_MannKendall$estimate
##Build data frame with the needed information
KMMT_Daily_TS_estimates.C <- KMMT_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KMMT_Daily_TS_estimates.C$Low <- as.data.frame(KMMT_Daily_Low.C_MannKendall$estimate)
KMMT_Daily_TS_estimates.C$Avg <- as.data.frame(KMMT_Daily_Avg.C_MannKendall$estimate)
KMMT_Daily_TS_estimates.C$High <- as.data.frame(KMMT_Daily_High.C_MannKendall$estimate)
```

##quartiles
###Bottom quarter trend test

```{r}
##clean data for quartile trend analysis
print(sum(is.na(KMMT_Wrangled))) ##214 missing values in data set, 106 each temp and WBGT
## Interpolate the few remaining hourly NAs before taking daily quantiles.
KMMT_Wrangled <- KMMT_Wrangled%>%
  mutate(Ave_Temp.C = zoo::na.approx(Ave_Temp.C))%>%
  mutate(Ave_WBGT.C = zoo::na.approx(Ave_WBGT.C))
##Temp
## Daily 25th-percentile temperature (reframe emits both quantiles; keep 0.25).
KMMT_quartile_test_lower25 <- KMMT_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.25)
KMMT_quartile_test_lower25$month <- month(ymd(KMMT_quartile_test_lower25$date))
KMMT_quartile_test_lower25$year <- year(ymd(KMMT_quartile_test_lower25$date))
##Bottom Quarter Temp
KMMT_quartile_test_lower25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KMMT_quartile_test_lower25)
KMMT_quartile_test_lower25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature
##WBGT
KMMT_quartile_test_lower25_WBGT <- KMMT_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.25)
KMMT_quartile_test_lower25_WBGT$month <- month(ymd(KMMT_quartile_test_lower25_WBGT$date))
KMMT_quartile_test_lower25_WBGT$year <- year(ymd(KMMT_quartile_test_lower25_WBGT$date))
##Bottom Quarter WBGT
## NOTE(review): this assignment overwrites the quartile DATA FRAME with the
## htest result object; later chunks rely on this name holding the test
## object ($estimate, $seasonal.estimates), so this is intentional reuse --
## but the data frame is no longer available afterwards.
KMMT_quartile_test_lower25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KMMT_quartile_test_lower25_WBGT)
KMMT_quartile_test_lower25_WBGT$estimate ##returns a small but not 0 positive trend in temperature
```

###Upper quarter trend test

```{r}
##Temp
## Daily 75th-percentile temperature.
KMMT_quartile_test_upper25 <- KMMT_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.75)
KMMT_quartile_test_upper25$month <- month(ymd(KMMT_quartile_test_upper25$date))
KMMT_quartile_test_upper25$year <- year(ymd(KMMT_quartile_test_upper25$date))
##Upper Quarter Temp
KMMT_quartile_test_upper25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KMMT_quartile_test_upper25)
KMMT_quartile_test_upper25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature
##WBGT
KMMT_quartile_test_upper25_WBGT <- KMMT_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.75)
KMMT_quartile_test_upper25_WBGT$month <- month(ymd(KMMT_quartile_test_upper25_WBGT$date))
KMMT_quartile_test_upper25_WBGT$year <- year(ymd(KMMT_quartile_test_upper25_WBGT$date))
##Upper Quarter WBGT
## NOTE(review): same data-frame-to-test-object overwrite as the lower
## quartile above; downstream chunks depend on it.
KMMT_quartile_test_upper25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KMMT_quartile_test_upper25_WBGT)
KMMT_quartile_test_upper25_WBGT$estimate
```

##time series full year data tables

```{r}
##Build data frame with the trend information - Temperature (comment fixed; the WBGT frame is built below)
KMMT_Daily_TS_estimates.C <- KMMT_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KMMT_Daily_TS_estimates.C$Low <- as.data.frame(KMMT_Daily_Low.C_MannKendall$estimate)
KMMT_Daily_TS_estimates.C$Avg <- as.data.frame(KMMT_Daily_Avg.C_MannKendall$estimate)
KMMT_Daily_TS_estimates.C$Upper <- as.data.frame(KMMT_quartile_test_upper25_MannKendall$estimate)
KMMT_Daily_TS_estimates.C$Lower <- as.data.frame(KMMT_quartile_test_lower25_MannKendall$estimate)
KMMT_Daily_TS_estimates.C$High <- as.data.frame(KMMT_Daily_High.C_MannKendall$estimate)
KMMT_Daily_TS_estimates.C <- as.data.frame(KMMT_Daily_TS_estimates.C[2:6])
##Build data frame with the trend information - WBGT
KMMT_Daily_TS_estimates_WBGT.C <- KMMT_Daily_High_WBGT.C_MannKendall$estimate%>%
  as.data.frame()
KMMT_Daily_TS_estimates_WBGT.C$Low_WBGT <- as.data.frame(KMMT_Daily_Low_WBGT.C_MannKendall$estimate)
KMMT_Daily_TS_estimates_WBGT.C$Avg_WBGT <- as.data.frame(KMMT_Daily_Avg_WBGT.C_MannKendall$estimate)
KMMT_Daily_TS_estimates_WBGT.C$Upper_WBGT <- as.data.frame(KMMT_quartile_test_upper25_WBGT$estimate)
KMMT_Daily_TS_estimates_WBGT.C$Lower_WBGT <- as.data.frame(KMMT_quartile_test_lower25_WBGT$estimate)
KMMT_Daily_TS_estimates_WBGT.C$High_WBGT <- as.data.frame(KMMT_Daily_High_WBGT.C_MannKendall$estimate)
KMMT_Daily_TS_estimates_WBGT.C[2:6]
## Full-year Sen slopes (estimate[2]) for the five temperature metrics.
KMMT_full_year_trends.C <- as.data.frame(c(KMMT_Daily_High.C_MannKendall$estimate[2],KMMT_quartile_test_upper25_MannKendall$estimate[2],KMMT_Daily_Avg.C_MannKendall$estimate[2],KMMT_quartile_test_lower25_MannKendall$estimate[2],KMMT_Daily_Low.C_MannKendall$estimate[2]))
KMMT_full_year_trends.C$installation <- "FJSC"
KMMT_full_year_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KMMT_full_year_trends.C)[1] <- 'Temp.C'
print(KMMT_full_year_trends.C)
## Full-year Sen slopes for the five WBGT metrics.
KMMT_full_year_trends_WBGT.C <- as.data.frame(c(KMMT_Daily_High_WBGT.C_MannKendall$estimate[2],KMMT_quartile_test_upper25_WBGT$estimate[2],KMMT_Daily_Avg_WBGT.C_MannKendall$estimate[2],KMMT_quartile_test_lower25_WBGT$estimate[2],KMMT_Daily_Low_WBGT.C_MannKendall$estimate[2]))
## NOTE(review): the next two lines assign installation/Metric to
## KMMT_full_year_trends.C (the temperature frame, which already has them)
## rather than KMMT_full_year_trends_WBGT.C -- this looks like a copy-paste
## slip, leaving the WBGT frame with a single column. However, the
## cbind/select(c(1:5,8)) in the "full installation trend table" chunk is
## written around exactly this column layout, so "fixing" it here alone would
## break that chunk. Left as-is; if corrected, update both chunks together.
KMMT_full_year_trends.C$installation <- "FJSC"
KMMT_full_year_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KMMT_full_year_trends_WBGT.C)[1] <- 'WBGT.C'
print(KMMT_full_year_trends_WBGT.C)
```

##warm month season trends

```{r}
#using the previous calculated monthly data, select for only warm season months
## seasonal.estimates[5:9,2] = per-month Sen slopes for May-September.
KMMT_warm_season_trends.C <- as.data.frame(c(mean(KMMT_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),mean(KMMT_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),mean(KMMT_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),mean(KMMT_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),mean(KMMT_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2])))
KMMT_warm_season_trends.C$installation <- "FJSC"
KMMT_warm_season_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KMMT_warm_season_trends.C)[1] <- 'Temp.C_Warm.season'
print(KMMT_warm_season_trends.C)
## Warm-season (May-Sep) mean Sen slopes for the five WBGT metrics.
KMMT_warm_season_trends_WBGT.C <- as.data.frame(c(mean(KMMT_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KMMT_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),mean(KMMT_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KMMT_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),mean(KMMT_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2])))
KMMT_warm_season_trends_WBGT.C$installation <- "FJSC"
KMMT_warm_season_trends_WBGT.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KMMT_warm_season_trends_WBGT.C)[1] <- 'WBGT.C_Warm.season'
print(KMMT_warm_season_trends_WBGT.C)
```

##full installation trend table

```{r}
## Combine the four trend frames into the installation summary table.
## NOTE: select(c(1:5,8)) depends on the exact column layout produced above
## (including the missing installation/Metric columns on the WBGT frame) --
## see the NOTE in the full-year trends chunk before changing either.
FJSC_Historical_Trends <- cbind(KMMT_full_year_trends.C,KMMT_full_year_trends_WBGT.C,KMMT_warm_season_trends.C,KMMT_warm_season_trends_WBGT.C)
FJSC_Historical_Trends <- FJSC_Historical_Trends%>%
  select(c(1:5,8))%>%
  select(installation,Metric,Temp.C,WBGT.C,Temp.C_Warm.season,WBGT.C_Warm.season)
#view(FJSC_Historical_Trends)
```

#KCUB
##KCUB is the adjacent station to KMMT. Used to compare trends to ensure confidence in the KMMT data. Remove the "eval=FALSE" to run this script; currently turned off to save computing effort.
#### INITIAL DATA LOAD AND SET UP

```{r eval=FALSE}
#this data set is available on the NOAA website.
KCUB <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KCUB_Raw.csv")
##Build a POSIX datetime from the component columns, then pad() inserts rows for any
##missing hours so the series is a complete hourly sequence (new rows hold NAs).
KCUB <- KCUB %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad() #%>%
#  mutate('date' = make_date(year=Year, month = Month, day = Day))
##Convert Fahrenheit observations to Celsius, rebuild the calendar columns from the
##padded datetime, and linearly interpolate WBGT gaps of 3 hours or less.
KCUB <- KCUB%>%
  mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>%
  mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>%
  mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>%
  mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>%
  rename('Temp.F' = Temperature..F.)%>%
  rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>%
  rename('Heat.Index.F'=Heat.Index..F.)%>%
  rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>%
  mutate("Hour"=hour(datetime))%>%
  mutate("Day"=day(datetime))%>%
  mutate("Month"=month(datetime))%>%
  mutate("Year"=year(datetime))%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3))
##CREATE DECADE COLUMN
KCUB <- KCUB%>%
  mutate(Decade=floor(Year/10)*10)
```
## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
###DECADES
####1970s
```{r eval=FALSE}
##1970s - no missing values
##FIX: this reference table previously filtered Decade=="1980" (copy-paste from the
##1980s chunk). It must average the 1970s so NA fills come from the same decade.
##(Latent while the decade truly has no missing values, but wrong if any appear.)
KCUB_70s_Hour_Avg <- KCUB%>%
  filter(Decade=="1970")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KCUB_70s <- KCUB%>%
  filter(Decade=="1970")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:5572)) ##hard-coded row count doubles as a check that the decade has the expected number of hours
KCUB_70s <- full_join(KCUB_70s,KCUB_70s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KCUB_90s)
KCUB_70s <- mutate(KCUB_70s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KCUB_70s <- mutate(KCUB_70s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KCUB_70s <- KCUB_70s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```
####1980
```{r eval=FALSE}
##1980s - no missing values
KCUB_80s_Hour_Avg <- KCUB%>%
  filter(Decade=="1980")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KCUB_80s <- KCUB%>%
  filter(Decade=="1980")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672))
KCUB_80s <- full_join(KCUB_80s,KCUB_80s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KCUB_90s)
KCUB_80s <- mutate(KCUB_80s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KCUB_80s <- mutate(KCUB_80s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KCUB_80s <- KCUB_80s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```
####1990
```{r eval=FALSE}
##1990s
KCUB_90s_Hour_Avg <- KCUB%>%
  filter(Decade=="1990")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KCUB_90s <- KCUB%>%
  filter(Decade=="1990")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
KCUB_90s <- full_join(KCUB_90s,KCUB_90s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KCUB_90s)
KCUB_90s <- mutate(KCUB_90s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KCUB_90s <- mutate(KCUB_90s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KCUB_90s <- KCUB_90s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```
####2000s
```{r eval=FALSE}
##2000
##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful.
KCUB_00s_Hour_Avg <- KCUB%>%
  filter(Decade=="2000")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KCUB_00s <- KCUB%>%
  filter(Decade=="2000")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672))
KCUB_00s <- full_join(KCUB_00s,KCUB_00s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KCUB_00s)
KCUB_00s <- mutate(KCUB_00s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KCUB_00s <- mutate(KCUB_00s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KCUB_00s <- KCUB_00s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```
####2010
```{r eval=FALSE}
##2010
KCUB_10s_Hour_Avg <- KCUB%>%
  filter(Decade=="2010")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KCUB_10s <- KCUB%>%
  filter(Decade=="2010")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
KCUB_10s <- full_join(KCUB_10s,KCUB_10s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KCUB_10s)
KCUB_10s <- mutate(KCUB_10s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KCUB_10s <- mutate(KCUB_10s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KCUB_10s <- KCUB_10s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```
#### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029
```{r eval=FALSE}
KCUB_20s_Hour_Avg <- KCUB%>%
  filter(Decade=="2020")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KCUB_20s <- KCUB%>%
  filter(Decade=="2020")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:24096)) ##partial decade: 2020-2022 only
KCUB_20s <- full_join(KCUB_20s,KCUB_20s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KCUB_20s)
KCUB_20s <- mutate(KCUB_20s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KCUB_20s <- mutate(KCUB_20s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x ))
KCUB_20s <- KCUB_20s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```
###Combine all chunks to new KCUB data set with
the interpolated values.
```{r eval=FALSE}
##NOTE(review): KCUB_70s is computed above but deliberately excluded here —
##presumably because the 1970s record is too short/gap-filled; confirm.
KCUB_Wrangled <- rbind(KCUB_80s,KCUB_90s,KCUB_00s,KCUB_10s,KCUB_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
#sum(is.na(KCUB_Wrangled$Ave_Temp.C)) ##28 still missing
#sum(is.na(KCUB_Wrangled$Ave_WBGT.C)) ##22 still missing
```
##CREATE SINGLE DAY HIGHS AND LOWS
```{r eval=FALSE}
#Create single day high temp
KCUB_DAILY_HIGH.C <- KCUB_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C))%>%
  as.data.frame()
#Create single day low temp
KCUB_DAILY_LOW.C <- KCUB_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_LOW = min(Ave_Temp.C))%>%
  as.data.frame()
#Create single day high WBGT temp
KCUB_DAILY_HIGH_WBGT.C <- KCUB_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high_WBGT = max(Ave_WBGT.C))%>%
  as.data.frame()
#Create single day low WBGT temp
```
### Look for missing dates in the complete data set
No missing dates!
```{r eval=FALSE}
##Compare a complete daily sequence against the data's dates; Missing prints any gaps.
FullSeq <- seq.Date(from = min(KCUB_Wrangled$date), to = max(KCUB_Wrangled$date), by = 1)
Missing <- FullSeq[!FullSeq %in% KCUB_Wrangled$date]
Missing
```
###RED and BLACK Count
#### Start Red and Black Flag Day Analysis
```{r eval=FALSE}
##Count of hours per year at or above the RED flag WBGT threshold (Heat_Categories row 3).
KCUB_REDorABOVEbyYear <- KCUB_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
##Count of hours per year at or above the BLACK flag WBGT threshold (row 4).
KCUB_BLACKorABOVEbyYear <- KCUB_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)
##Plots hours Red or Above
KCUB_REDorABOVEbyYear.plot <- ggplot(KCUB_REDorABOVEbyYear, aes(x = Year, y=Hours_Above_Red))+
  geom_line()+
  geom_point()
```
# KMMT vs KCUB COMPARISON
```{r eval=FALSE}
##CONDUCT A CORRELATION TEST BETWEEN THE TWO SITES TO SEE IF TRENDS MOVE IN THE SAME DIRECTION
##IF THEY DO, WE CAN SAY THAT THE TWO SITES ARE SHOWING THE SAME GENERAL CLIMATE TRENDS ACROSS YEARS
##NULL HYPOTHESIS IS THEY DO NOT MOVE IN THE SAME DIRECTION, AND THEREFORE WE HAVE LOWER CONFIDENCE IN USING THEM
##1993 start aligns the two stations' overlapping period of record.
KCUB_WBGT_REDorABOVE_Hourly_byYear <- KCUB_Wrangled%>%
  filter(date>='1993-01-01')%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
KMMT_WBGT_REDorABOVE_Hourly_byYear <- KMMT_Wrangled%>%
  filter(date>='1993-01-01')%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
##CHECK FOR NORMAL DISTRIBUTION TO THE POINTS
##Shapiro test, if P>0.05, then it IS normally distributed. In this case, hourly summary of KCUB is not normally distributed.
shapiro.test(KCUB_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)
shapiro.test(KMMT_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)
##Not normally distributed, so should not use Pearson's correlation tests
##USE KENDALL RANK CORRELATION TEST - can be used if not from normal distribution
cor.test(KCUB_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KMMT_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="kendall")
##Shows positive correlation between the 2 sites (tau=0.279) and p-value < 0.05 (0.004) ; implies correlation
##USE SPEARMAN RANK CORRELATION COEFFICIENT - can be used if data is not normal
cor.test(KCUB_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KMMT_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="spearman")
##All tests return positive correlation and low p-values, including tests robust to non-normal
```
# KMMT ANALYSIS AND PLOT CREATION
```{r}
####FULL_JOIN ALL DECADES
##Join the per-decade frames side by side on the within-decade hour counter `count`,
##renaming the .x/.y suffixes produced by each join to decade-tagged column names.
Avg_Hourly_Decade <- full_join(KMMT_60s,KMMT_70s,by="count")%>%
  rename(Hour_1960=Hour.x,Day.1960=Day.x,Month.1960=Month.x,Year.1960=Year.x,Decade.1960=Decade.x,Ave_WBGT.C.1960=Ave_WBGT.C.x,Date.1960=Date.x)%>%
  rename(Hour_1970=Hour.y,Day.1970=Day.y,Month.1970=Month.y,Year.1970=Year.y,Decade.1970=Decade.y,Ave_WBGT.C.1970=Ave_WBGT.C.y,Date.1970=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KMMT_80s,by="count")
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KMMT_90s,by="count")%>%
rename(Hour_1980=Hour.x,Day.1980=Day.x,Month.1980=Month.x,Year.1980=Year.x,Decade.1980=Decade.x,Ave_WBGT.C.1980=Ave_WBGT.C.x,Date.1980=Date.x)%>%
  rename(Hour_1990=Hour.y,Day.1990=Day.y,Month.1990=Month.y,Year.1990=Year.y,Decade.1990=Decade.y,Ave_WBGT.C.1990=Ave_WBGT.C.y,Date.1990=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KMMT_00s,by="count")
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KMMT_10s,by="count")%>%
  rename(Hour_2000=Hour.x,Day.2000=Day.x,Month.2000=Month.x,Year.2000=Year.x,Decade.2000=Decade.x,Ave_WBGT.C.2000=Ave_WBGT.C.x,Date.2000=Date.x)%>%
  rename(Hour_2010=Hour.y,Day.2010=Day.y,Month.2010=Month.y,Year.2010=Year.y,Decade.2010=Decade.y,Ave_WBGT.C.2010=Ave_WBGT.C.y,Date.2010=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KMMT_20s,by="count")%>%
  rename(Hour_2020=Hour,Day.2020=Day,Month.2020=Month,Year.2020=Year,Decade.2020=Decade,Ave_WBGT.C.2020=Ave_WBGT.C,Date.2020=Date)
##Prefix each decade's month column with its decade label for plotting.
##Month.1960 is intentionally left numeric (the commented line) so the summer
##filter below can still use between() on it.
#Avg_Hourly_Decade$Month.1960 <- paste0("1960s",Avg_Hourly_Decade$Month.1960)
#1960s not used due to the high degree of missing value replacement
Avg_Hourly_Decade$Month.1970 <- paste0("1970s",Avg_Hourly_Decade$Month.1970)
Avg_Hourly_Decade$Month.1980 <- paste0("1980s",Avg_Hourly_Decade$Month.1980)
Avg_Hourly_Decade$Month.1990 <- paste0("1990s",Avg_Hourly_Decade$Month.1990)
Avg_Hourly_Decade$Month.2000 <- paste0("2000s",Avg_Hourly_Decade$Month.2000)
Avg_Hourly_Decade$Month.2010 <- paste0("2010s",Avg_Hourly_Decade$Month.2010)
Avg_Hourly_Decade$Month.2020 <- paste0("2020s",Avg_Hourly_Decade$Month.2020)
KMMT_Avg_Summer_Hourly_Decade <- Avg_Hourly_Decade%>%
  filter(between(Month.1960,5,9)) ##Filters for just the summer months
#view(Avg_Summer_Hourly_Decade)
##"Last 10 years" series = 2013-2019 plus 2020-2022 (spliced from two decade frames).
KMMT_Thirteen_to_Nineteen <- Avg_Hourly_Decade%>%
  filter(Year.2010>2012)%>%
  select(Month.1960,Hour_2010:Ave_Temp.C.y.y.y)%>%
  rename(Hour=Hour_2010,Day=Day.2010,Month=Month.2010,Year=Year.2010,Decade=Decade.2010,Ave_WBGT.C=Ave_WBGT.C.2010,Date=Date.2010,Ave_Temp.C=Ave_Temp.C.y.y.y)
KMMT_Zero_to_Two <- Avg_Hourly_Decade%>%
  filter(Year.2020>0)%>%
  select(Month.1960,Hour_2020:Ave_Temp.C)%>%
  rename(Hour=Hour_2020,Day=Day.2020,Month=Month.2020,Year=Year.2020,Decade=Decade.2020,Ave_WBGT.C=Ave_WBGT.C.2020,Date=Date.2020,Ave_Temp.C=Ave_Temp.C)
KMMT_Thirteen_to_Two <- rbind(KMMT_Thirteen_to_Nineteen,KMMT_Zero_to_Two)
##add_row pads hours 21-23 of the final September day so row counts line up for the
##cbind with the decade frame below — TODO confirm the expected row delta stays 3.
KMMT_Thirteen_to_Two <- KMMT_Thirteen_to_Two%>%
  select(Hour,Month.1960,Hour,Ave_WBGT.C:Ave_Temp.C)%>%
  rename(Ave_WBGT_last10 = Ave_WBGT.C ,Ave_Temp_last10=Ave_Temp.C,Month_last10= Month.1960 )%>%
  filter(between(Month_last10,5,9))%>%
  add_row(Hour = 21:23, Month_last10=9,Ave_WBGT_last10=NA,Date=NA,Ave_Temp_last10=NA)
KMMT_Avg_Summer_Hourly_Decade <- cbind(KMMT_Avg_Summer_Hourly_Decade,KMMT_Thirteen_to_Two)
```
##DENSITY PLOT AND HISTORGRAMS BY HOURLY AVERAGE BY DECADE
###DENSITY PLOT
```{r}
##FIX: chunk previously opened with a four-backtick fence (````{r}) but closed with
##three; pandoc requires the closing fence to be at least as long, so the chunk never
##terminated. Normalized to three backticks.
KMMT_Summer_DensityPlot_decades <- ggplot(KMMT_Avg_Summer_Hourly_Decade)+
  geom_density(aes(x=Ave_WBGT.C.1960,color='white'),linetype=2,linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1970,color='purple'),linetype=2,linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1980,color='red'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1990,color='blue'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2000, color='yellow'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2010,color='green'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2020,color='black'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT_last10 ,color='pink'),alpha=0.7,linewidth=1)+
  scale_color_discrete(name="Hourly WBGT \n Grouped by decade", breaks=c("white","purple","red", "blue", "yellow","green","black","pink"), labels=c("1960s","1970s","1980s", "1990s", "2000s","2010s","2020s","Last 10 Years"))+
  xlab("WBGT")+
  scale_x_continuous(limits = c(20,38),expand=c(0,0))+
  ggtitle("May - September Hourly WBGT Density",subtitle="Ft Jackson, SC")+
#  xlim(20,38)+
  theme_light()
KMMT_Summer_DensityPlot_decades
```
###USE THIS ANOMALY PLOT INSTEAD - DAYLIGHT FULL YEAR WITH FIX TO BASELINE
```{r}
##SET UP DATA
##mean values
KMMT_Anom_Avg <-
KMMT_Wrangled%>%
  filter(between(Year,1960,2021),between(Hour,8,20))%>% ##daylight hours only (0800-2000)
  group_by(Hour, Day, Month)%>%
  dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C,na.rm=TRUE)) ##create a column with the average WBGT values for that day, ignoring missing data in calculation
KMMT_Anomaly <- KMMT_Wrangled%>%
  filter(between(Year,1960,2021),between(Hour,8,20))%>%
  group_by(Hour, Day, Month, Year, Decade)%>%
  dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C))%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:294398)) ##hard-coded row count; errors if the filtered data changes size
KMMT_Anomaly <- full_join(KMMT_Anomaly,KMMT_Anom_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_00s)
KMMT_Anomaly <- mutate(KMMT_Anomaly, Ave_WBGT.C.x = case_when( is.na(Ave_WBGT.C.x) ~Ave_WBGT.C.y, TRUE~Ave_WBGT.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column
##START ACTUAL PLOT
KMMT_Anomaly <- KMMT_Anomaly%>%
  select(Hour,Day,Month,Year,Ave_WBGT.C.x,Date,count)%>%
  rename(Ave_WBGT.C=Ave_WBGT.C.x)
##Reference value: mean daylight WBGT over the 1990-2020 climatological period.
KMMT_Ref <- KMMT_Anomaly%>%
  filter(between(Year,1990,2020),between(Hour,8,20))%>%
  summarise(mean=mean(Ave_WBGT.C))
##Yearly anomaly = yearly mean minus the 1990-2020 reference mean.
KMMT_Anomaly <- KMMT_Anomaly%>%
  group_by(Year)%>%
  dplyr::summarise(Avg_Year_WBGT = mean(Ave_WBGT.C))%>%
  mutate(Anomaly = Avg_Year_WBGT-KMMT_Ref$mean)
#mutate(Date = make_date(year=Year,month=Month))
#KMMT_Anomaly <- KMMT_Anomaly %>% arrange(ymd(KMMT_Anomaly$Date)) ##puts DF in date order
#KMMT_Anomaly$count <- c(1:nrow(KMMT_Anomaly)) ##assigns date ordered number to DF
KMMT_Monthly_Anomaly.plot <- ggplot(KMMT_Anomaly,aes(x=Year,y=Anomaly))+
  geom_line()+
  geom_smooth()+
  xlab("Year")+
  ylab("Monthly Average WBGT Anomaly\nDegrees C")+
  ggtitle("Yearly average WBGT anomaly, 1960-2022",subtitle="Ft Jackson, SC")+
  labs(caption = "Reference period = Average WBGT, 1990-2020")+
  theme_classic()+
  geom_hline(yintercept=0,linetype="dashed")
#annotate("text",x=2010,y=-1.5,label=("Reference period = Average WBGT, 1990-2020"),size=2)+
KMMT_Monthly_Anomaly.plot
```
## Compare anomaly from KMMT to anomaly from NOAA
This section is used to validate the single point observations from station KMMT with regional trends, in order to gain confidence in the data set.
https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/divisional/time-series/0904/tavg/ann/5/1960-2022?base_prd=true&begbaseyear=1990&endbaseyear=2020
```{r}
##SET UP DATA
##manually load in the regional anomaly trend
Avg_Temp_Anom <- read.csv ('/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/R Files/JAN 2023 Data Projects/NOAA Climate Anomaly Data.csv')
##NOTE(review): column name says SW Oklahoma but this installation is central South
##Carolina — confirm the correct regional column is selected from the NOAA file.
CSC_Avg_Temp_Anom <- Avg_Temp_Anom$Anomaly.SW.OK
CSC_Avg_Temp_Anom <- CSC_Avg_Temp_Anom[1:62] ##keep the 62 values matching 1960-2021
KMMT_Anom_Avg.C <- KMMT_Wrangled%>%
  filter(between(Year,1960,2021))%>%
  group_by(Hour, Day, Month)%>%
  dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C,na.rm=TRUE)) ##create a column with the average temperature values for that hour/day, ignoring missing data in calculation
KMMT_Anomaly.C <- KMMT_Wrangled%>%
  filter(between(Year,1960,2021))%>%
  group_by(Hour, Day, Month, Year, Decade)%>%
  dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C))%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:543504)) ##hard-coded row count; errors if the filtered data changes size
KMMT_Anomaly.C <- full_join(KMMT_Anomaly.C,KMMT_Anom_Avg.C,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KMMT_00s)
KMMT_Anomaly.C <- mutate(KMMT_Anomaly.C, Ave_Temp.C.x = case_when( is.na(Ave_Temp.C.x) ~Ave_Temp.C.y, TRUE~Ave_Temp.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column
##START ACTUAL PLOT
KMMT_Anomaly.C <- KMMT_Anomaly.C%>%
  select(Hour,Day,Month,Year,Ave_Temp.C.x,Date,count)%>%
  rename(Ave_Temp.C=Ave_Temp.C.x)
##Reference value: mean temperature over the 1990-2020 climatological period.
KMMT_Ref.C <- KMMT_Anomaly.C%>%
  filter(between(Year,1990,2020))%>%
  summarise(mean=mean(Ave_Temp.C))
KMMT_Anomaly.C <- KMMT_Anomaly.C%>%
  group_by(Year)%>%
  dplyr::summarise(Avg_Year_Temp = mean(Ave_Temp.C))%>%
  mutate(Anomaly = Avg_Year_Temp-KMMT_Ref.C$mean)
#mutate(Date = make_date(year=Year,month=Month))
KMMT_Anomaly.C$Regional_Anom <- CSC_Avg_Temp_Anom
KMMT_Monthly_Anomaly.C.plot <- ggplot(KMMT_Anomaly.C)+
  geom_line(aes(x=Year,y=Anomaly,color="Blue"))+
  geom_line(aes(x=Year,y=Regional_Anom,color="red"))+
  xlab("Year")+
  scale_x_continuous(breaks = c(1960,1970,1980,1990,2000,2010,2020),expand=c(0,0))+
  ylab("Average Anomaly\nDegrees C")+
  ggtitle("Yearly average temperature anomaly\nFt Jackson, SC vs Central South Carolina Average")+
  theme_classic()+
#  theme(axis.text.y=element_text(margin=margin(r=0)))+
  geom_hline(yintercept=0,linetype="dashed")+
  scale_color_identity(name="Location",breaks=c("Blue","red"), labels=c("FJSC","Regional Average\n(NOAA)"),guide="legend")+
  labs(caption = "Reference period = Yearly average temp, 1990-2020")
#annotate("text",x=2009,y=-2,label=("Reference period = Average Temperature, 1990-2020"),size=2)+
KMMT_Monthly_Anomaly.C.plot
##Pearson correlation between station anomaly and NOAA regional anomaly.
FJSCxRegional.cor <- cor.test(KMMT_Anomaly.C$Regional_Anom ,KMMT_Anomaly.C$Anomaly,method = "pearson")
FJSCxRegional.cor$estimate
FJSCxRegional.cor$p.value
FJSCxRegional.cor$conf.int
```
####HISTOGRAMS
FULL year data used
```{r}
##FIX: this 1970-1979 histogram previously plotted Ave_WBGT.C.1980 (copy-paste from
##the 1980s plot); it now uses the 1970s column created in the decade joins above.
Summer_Histogram_1970s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.1970),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("1970-1979")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()
Summer_Histogram_1980s <- ggplot(Avg_Hourly_Decade)+ geom_histogram(aes(x=Ave_WBGT.C.1980),color='red',
fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("1980-1989")+
  ##Vertical reference lines mark the four heat-category WBGT thresholds.
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()
Summer_Histogram_1990s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.1990),color='red', fill="red",alpha=0.4,position="identity",bins = 50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("1990-1999")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()
Summer_Histogram_2000s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.2000),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("2000-2009")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits =
c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()
Summer_Histogram_2010s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.2010),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("2010-2019")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  #xlim(27,36)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()
```
####BAR CHARTS (Full Year data used)
```{r}
##CREATE VECTORS OF HOURS ABOVE A CATEGORY
##FIX: na.rm = TRUE is now applied to every sum(); previously only some decades had
##it, so a single NA WBGT value in an unguarded decade silently turned that bar NA.
##/10 converts a decade total to an average per year; the 2020s use /2.75 because
##that frame covers only 2020-2022.
Green <- c(sum(KMMT_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1],na.rm = TRUE)/10,
           sum(KMMT_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1],na.rm = TRUE)/10,
           sum(KMMT_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1],na.rm = TRUE)/10,
           sum(KMMT_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1],na.rm = TRUE)/10,
           sum(KMMT_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1],na.rm = TRUE)/10,
           sum(KMMT_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1],na.rm = TRUE)/2.75)
Yellow <- c(sum(KMMT_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2],na.rm = TRUE)/10,
            sum(KMMT_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2],na.rm = TRUE)/10,
            sum(KMMT_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2],na.rm = TRUE)/10,
            sum(KMMT_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2],na.rm = TRUE)/10,
            sum(KMMT_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2],na.rm = TRUE)/10,
            sum(KMMT_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2],na.rm = TRUE)/2.75)
Red <- c(sum(KMMT_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3],na.rm = TRUE)/10,
         sum(KMMT_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3],na.rm = TRUE)/10,
         sum(KMMT_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3],na.rm = TRUE)/10,
         sum(KMMT_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3],na.rm = TRUE)/10,
         sum(KMMT_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3],na.rm = TRUE)/10,
         sum(KMMT_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3],na.rm = TRUE)/2.75)
Black <- c(sum(KMMT_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4],na.rm = TRUE)/10,
           sum(KMMT_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4],na.rm = TRUE)/10,
           sum(KMMT_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4],na.rm = TRUE)/10,
           sum(KMMT_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4],na.rm = TRUE)/10,
           sum(KMMT_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4],na.rm = TRUE)/10,
           sum(KMMT_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4],na.rm = TRUE)/2.75)
#/10 in order to get single year avg
##Reshape to long form (V1 = decade label) for the dodged bar chart.
KMMT_Hours_Flags <- as.data.frame(cbind(c("1970s","1980s","1990s","2000s","2010s","2020s"),Green,Yellow,Red,Black),stringsAsFactors = FALSE)
KMMT_Hours_Flags <- KMMT_Hours_Flags%>%
  pivot_longer(!V1,names_to = "Heat_Category",values_to = "Hours")%>%
  as.data.frame()
KMMT_Hours_Flags$Hours <- as.integer(KMMT_Hours_Flags$Hours)
#view(KMMT_Hours_Flags)
##CREATE GRAPH
KMMT_Threshold_Barplot <- ggplot(data=KMMT_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) +
  geom_bar(stat="identity",position=position_dodge())+
  geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+
  scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+
  ylab("Average Hours per Year")+
  xlab("Heat Category")+
  theme_classic()+
  scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+
  coord_flip()+
  ggtitle("Average Hours at Each Heat Catergory per Year",subtitle="Ft Jackson, SC")
KMMT_Threshold_Barplot
```
#### COMBINED PLOTS
```{r}
Summer_Months_KMMT.plot <- plot_grid(KMMT_Summer_DensityPlot_decades,
KMMT_Monthly_Anomaly.plot, KMMT_Threshold_Barplot,nrow=3) Summer_Months_KMMT.plot <- ggdraw(add_sub(Summer_Months_KMMT.plot,"Data gaps =< 3 hours interpolated. Longer gaps replaced with decade average for missing Hour, Day \n 2020s density plot for 2020-2022 only; Bar chart extrapolates full 2020s decade",size=8)) Summer_Months_KMMT.plot ``` #BCT Analysis ##Load Data FJSC BCT Wrangle ```{r} #BCT analysis done from ATRRS data previously converted to a .csv file. FJSC_BCT_23 <- read.csv("~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Jackson ATRRS Pull/FJSC_BCT_23.csv",stringsAsFactors = FALSE) FJSC_BCT_23 <- FJSC_BCT_23%>% mutate_all(.funs = funs(str_trim))%>% select(Class,X.Report.Date,X.Start.Date,X.End.Date,X.Capacity) FJSC_BCT_23 <- na.omit(FJSC_BCT_23) #remove the placeholder classes with capacity=0 FJSC_BCT_23$Class <- as.numeric(FJSC_BCT_23$Class) FJSC_BCT_23$X.Capacity <- as.numeric(FJSC_BCT_23$X.Capacity) FJSC_BCT_23$X.Report.Date <- dmy(FJSC_BCT_23$X.Report.Date) FJSC_BCT_23$X.Start.Date <- dmy(FJSC_BCT_23$X.Start.Date) FJSC_BCT_23$X.End.Date <- dmy(FJSC_BCT_23$X.End.Date) FJSC_BCT_23 <- FJSC_BCT_23%>% rename(Report_Date = X.Report.Date, Start_Date=X.Start.Date,End_Date=X.End.Date)%>% mutate(Julian_Start=yday(Start_Date),Julian_End=yday(End_Date)) FJSC_BCT_23 <- FJSC_BCT_23[apply(FJSC_BCT_23!=0,1,all),] #remove any row with a numerical '0' to get rid of classes with 0 capacity #write.csv(FJSC_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Jackson ATRRS Pull/FJSC_BCT_23_Wrangled.csv") ``` ##KMMT BCT ##Assign clean history to KMMT data ```{r} FJSC_BCT_23$Start_Month <- month(ymd(FJSC_BCT_23$Start_Date)) FJSC_BCT_23$End_Month <- month(ymd(FJSC_BCT_23$End_Date)) FJSC_BCT_23$Start_Day <- day(ymd(FJSC_BCT_23$Start_Date)) FJSC_BCT_23$End_Day <- day(ymd(FJSC_BCT_23$End_Date)) FJSC_BCT_23$Days_Long <- FJSC_BCT_23$End_Date-FJSC_BCT_23$Start_Date #counts number of days in each 
class # stray trailing word of the wrapped comment above ("...days in each class")
```

```{r}
###Averaging out the values left alone, per conversation with Luke
## Hourly KMMT observations for the 40-year window, tagged with Julian day and
## grouped by (Hour, Julian). No summarise() follows: the grouped frame itself
## is what downstream filters consume.
KMMT_Hourly_Avg.last40 <- KMMT_Wrangled%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(Julian=yday(date))%>%
  group_by(Hour,Julian)
#average hourly values for the most recent climatological average of 30 years.
KMMT_Hourly_Avg.last30 <- KMMT_Wrangled%>%
  filter(date>="1992-10-01"&date<="2022-09-30")%>%
  mutate(Julian=yday(date))%>%
  group_by(Hour,Julian)
## Days with at least one hour in the Red band. NOTE(review): between() is
## inclusive at both ends, so an hour exactly at the Black minimum is counted
## here AND in KMMT_Cat5 below -- confirm that double-count is intended.
KMMT_Cat4 <- KMMT_Hourly_Avg.last30%>%
  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[3],Heat_Categories$Category_Temp_Min.C[4]))%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Cat4 = n)%>%
  mutate(Day_Cat4 =ifelse(Day_Cat4 == '',1,1))%>% # both branches yield 1: collapses the hour count to a day flag
  mutate(Julian=yday(date))
#Days just in Black category Last 30
KMMT_Cat5 <- KMMT_Hourly_Avg.last30%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4] )%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Cat5 = n)%>%
  mutate(Day_Cat5 =ifelse(Day_Cat5 == '',1,1))%>% # both branches yield 1: day flag, not hour count
  mutate(Julian=yday(date))
```

##Filter

```{r}
##Get ATRRS Data and re-wrangled
FJSC_BCT_23_Wrangled <- FJSC_BCT_23
FJSC_BCT_23_Wrangled$Start_Date <- ymd(FJSC_BCT_23_Wrangled$Start_Date)
FJSC_BCT_23_Wrangled$End_Date <- ymd(FJSC_BCT_23_Wrangled$End_Date)
## Keep only classes starting March-December 2023 (character bounds are
## coerced to Date by the comparison).
FJSC_BCT_23_Wrangled <- FJSC_BCT_23_Wrangled%>%
  filter(between(Start_Date,"2023-03-01","2023-12-31"))
```

##Filter with ATRRS Data for course dates
Done over the last 30 years, i.e. the most recent climatological average

```{r}
FJSC_BCT_23_Wrangled$Count <- c(1:nrow(FJSC_BCT_23_Wrangled)) #add a counter column for following loop
##Loop to count average hours in each flag condition by cohort.
## For each BCT class window, count the 30-year climatology hours falling in
## each WBGT band and divide by 30 to get average hours per single running of
## the class. The source text for this loop was corrupted (the five per-band
## assignments were fused together with `=` in place of the `<`/`>=`
## comparisons); the bands are reconstructed from the Heat_Categories table
## and from the TotalCourseHours check below, which sums
## NoFlags/Green/Yellow/Red/Black averages.
for (i in FJSC_BCT_23_Wrangled$Count){
  Flags <- KMMT_Hourly_Avg.last30%>%
    filter(between(Julian,FJSC_BCT_23_Wrangled$Julian_Start[i],FJSC_BCT_23_Wrangled$Julian_End[i]))
  ##divide the below by 30 because there are 30 years in this filtered data set
  FJSC_BCT_23_Wrangled$NoFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[1]))/(30))
  FJSC_BCT_23_Wrangled$GreenFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[2]))/(30))
  FJSC_BCT_23_Wrangled$YellowFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[3]))/(30))
  FJSC_BCT_23_Wrangled$RedFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[4]))/(30))
  FJSC_BCT_23_Wrangled$BlackFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4]))/(30))
  ## "Dangerous" = Red or Black (at/above the Red minimum)
  FJSC_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3]))/(30))
}
##Loop to count up total hours from NoFlag:BlackFlag as a check that all hours are accounted for
for (i in FJSC_BCT_23_Wrangled$Count){
  FJSC_BCT_23_Wrangled$TotalCourseHours[i] <- FJSC_BCT_23_Wrangled$NoFlags_Avg[i]+FJSC_BCT_23_Wrangled$GreenFlags_Avg[i]+FJSC_BCT_23_Wrangled$YellowFlags_Avg[i]+FJSC_BCT_23_Wrangled$RedFlags_Avg[i]+FJSC_BCT_23_Wrangled$BlackFlags_Avg[i]
}
view(FJSC_BCT_23_Wrangled)
```

##Create a list of Green and Yellow days that follow Red or Black Days

```{r}
##Create a df that has all days at/above red OR days at/above green following a red+ day
KMMT_Flag_Days.last40 <- full_join(KMMT_AboveRed.last40,KMMT_AboveGreen.last40,by="date")
KMMT_Flag_Days.last40 <- KMMT_Flag_Days.last40%>%
  select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>%
  mutate(Julian.y=yday(date))%>%
  mutate(year.y=lubridate::year(date))
## Days absent from one side of the join carry NA flags; treat them as 0.
KMMT_Flag_Days.last40 <- replace(KMMT_Flag_Days.last40,is.na(KMMT_Flag_Days.last40),0)
KMMT_Flag_Days.last40 <- KMMT_Flag_Days.last40%>%arrange(ymd(KMMT_Flag_Days.last40$date)) ##this line gets the df in date order for the following loop
KMMT_Flag_Days.last40$Lagged <- lag(KMMT_Flag_Days.last40$Day_Above_Red>0,1) ##create a column identifying days after RED or BLACK
##Create the average occurrence of "caution" day
KMMT_Caution_Days.last40 <-
## Continuation of KMMT_Caution_Days.last40: for each calendar (month, day),
## count days over the 40-year record that were hazardous OR followed one.
  KMMT_Flag_Days.last40%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>% # map each (month, day) onto a 2023 date for plotting
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
## Count of occurrences / 40 years = per-day probability
KMMT_Caution_Days.last40$Avg_Count_Caution_Days <- KMMT_Caution_Days.last40$`Caution Days`/40
##Create the average occurrence of heat cat 4-5 "high heat" days
KMMT_High_Heat_Days.last40 <- KMMT_Flag_Days.last40%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KMMT_High_Heat_Days.last40$High_Heat_Days <- KMMT_High_Heat_Days.last40$High_Heat_Days/40
#"Caution Days" include both high heat days and caution days
KMMT_Caution_Days.last40 <- full_join(KMMT_Caution_Days.last40,KMMT_High_Heat_Days.last40,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KMMT_Caution_Days.last40 <- replace(KMMT_Caution_Days.last40,is.na(KMMT_Caution_Days.last40),0)

##Create last 30 years of flag days (same construction, /30)
KMMT_Flag_Days.last30 <- KMMT_Flag_Days.last40%>%
  filter(between(date,"1992-10-01","2022-09-30"))
KMMT_Caution_Days.last30 <- KMMT_Flag_Days.last30%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KMMT_Caution_Days.last30$Avg_Count_Caution_Days <- KMMT_Caution_Days.last30$`Caution Days`/30
##Create the average occurrence of heat cat 4-5 "high heat" days
KMMT_High_Heat_Days.last30<- KMMT_Flag_Days.last30%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KMMT_High_Heat_Days.last30$High_Heat_Days <- KMMT_High_Heat_Days.last30$High_Heat_Days/30
KMMT_Caution_Days.last30 <- full_join(KMMT_Caution_Days.last30,KMMT_High_Heat_Days.last30,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KMMT_Caution_Days.last30 <- replace(KMMT_Caution_Days.last30,is.na(KMMT_Caution_Days.last30),0)

##Create last 10 years of flag days (same construction, /10)
KMMT_Flag_Days.last10 <- KMMT_Flag_Days.last40%>%
  filter(between(date,"2012-10-01","2022-09-30"))
KMMT_Caution_Days.last10 <- KMMT_Flag_Days.last10%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KMMT_Caution_Days.last10$Avg_Count_Caution_Days <- KMMT_Caution_Days.last10$`Caution Days`/10
##Create the average occurrence of heat cat 4-5 "high heat" days
KMMT_High_Heat_Days.last10<- KMMT_Flag_Days.last10%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KMMT_High_Heat_Days.last10$High_Heat_Days <- KMMT_High_Heat_Days.last10$High_Heat_Days/10
KMMT_Caution_Days.last10 <- full_join(KMMT_Caution_Days.last10,KMMT_High_Heat_Days.last10,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KMMT_Caution_Days.last10 <- replace(KMMT_Caution_Days.last10,is.na(KMMT_Caution_Days.last10),0)
```

##Create a data frame of Red and Black Flag Days - last 30

```{r}
##Create a df that has all days at RED and all days at BLACK
KMMT_RedorBlack_Flag_Days.last30 <-
## Continuation of KMMT_RedorBlack_Flag_Days.last30: one row per date that was
## Cat-4 and/or Cat-5 in the last-30-year window.
  full_join(KMMT_Cat4,KMMT_Cat5,by="date")
KMMT_RedorBlack_Flag_Days.last30 <- KMMT_RedorBlack_Flag_Days.last30%>%
  select(date,Day_Cat4,Day_Cat5)%>%
  mutate(Julian=yday(date))%>%
  mutate(year.y=lubridate::year(date))
## Dates present on only one side of the join get NA flags; treat as 0.
KMMT_RedorBlack_Flag_Days.last30 <- replace(KMMT_RedorBlack_Flag_Days.last30,is.na(KMMT_RedorBlack_Flag_Days.last30),0)
```

##Plot the training cycle vs red and black flag days (Hazardous Heat per Class)

```{r}
## Lollipop chart: one stem per class start date, bubble sized/labelled by the
## class's average hazardous-heat hours.
FJSC_heat_hours.plot <- ggplot (FJSC_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+
  geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+
  geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Hours of Hazardous\nHeat per Class")+
  theme_light() +
  geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-2,hjust=0,size=3)+
  theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30) )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+
  geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=4,fontface='bold')+
  xlab("FJSC BCT Class Number (FY23)\nLine corresponds to class start date")+ylab("'Red' or 'Black' Hours per Class")+
  ggtitle("Average (mean) hours of hazardous heat by basic training class",subtitle="Ft Jackson, SC")+
  labs(caption = "Hours averaged from 30 year period (October 1992 - September 2022).\nClass dates from FY 2023 basic combat training schedule")
FJSC_heat_hours.plot
```

###PLOT COURSE NUMBER AND THE LIKLIHOOD THAT THE DAY IS DANGEROUS ON SAME GRAPH

```{r}
ggplot()+
  #geom_line(data=KMMT_Caution_Days.last40,aes(x=date,y=Avg_Count_Caution_Days*250))+
  geom_smooth(data=KMMT_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200))+
  scale_y_continuous(limits=c(0,200), name="Average Hazardous Heat Hours per Class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of Cautionary Heat Day"))+ ##second axis is divided by 200, since to plot the geom_smooth line we multiplied by 200
  geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+
  geom_text(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=5,fontface='bold')+
  theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30) )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+
  xlab("FJSC BCT Class Number (FY23 example)")+
  ggtitle("Hours Hazardous Heat for Basic Training and probability of experiencing a cautionary heat day\nbased on FY 2023 course schedule",subtitle="Ft Jackson, SC")+
  theme_light()
```

###Rank days by high heat and by heat wave probability

```{r}
KMMT_Caution_Days.last40.sorted <- KMMT_Caution_Days.last40%>%
  mutate(High_Heat_Ranked=rank(-High_Heat_Days))%>% # negate so rank 1 = highest probability
  mutate(Avg_Count_Caution_Days_Ranked=rank(-Avg_Count_Caution_Days))
KMMT_Caution_Days.last40.sorted <- KMMT_Caution_Days.last40.sorted%>%
  arrange(KMMT_Caution_Days.last40.sorted$High_Heat_Ranked,KMMT_Caution_Days.last40.sorted$Avg_Count_Caution_Days_Ranked)%>%
  mutate(newcol=High_Heat_Ranked[]+Avg_Count_Caution_Days_Ranked)%>% # combined rank score; `[]` is a no-op subscript
  mutate(ranking=c(1:nrow(KMMT_Caution_Days.last40.sorted)))%>%
  mutate(ranking_rescaled=rescale(-ranking))
##this ranking is based on the following - probability of a high heat day, probability of a cautionary heat day, and then earlier in the year. scale is used to transform the individual day to something that can be plotted on a % axis.
```

##Prior 30 year hazardous and caution probability

```{r}
## Same caution/high-heat construction as above, for the 1982-2011 baseline.
KMMT_Flag_Days.1982.2011 <- KMMT_Flag_Days.last40%>%
  filter(between(date,"1982-01-01","2011-12-31"))
##Create the average occurrence of "caution" day
KMMT_Caution_Days.prev30 <- KMMT_Flag_Days.1982.2011%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>% # 2023 date used purely as a plotting axis
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KMMT_Caution_Days.prev30$Avg_Count_Caution_Days <- KMMT_Caution_Days.prev30$`Caution Days`/30
##Create the average occurrence of heat cat 4-5 "high heat" days
KMMT_High_Heat_Days.prev30<- KMMT_Flag_Days.1982.2011%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KMMT_High_Heat_Days.prev30$High_Heat_Days <- KMMT_High_Heat_Days.prev30$High_Heat_Days/30
KMMT_Caution_Days.prev30 <- full_join(KMMT_Caution_Days.prev30,KMMT_High_Heat_Days.prev30,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KMMT_Caution_Days.prev30 <- replace(KMMT_Caution_Days.prev30,is.na(KMMT_Caution_Days.prev30),0)
```

###Plot the likelihood that a day is a 'Cautionary' or 'Hazardous' Heat Day

```{r}
##Cautionary Days: smoothed daily probability, previous-30-year baseline (blue)
##vs last 10 years (purple), with 95% confidence ribbons.
KMMT_Likelihood_cautionary_days.plot <- ggplot(KMMT_Caution_Days.prev30)+
  geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+
  geom_smooth(data=KMMT_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+
  scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+
  scale_y_continuous(limits=c(0.1,.9),
                     labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Purple', size=16))+
  ggtitle("Cautionary heat daily probability",subtitle="Ft Jackson, SC")
KMMT_Likelihood_cautionary_days.plot
##Hazardous Days: same comparison for Cat 4/5 days (orange = baseline, red = last 10)
KMMT_Likelihood_hazardous_days.plot <- ggplot(KMMT_Caution_Days.prev30)+
  geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+
  geom_smooth(data=KMMT_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+
  scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+
  scale_y_continuous(limits=c(0.1,.9),
                     labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Red', size=16))+
  ggtitle("Hazardous heat (category 4 or 5) daily probability",subtitle="Ft Jackson, SC")
#geom_line(data=KMMT_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'))
KMMT_Likelihood_hazardous_days.plot
```

## Last 30-year Cat 4 or Cat 5 code section

```{r}
##Create the average occurrence of "cat 4" day
KMMT_Cat4_Days.last30 <-
## Continuation of KMMT_Cat4_Days.last30: per-(month, day) count of Cat-4 days
## over the last 30 years, mapped to 2023 dates for plotting.
  KMMT_RedorBlack_Flag_Days.last30%>%
  filter(Day_Cat4==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Heat Cat 4" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Heat Cat 4",Julian,date)
KMMT_Cat4_Days.last30$`Heat Cat 4` <- KMMT_Cat4_Days.last30$`Heat Cat 4`/30 # count/30 years = daily likelihood
KMMT_Cat4_Days.last30 <- KMMT_Cat4_Days.last30%>%
  rename(Likelihood_Cat4 = `Heat Cat 4`)

##Create the average occurrence of "cat 5" day
KMMT_Cat5_Days.last30 <- KMMT_RedorBlack_Flag_Days.last30%>%
  filter(Day_Cat5==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Heat Cat 5" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Heat Cat 5",Julian,date)
KMMT_Cat5_Days.last30$`Heat Cat 5` <- KMMT_Cat5_Days.last30$`Heat Cat 5`/30
KMMT_Cat5_Days.last30 <- KMMT_Cat5_Days.last30%>%
  rename(Likelihood_Cat5 = `Heat Cat 5`)

##Create the average occurrence of "hazardous heat" day (Cat 4 OR Cat 5).
## FIX: the original used the scalar `||`, which is invalid on the vector
## columns inside filter() (errors in R >= 4.3); the elementwise `|` is correct.
KMMT_Hazardous_Days.last30 <- KMMT_RedorBlack_Flag_Days.last30%>%
  filter(Day_Cat5==1 | Day_Cat4==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Hazardous Heat" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Hazardous Heat",Julian,date)
KMMT_Hazardous_Days.last30$`Hazardous Heat` <- KMMT_Hazardous_Days.last30$`Hazardous Heat`/30
KMMT_Hazardous_Days.last30 <- KMMT_Hazardous_Days.last30%>%
  rename(Likelihood_Hazardous_Heat = `Hazardous Heat`)

## Combine the three likelihood tables on Julian day.
KMMT_Likelihood_Cat4xCat5.last30 <- full_join(KMMT_Cat4_Days.last30,KMMT_Cat5_Days.last30,by="Julian")%>%
  select(Likelihood_Cat4,Likelihood_Cat5,Julian,date.x)%>%
  rename(date=date.x)
KMMT_Likelihood_Cat4xCat5.last30 <- full_join(KMMT_Likelihood_Cat4xCat5.last30,KMMT_Hazardous_Days.last30,by="Julian")%>%
  select(Likelihood_Cat4,Likelihood_Cat5,Likelihood_Hazardous_Heat,Julian,date.x)%>%
  rename(date=date.x)
## Days missing from one side of the joins carry NA likelihoods; treat them as
## a zero probability in all three likelihood columns.
KMMT_Likelihood_Cat4xCat5.last30 <- KMMT_Likelihood_Cat4xCat5.last30%>%
  mutate(across(c(Likelihood_Cat4,Likelihood_Cat5,Likelihood_Hazardous_Heat),
                ~replace(.x,is.na(.x),0)))
```

###Plot the likelihood that a day is Cat4 or Cat5

```{r}
##Heat Cat 4 and Heat Cat 5, last 30: smoothed daily likelihoods with 95%
##confidence ribbons (orange = Red flag, red = Black flag).
KMMT_Likelihood_Cat4xCat5.last30.plot <- ggplot(KMMT_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+
  geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",
                       breaks=c("orange","red"),
                       labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),
                       guide="legend")+
  scale_y_continuous(limits=c(0,.8),
                     labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30),
        legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"),
        plot.title=element_text(family='', face='bold', colour='Black', size=16))+
  ggtitle("Daily probability of experiencing hazardous heat",subtitle="Ft Jackson, SC\n(Oct 1992 - Sept 2022)")
#geom_line(data=KMMT_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'))
KMMT_Likelihood_Cat4xCat5.last30.plot
```

##25th-75th Quartiles

```{r}
## Attach day-of-month and Julian-day columns used by the quantile code below.
KMMT_Daily_Values.C <- KMMT_Daily_Values.C%>%
  mutate(day=day(date), Julian=yday(date))
KMMT_Daily_Values.C.previous30 <- KMMT_Daily_Values.C%>%
  filter(between(date,"1982-01-01","2011-12-31"))
KMMT_Daily_Values.C.last10 <- KMMT_Daily_Values.C%>%
  filter(between(date,"2012-10-01","2022-09-30"))
```

###- Previous 30 years - Find Quantiles and create data frame

```{r}
##find the quartiles for each day
## Helper replacing twelve copy-pasted blocks: for one daily metric, compute
## the 0/25/50/75/100% quantiles for each Julian day, return a data frame with
## columns `0%`..`100%`, a Julian index, and a 2023 plotting date.
## NOTE: the original piped the quantile matrix into as.data.frame(x) with x
## passed again positionally, which lands in the `row.names` argument -- the
## plain as.data.frame(q) here is the intended call. seq_len() replaces
## c(1:nrow(...)), which misbehaves on zero-row input.
julian_day_quantiles <- function(values, julian) {
  q <- as.data.frame(do.call("rbind", tapply(values, julian, quantile)))
  q$Julian <- seq_len(nrow(q))
  ## Julian 1 -> 2023-01-01, etc.; used purely as a common plotting axis
  q$date <- as.Date(q$Julian, origin = as.Date("2022-12-31"))
  q
}

##WBGT (previous 30 years)
KMMT_Daily_Values.C.previous30.quantile.daily_avg_WBGT <-
  julian_day_quantiles(KMMT_Daily_Values.C.previous30$daily_avg_WBGT, KMMT_Daily_Values.C.previous30$Julian)
#view(KMMT_Daily_Values.C.previous30.quantile.daily_avg_WBGT)
KMMT_Daily_Values.C.previous30.quantile.daily_high_WBGT <-
  julian_day_quantiles(KMMT_Daily_Values.C.previous30$daily_high_WBGT, KMMT_Daily_Values.C.previous30$Julian)
#view(KMMT_Daily_Values.C.previous30.quantile.daily_high_WBGT)
KMMT_Daily_Values.C.previous30.quantile.daily_low_WBGT <-
  julian_day_quantiles(KMMT_Daily_Values.C.previous30$daily_low_WBGT, KMMT_Daily_Values.C.previous30$Julian)
#view(KMMT_Daily_Values.C.previous30.quantile.daily_low_WBGT)

###temperature (previous 30 years)
KMMT_Daily_Values.C.previous30.quantile.daily_avg <-
  julian_day_quantiles(KMMT_Daily_Values.C.previous30$daily_avg, KMMT_Daily_Values.C.previous30$Julian)
#view(KMMT_Daily_Values.C.previous30.quantile.daily_avg)
KMMT_Daily_Values.C.previous30.quantile.daily_high <-
  julian_day_quantiles(KMMT_Daily_Values.C.previous30$daily_high, KMMT_Daily_Values.C.previous30$Julian)
#view(KMMT_Daily_Values.C.previous30.quantile.daily_high)
KMMT_Daily_Values.C.previous30.quantile.daily_low <-
  julian_day_quantiles(KMMT_Daily_Values.C.previous30$daily_low, KMMT_Daily_Values.C.previous30$Julian)
```

###- Last 10 years - Find Quantiles and create data frame

```{r}
##find the quartiles for each day
##WBGT (last 10 years)
KMMT_Daily_Values.C.last10.quantile.daily_avg_WBGT <-
  julian_day_quantiles(KMMT_Daily_Values.C.last10$daily_avg_WBGT, KMMT_Daily_Values.C.last10$Julian)
#view(KMMT_Daily_Values.C.last10.quantile.daily_avg_WBGT)
KMMT_Daily_Values.C.last10.quantile.daily_high_WBGT <-
  julian_day_quantiles(KMMT_Daily_Values.C.last10$daily_high_WBGT, KMMT_Daily_Values.C.last10$Julian)
#view(KMMT_Daily_Values.C.last10.quantile.daily_high_WBGT)
KMMT_Daily_Values.C.last10.quantile.daily_low_WBGT <-
  julian_day_quantiles(KMMT_Daily_Values.C.last10$daily_low_WBGT, KMMT_Daily_Values.C.last10$Julian)
#view(KMMT_Daily_Values.C.last10.quantile.daily_low_WBGT)

###temperature (last 10 years)
KMMT_Daily_Values.C.last10.quantile.daily_avg <-
  julian_day_quantiles(KMMT_Daily_Values.C.last10$daily_avg, KMMT_Daily_Values.C.last10$Julian)
#view(KMMT_Daily_Values.C.last10.quantile.daily_avg)
KMMT_Daily_Values.C.last10.quantile.daily_high <-
  julian_day_quantiles(KMMT_Daily_Values.C.last10$daily_high, KMMT_Daily_Values.C.last10$Julian)
#view(KMMT_Daily_Values.C.last10.quantile.daily_high)
KMMT_Daily_Values.C.last10.quantile.daily_low <-
  julian_day_quantiles(KMMT_Daily_Values.C.last10$daily_low, KMMT_Daily_Values.C.last10$Julian)
#view(KMMT_Daily_Values.C.last10.quantile.daily_low)
```

###Plot the quartiles

```{r}
## Interquartile ribbons of daily max/min WBGT: previous 30 years (muted) vs
## last 10 years (saturated).
KMMT_Quantiles_high_WBGT.plot <- ggplot()+
  geom_ribbon(data=KMMT_Daily_Values.C.previous30.quantile.daily_high_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='orange'),color='grey',alpha=0.4)+
  geom_ribbon(data=KMMT_Daily_Values.C.last10.quantile.daily_high_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='red'),color='red',alpha=0.6)+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+theme(axis.text.x = element_text(angle = 30),plot.title=element_text(family='', face='bold', colour='Red', size=16))+
  scale_y_continuous(limits=c(8,35), name="WBGT")+
  geom_ribbon(data=KMMT_Daily_Values.C.previous30.quantile.daily_low_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='blue'),color='grey',alpha=0.4)+
  geom_ribbon(data=KMMT_Daily_Values.C.last10.quantile.daily_low_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='purple'),color='purple',alpha=0.6)+
  scale_fill_identity(name="25th-75th Quartiles",breaks=c("red","orange","purple","blue"),labels=c("Last 10-year\nmaximum","Previous 30-year\nmaximum","Last 10-year\nminimum","Previous 30-year\n minimum"),guide="legend")+
  ggtitle("Heat Season Maximum and Minimum WBGT",subtitle="Ft Jackson, SC")
KMMT_Quantiles_high_WBGT.plot
```

#KLSF (Ft Moore, GA)
#### INITIAL DATA LOAD AND SET UP

```{r}
#read in the 14th weather squadron observations. This dataset must be requested from their repository
KLSF <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KLSF_Raw.csv")
## Build a POSIXct timestamp and pad() so every hour has a row (gaps become NA).
KLSF <- KLSF %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad()
print(sum(is.na(KLSF$Temperature..F.)))
print(sum(is.na(KLSF$Derived.Wet.Bulb.Globe.Temperature..F.)))
## Daylight-hours subset of the last 10 fiscal years.
KLSF_last10_raw <- KLSF%>%
  filter(between(datetime,"2012-10-01 00:00:00 UTC","2022-09-30 23:00:00 UTC"))%>%
  filter(between(Hour..UTC.,8,19))
## NOTE(review): 43208 appears to be the expected row count for the window -- confirm
print(43208-(sum(is.na(KLSF_last10_raw$Derived.Wet.Bulb.Globe.Temperature..F.))))
```

###Data Table build

```{r}
## Per-decade subsets; each is followed by a completeness check that prints the
## row count, the NA count, their difference, and (auto-printed) the non-NA
## fraction for the derived WBGT column.
KLSF_raw_60s <- KLSF%>%
  filter(between(datetime,"1960-01-01 00:00:00","1969-12-31 23:00:00"))
print((print((nrow(KLSF_raw_60s)))-print(sum(is.na(KLSF_raw_60s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_60s)
KLSF_raw_60s.daylight <- KLSF_raw_60s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KLSF_raw_60s.daylight)))-print(sum(is.na(KLSF_raw_60s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_60s.daylight)
KLSF_raw_70s <- KLSF%>%
  filter(between(datetime,"1970-01-01 00:00:00","1979-12-31 23:00:00"))
print((print((nrow(KLSF_raw_70s)))-print(sum(is.na(KLSF_raw_70s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_70s)
KLSF_raw_70s.daylight <- KLSF_raw_70s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KLSF_raw_70s.daylight)))-print(sum(is.na(KLSF_raw_70s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_70s.daylight) KLSF_raw_80s <- KLSF%>% filter(between(datetime,"1980-01-01 00:00:00","1989-12-31 23:00:00")) print((print((nrow(KLSF_raw_80s)))-print(sum(is.na(KLSF_raw_80s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_80s) KLSF_raw_80s.daylight <- KLSF_raw_80s%>% mutate("Hour"=hour(datetime))%>% filter(between(Hour,8,19)) print((print((nrow(KLSF_raw_80s.daylight)))-print(sum(is.na(KLSF_raw_80s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_80s.daylight) KLSF_raw_90s <- KLSF%>% filter(between(datetime,"1990-01-01 00:00:00","1999-12-31 23:00:00")) print((print((nrow(KLSF_raw_90s)))-print(sum(is.na(KLSF_raw_90s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_90s) KLSF_raw_90s.daylight <- KLSF_raw_90s%>% mutate("Hour"=hour(datetime))%>% filter(between(Hour,8,19)) print((print((nrow(KLSF_raw_90s.daylight)))-print(sum(is.na(KLSF_raw_90s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_90s.daylight) KLSF_raw_00s <- KLSF%>% filter(between(datetime,"2000-01-01 00:00:00","2009-12-31 23:00:00")) print((print((nrow(KLSF_raw_00s)))-print(sum(is.na(KLSF_raw_00s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_00s) KLSF_raw_00s.daylight <- KLSF_raw_00s%>% mutate("Hour"=hour(datetime))%>% filter(between(Hour,8,19)) print((print((nrow(KLSF_raw_00s.daylight)))-print(sum(is.na(KLSF_raw_00s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_00s.daylight) KLSF_raw_10s <- KLSF%>% filter(between(datetime,"2010-01-01 00:00:00","2019-12-31 23:00:00")) print((print((nrow(KLSF_raw_10s)))-print(sum(is.na(KLSF_raw_10s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_10s) KLSF_raw_10s.daylight <- KLSF_raw_10s%>% mutate("Hour"=hour(datetime))%>% filter(between(Hour,8,19)) 
print((print((nrow(KLSF_raw_10s.daylight)))-print(sum(is.na(KLSF_raw_10s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_10s.daylight) KLSF_raw_20s <- KLSF%>% filter(between(datetime,"2020-01-01 00:00:00","2022-12-31 23:00:00")) print((print((nrow(KLSF_raw_20s)))-print(sum(is.na(KLSF_raw_20s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_20s) KLSF_raw_20s.daylight <- KLSF_raw_20s%>% mutate("Hour"=hour(datetime))%>% filter(between(Hour,8,19)) print((print((nrow(KLSF_raw_20s.daylight)))-print(sum(is.na(KLSF_raw_20s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_20s.daylight) KLSF_raw_last30 <- KLSF%>% filter(datetime>"1992-10-01 00:00:00") print((print((nrow(KLSF_raw_last30)))-print(sum(is.na(KLSF_raw_last30$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KLSF_raw_last30) ``` ###Wrangle ```{r} KLSF <- KLSF%>% mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>% mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>% mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>% mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>% rename('Temp.F' = Temperature..F.)%>% rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>% rename('Heat.Index.F'=Heat.Index..F.)%>% rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>% mutate("Hour"=hour(datetime))%>% mutate("Day"=day(datetime))%>% mutate("Month"=month(datetime))%>% mutate("Year"=year(datetime))%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3)) ##CREATE DECADE COLUMN KLSF <- KLSF%>% mutate(Decade=floor(Year/10)*10) print(sum(is.na(KLSF$Temp.F))) print(sum(is.na(KLSF$WBGT.F))) ``` ####Check to see if there are trends in the humidity data - note only used in FBGA data set, this is not required for the Military Medicine paper. 
```{r}
##Daily mean relative humidity, with linear interpolation of any gaps.
KLSF_Hum.Daily <- KLSF%>%
  group_by(Date)%>%
  summarise(hum.daily=mean(RH....,na.rm=TRUE))%>%
  mutate("Month"=month(Date))%>%
  mutate("Year"=year(Date))%>%
  mutate(hum.daily = zoo::na.approx(hum.daily))
sum(is.na(KLSF_Hum.Daily$hum.daily))

##Seasonal Mann-Kendall trend test on daily humidity.
KLSF_Daily_Hum_MannKendall <- kendallSeasonalTrendTest(hum.daily ~ Month+Year, data=KLSF_Hum.Daily)
print(KLSF_Daily_Hum_MannKendall)

##Time-series decomposition (365-day seasonality) and a second seasonal
##Mann-Kendall on the ts object for comparison.
KLSF_Daily_Hum_MannKendall.TS <- ts(KLSF_Hum.Daily$hum.daily, start = c(1960,01,01),frequency=365)
KLSF_Daily_Hum.decomp <- stl(KLSF_Daily_Hum_MannKendall.TS,s.window="periodic")
plot(KLSF_Daily_Hum.decomp)
KLSF_Daily_Hum_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_Hum_MannKendall.TS)
summary(KLSF_Daily_Hum_Trend)
```

## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
###1960-1970
```{r}
##Fill missing hourly WBGT.C/Temp.C values for one decade using the
##hour-of-day climatology (mean per Hour/Day/Month) computed over `pool`,
##the decade itself plus its neighbors. This replaces seven near-identical
##per-decade chunks; output columns and order match the originals:
##Hour, Day, Month, Year, Decade, Ave_WBGT.C, Date, count, Ave_Temp.C.
##FIX: the per-decade row counts were hard-coded (87672/87648/24096), which
##errors if the input size ever changes; row_number() adapts automatically.
klsf_fill_decade <- function(df, decade, pool) {
  ##average WBGT & Temp.C per hour-of-year over the pooled decades,
  ##ignoring missing data in the calculation
  hour_avg <- df%>%
    filter(Decade %in% pool)%>%
    group_by(Hour, Day, Month)%>%
    summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)
  df%>%
    filter(Decade == decade)%>%
    as.data.frame()%>%
    mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
    mutate(count=row_number())%>%
    full_join(hour_avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))%>%
    ##replace NA observations with the climatological average
    mutate(Ave_WBGT.C = coalesce(WBGT.C.x, WBGT.C.y))%>%
    mutate(Ave_Temp.C = coalesce(Temp.C.x, Temp.C.y))%>%
    dplyr::select(Hour, Day, Month, Year, Decade, Ave_WBGT.C, Date, count, Ave_Temp.C)
}

KLSF_60s <- klsf_fill_decade(KLSF, 1960, c(1960, 1970))
```
####1970s
```{r}
##1970s 20% missing (after maxgap=3)
KLSF_70s <- klsf_fill_decade(KLSF, 1970, c(1960, 1970, 1980))
```
####1980
```{r}
##1980s - no missing values
KLSF_80s <- klsf_fill_decade(KLSF, 1980, c(1970, 1980, 1990))
```
####1990
```{r}
##1990s
KLSF_90s <- klsf_fill_decade(KLSF, 1990, c(1980, 1990, 2000))
```
####2000s
```{r}
##2000
##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful.
KLSF_00s <- klsf_fill_decade(KLSF, 2000, c(1990, 2000, 2010))
```
####2010
```{r}
##2010
KLSF_10s <- klsf_fill_decade(KLSF, 2010, c(2000, 2010, 2020))
```
#### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029
```{r}
##Partial decade: observations run 2020-2022 only.
KLSF_20s <- klsf_fill_decade(KLSF, 2020, c(2010, 2020))
```
###Combine all chunks to new KLSF data set with the interpolated values. Only 22 NAs now.
```{r}
##Stack the per-decade imputed chunks into one hourly data set.
KLSF_Wrangled <- rbind(KLSF_60s,KLSF_70s,KLSF_80s,KLSF_90s,KLSF_00s,KLSF_10s,KLSF_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
sum(is.na(KLSF_Wrangled$Ave_Temp.C)) ##28 still missing
sum(is.na(KLSF_Wrangled$Ave_WBGT.C)) ##22 still missing
```

###Create a single df with daily values
```{r}
##Collapse the hourly record to daily max/min/mean for temperature and WBGT.
KLSF_Daily_Values.C <- KLSF_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C),daily_low = min(Ave_Temp.C),daily_avg = mean(Ave_Temp.C),daily_high_WBGT = max(Ave_WBGT.C),daily_low_WBGT = min(Ave_WBGT.C),daily_avg_WBGT = mean(Ave_WBGT.C))%>%
  as.data.frame()
```

### Look for missing dates in the complete data set
No missing dates!
```{r}
##FIX: this chunk was fenced with two backticks (``) instead of three, so
##knitr rendered it as inline text and never executed the check; the proper
##```{r} fence is restored.
FullSeq <- seq.Date(from = min(KLSF_Wrangled$date), to = max(KLSF_Wrangled$date), by = 1)
Missing <- FullSeq[!FullSeq %in% KLSF_Wrangled$date]
Missing
```

###RED and BLACK Count
#### Start Red and Black Flag Day Analysis
```{r}
##Hours at/above the red-flag WBGT threshold, per year.
KLSF_REDorABOVEbyYear <- KLSF_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
##Hours at/above the black-flag WBGT threshold, per year.
KLSF_BLACKorABOVEbyYear <- KLSF_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)

##Filtering by Red or Above will include Black days - Hazard Day
KLSF_AboveRed.last40 <- KLSF_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Above_Red = n)%>%
  ##the ifelse always yields 1: it converts the hour count into a day flag
  mutate(Day_Above_Red =ifelse(Day_Above_Red == '',1,1))%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))

##Filters for Green and above - Caution Day (WBGT in the green-to-yellow band)
KLSF_AboveGreen.last40 <- KLSF_Wrangled%>%
  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[1],Heat_Categories$Category_Temp_Min.C[2]))%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Green_or_Yellow = n)%>%
  mutate(Day_Green_or_Yellow =ifelse(Day_Green_or_Yellow == '',1,1))%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))
```

##Create DF with all Flag Day counts only these are days at are at RED OR ABOVE or are days following RED OR ABOVE that are also GREEN OR ABOVE
```{r}
##Create a df that has all days at/above red OR days at/above green following a red+ day
KLSF_Flag_Days.last40 <- full_join(KLSF_AboveRed.last40,KLSF_AboveGreen.last40,by="date")
KLSF_Flag_Days.last40 <- KLSF_Flag_Days.last40%>%
  dplyr::select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>%
  mutate(Julian.y=yday(date))%>%
  mutate(year.y=lubridate::year(date))
##days absent from either side of the join get 0 flags
KLSF_Flag_Days.last40 <- replace(KLSF_Flag_Days.last40,is.na(KLSF_Flag_Days.last40),0)
#view(KLSF_Flag_Days.last40)
KLSF_Flag_Days.last40 <- KLSF_Flag_Days.last40%>%
  arrange(ymd(KLSF_Flag_Days.last40$date)) ##this line gets the df in date order for the following loop
KLSF_Flag_Days.last40$count <- c(1:nrow(KLSF_Flag_Days.last40))
##Lagged marks days immediately following a red-or-above day.
KLSF_Flag_Days.last40$Lagged <- lag(KLSF_Flag_Days.last40$Day_Above_Red>0,1)
KLSF_Caution_Days.last40 <- KLSF_Flag_Days.last40%>%
  filter(Lagged==TRUE | Day_Above_Red==1)
#view(KLSF_Flag_Days.last40)
#view(KLSF_Caution_Days.last40)
```

##TIME SERIES
### Seasonal and trend plots
```{r}
####TIME SERIES ANALYSIS ON THE DATASET
##Daily HIGH
KLSF_Daily_High.TS <- ts(KLSF_Daily_Values.C$daily_high, start = c(1960,01,01),frequency=365)
KLSF_Daily_High.decomp <- stl(KLSF_Daily_High.TS,s.window="periodic")
plot(KLSF_Daily_High.decomp)
KLSF_Daily_High_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_High.TS)
summary(KLSF_Daily_High_Trend)
##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value?).

##Daily LOW
KLSF_Daily_Low.TS <- ts(KLSF_Daily_Values.C$daily_low, start = c(1960,01,01),frequency=365)
KLSF_Daily_Low.decomp <- stl(KLSF_Daily_Low.TS,s.window="periodic")
plot(KLSF_Daily_Low.decomp)
KLSF_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_Low.TS)
summary(KLSF_Daily_Low_Trend)

##Daily AVG
KLSF_Daily_Avg.TS <- ts(KLSF_Daily_Values.C$daily_avg, start = c(1960,01,01),frequency=365)
KLSF_Daily_Avg.decomp <- stl(KLSF_Daily_Avg.TS,s.window="periodic")
plot(KLSF_Daily_Avg.decomp)
KLSF_Daily_Avg_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_Avg.TS)
summary(KLSF_Daily_Avg_Trend)

##WBGT
##WBGT.High
KLSF_Daily_High_WBGT.TS <- ts(KLSF_Daily_Values.C$daily_high_WBGT, start = c(1960,01,01),frequency=365)
KLSF_Daily_High_WBGT.decomp <- stl(KLSF_Daily_High_WBGT.TS,s.window="periodic")
plot(KLSF_Daily_High_WBGT.decomp)
KLSF_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_High_WBGT.TS)
summary(KLSF_Daily_High_WBGT_Trend)

##WBGT Low
KLSF_Daily_low_WBGT.TS <- ts(KLSF_Daily_Values.C$daily_low_WBGT, start = c(1960,01,01),frequency=365)
KLSF_Daily_low_WBGT.decomp <- stl(KLSF_Daily_low_WBGT.TS,s.window="periodic")
plot(KLSF_Daily_low_WBGT.decomp)
KLSF_Daily_low_WBGT_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_low_WBGT.TS)
summary(KLSF_Daily_low_WBGT_Trend)

##WBGT Avg
KLSF_Daily_avg_WBGT.TS <- ts(KLSF_Daily_Values.C$daily_avg_WBGT, start = c(1960,01,01),frequency=365)
KLSF_Daily_avg_WBGT.decomp <- stl(KLSF_Daily_avg_WBGT.TS,s.window="periodic")
plot(KLSF_Daily_avg_WBGT.decomp)
KLSF_Daily_avg_WBGT_Trend <- Kendall::SeasonalMannKendall(KLSF_Daily_avg_WBGT.TS)
summary(KLSF_Daily_avg_WBGT_Trend)
```

### Mann Kendall Seasonal Values
```{r}
##MANUALLY RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH
#Set up month and year columns
KLSF_Daily_Values.C$month <- month(ymd(KLSF_Daily_Values.C$date))
KLSF_Daily_Values.C$year <- year(ymd(KLSF_Daily_Values.C$date))

###WBGT
##HIGH WBGT
KLSF_Daily_High_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_high_WBGT ~ month+year, data=KLSF_Daily_Values.C)
#return just tau, slope, and intercept
KLSF_Daily_High_WBGT.C_MannKendall$estimate
##LOW WBGT
KLSF_Daily_Low_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_low_WBGT ~ month+year, data=KLSF_Daily_Values.C)
KLSF_Daily_Low_WBGT.C_MannKendall$estimate
##AVG WBGT
KLSF_Daily_Avg_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_avg_WBGT ~ month+year, data=KLSF_Daily_Values.C)
KLSF_Daily_Avg_WBGT.C_MannKendall$estimate

##Build data frame with the needed information
##NOTE(review): each added column is itself a data.frame; the later
##"time series full year data tables" chunk rebuilds these tables with the
##quartile rows included, so these are interim views.
KLSF_Daily_TS_estimates.C <- KLSF_Daily_High_WBGT.C_MannKendall$estimate%>%
  as.data.frame()
KLSF_Daily_TS_estimates.C$Low_WBGT <- as.data.frame(KLSF_Daily_Low_WBGT.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$Avg_WBGT <- as.data.frame(KLSF_Daily_Avg_WBGT.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$High_WBGT <- as.data.frame(KLSF_Daily_High_WBGT.C_MannKendall$estimate)
KLSF_Daily_TS_estimates_WBGT.C <- KLSF_Daily_TS_estimates.C[2:4]
##Remove uneeded variables
#remove(KLSF_Daily_High_WBGT.C_MannKendall,KLSF_Daily_Low_WBGT.C_MannKendall,KLSF_Daily_Avg_WBGT.C_MannKendall)

###Temperature
##HIGH
KLSF_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(daily_high ~ month+year, data=KLSF_Daily_Values.C)
#return just tau, slope, and intercept
KLSF_Daily_High.C_MannKendall$estimate
##LOW
KLSF_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(daily_low ~ month+year, data=KLSF_Daily_Values.C)
KLSF_Daily_Low.C_MannKendall$estimate
##AVG
KLSF_Daily_Avg.C_MannKendall <- kendallSeasonalTrendTest(daily_avg ~ month+year, data=KLSF_Daily_Values.C)
KLSF_Daily_Avg.C_MannKendall$estimate

##Build data frame with the needed information
KLSF_Daily_TS_estimates.C <- KLSF_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KLSF_Daily_TS_estimates.C$Low <- as.data.frame(KLSF_Daily_Low.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$Avg <- as.data.frame(KLSF_Daily_Avg.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$High <- as.data.frame(KLSF_Daily_High.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C <- KLSF_Daily_TS_estimates.C[2:4]
##Remove unneeded variables
#remove(KLSF_Daily_High.C_MannKendall,KLSF_Daily_Low.C_MannKendall,KLSF_Daily_Avg.C_MannKendall)
```

###Bottom quarter trend test
```{r}
##Temp
KLSF_quartile_test_lower25 <- KLSF_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.25)
KLSF_quartile_test_lower25$month <- month(ymd(KLSF_quartile_test_lower25$date))
KLSF_quartile_test_lower25$year <- year(ymd(KLSF_quartile_test_lower25$date))
##Bottom Quarter Temp
KLSF_quartile_test_lower25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KLSF_quartile_test_lower25)
KLSF_quartile_test_lower25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature

##WBGT
KLSF_quartile_test_lower25_WBGT <- KLSF_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.25)
KLSF_quartile_test_lower25_WBGT$month <- month(ymd(KLSF_quartile_test_lower25_WBGT$date))
KLSF_quartile_test_lower25_WBGT$year <- year(ymd(KLSF_quartile_test_lower25_WBGT$date))
##Bottom Quarter WBGT
##NOTE(review): the test result overwrites the data frame of the same name;
##downstream chunks read $estimate / $seasonal.estimates from it, so this
##shadowing is load-bearing and is kept as-is.
KLSF_quartile_test_lower25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KLSF_quartile_test_lower25_WBGT)
KLSF_quartile_test_lower25_WBGT$estimate ##returns a small but not 0 positive trend in temperature
```

###Upper quarter trend test
```{r}
##Temp
KLSF_quartile_test_upper25 <- KLSF_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.75)
KLSF_quartile_test_upper25$month <- month(ymd(KLSF_quartile_test_upper25$date))
KLSF_quartile_test_upper25$year <- year(ymd(KLSF_quartile_test_upper25$date))
##Upper Quarter Temp
KLSF_quartile_test_upper25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KLSF_quartile_test_upper25)
KLSF_quartile_test_upper25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature

##WBGT
KLSF_quartile_test_upper25_WBGT <- KLSF_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.75)
KLSF_quartile_test_upper25_WBGT$month <- month(ymd(KLSF_quartile_test_upper25_WBGT$date))
KLSF_quartile_test_upper25_WBGT$year <- year(ymd(KLSF_quartile_test_upper25_WBGT$date))
##Upper Quarter WBGT (same name-shadowing as the lower-quartile chunk)
KLSF_quartile_test_upper25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KLSF_quartile_test_upper25_WBGT)
KLSF_quartile_test_upper25_WBGT$estimate ##returns a small but not 0 positive trend in temperature
```

##time series full year data tables
```{r}
##Build data frame with the trend information - Temperature
KLSF_Daily_TS_estimates.C <- KLSF_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KLSF_Daily_TS_estimates.C$Low <- as.data.frame(KLSF_Daily_Low.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$Avg <- as.data.frame(KLSF_Daily_Avg.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$Upper <- as.data.frame(KLSF_quartile_test_upper25_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$Lower <- as.data.frame(KLSF_quartile_test_lower25_MannKendall$estimate)
KLSF_Daily_TS_estimates.C$High <- as.data.frame(KLSF_Daily_High.C_MannKendall$estimate)
KLSF_Daily_TS_estimates.C[2:6]

##Build data frame with the trend information - WBGT
KLSF_Daily_TS_estimates_WBGT.C <- KLSF_Daily_High_WBGT.C_MannKendall$estimate%>%
  as.data.frame()
KLSF_Daily_TS_estimates_WBGT.C$Low_WBGT <- as.data.frame(KLSF_Daily_Low_WBGT.C_MannKendall$estimate)
KLSF_Daily_TS_estimates_WBGT.C$Avg_WBGT <- as.data.frame(KLSF_Daily_Avg_WBGT.C_MannKendall$estimate)
KLSF_Daily_TS_estimates_WBGT.C$Upper_WBGT <- as.data.frame(KLSF_quartile_test_upper25_WBGT$estimate)
KLSF_Daily_TS_estimates_WBGT.C$Lower_WBGT <- as.data.frame(KLSF_quartile_test_lower25_WBGT$estimate)
KLSF_Daily_TS_estimates_WBGT.C$High_WBGT <- as.data.frame(KLSF_Daily_High_WBGT.C_MannKendall$estimate)
KLSF_Daily_TS_estimates_WBGT.C[2:6]
```

##warm month season trends
```{r}
##Warm-season (seasonal.estimates rows 5:9, i.e. May-September) mean slopes,
##ordered: Maximum, Upper 75%, Mean, Lower 25%, Minimum.
KLSF_warm_season.C <- c(
  mean(KLSF_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),
  mean(KLSF_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),
  mean(KLSF_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),
  mean(KLSF_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),
  mean(KLSF_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2]))
print(KLSF_warm_season.C)

KLSF_warm_season_WBGT.C <- c(
  mean(KLSF_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),
  mean(KLSF_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),
  mean(KLSF_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),
  mean(KLSF_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),
  mean(KLSF_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2]))
print(KLSF_warm_season_WBGT.C)
```

##Finalize trend tables for export
```{r}
##Full-year temperature trend table (slope per metric).
KLSF_full_year_trends.C <- as.data.frame(c(KLSF_Daily_High.C_MannKendall$estimate[2],KLSF_quartile_test_upper25_MannKendall$estimate[2],KLSF_Daily_Avg.C_MannKendall$estimate[2],KLSF_quartile_test_lower25_MannKendall$estimate[2],KLSF_Daily_Low.C_MannKendall$estimate[2]))
KLSF_full_year_trends.C$installation <- "FMGA"
KLSF_full_year_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KLSF_full_year_trends.C)[1] <- 'Temp.C'
print(KLSF_full_year_trends.C)

##Full-year WBGT trend table.
KLSF_full_year_trends_WBGT.C <- as.data.frame(c(KLSF_Daily_High_WBGT.C_MannKendall$estimate[2],KLSF_quartile_test_upper25_WBGT$estimate[2],KLSF_Daily_Avg_WBGT.C_MannKendall$estimate[2],KLSF_quartile_test_lower25_WBGT$estimate[2],KLSF_Daily_Low_WBGT.C_MannKendall$estimate[2]))
##FIX: the original assigned installation/Metric onto KLSF_full_year_trends.C
##a second time, leaving the WBGT table unlabeled (compensated for only by a
##fragile positional select in the combined-table chunk below).
KLSF_full_year_trends_WBGT.C$installation <- "FMGA"
KLSF_full_year_trends_WBGT.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KLSF_full_year_trends_WBGT.C)[1] <- 'WBGT.C'
print(KLSF_full_year_trends_WBGT.C)
```

##warm month season trends
```{r}
##Warm-season temperature trend table.
KLSF_warm_season_trends.C <- as.data.frame(c(mean(KLSF_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),mean(KLSF_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),mean(KLSF_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),mean(KLSF_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),mean(KLSF_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2])))
KLSF_warm_season_trends.C$installation <- "FMGA"
KLSF_warm_season_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KLSF_warm_season_trends.C)[1] <- 'Temp.C_Warm.season'
print(KLSF_warm_season_trends.C)

##Warm-season WBGT trend table.
KLSF_warm_season_trends_WBGT.C <- as.data.frame(c(mean(KLSF_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KLSF_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),mean(KLSF_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KLSF_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),mean(KLSF_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2])))
KLSF_warm_season_trends_WBGT.C$installation <- "FMGA"
KLSF_warm_season_trends_WBGT.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KLSF_warm_season_trends_WBGT.C)[1] <- 'WBGT.C_Warm.season'
print(KLSF_warm_season_trends_WBGT.C)
```

##full installation trend table
```{r}
##FIX: build the combined table by column name rather than cbind followed by
##a positional select(c(1:5,8)), which silently depended on the WBGT table
##missing its installation/Metric labels. Output columns are unchanged:
##installation, Metric, Temp.C, WBGT.C, Temp.C_Warm.season, WBGT.C_Warm.season.
FMGA_Historical_Trends <- data.frame(
  installation = KLSF_full_year_trends.C$installation,
  Metric = KLSF_full_year_trends.C$Metric,
  Temp.C = KLSF_full_year_trends.C$Temp.C,
  WBGT.C = KLSF_full_year_trends_WBGT.C$WBGT.C,
  Temp.C_Warm.season = KLSF_warm_season_trends.C$Temp.C_Warm.season,
  WBGT.C_Warm.season = KLSF_warm_season_trends_WBGT.C$WBGT.C_Warm.season
)
#view(FMGA_Historical_Trends)
```

#KCSG (adjactent weather station to Ft Moore, GA)
#### INITIAL DATA LOAD AND SET UP
```{r eval=FALSE}
##Same load-and-wrangle pipeline as KLSF, for the adjacent KCSG station.
KCSG <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KCSG_Raw.csv")
KCSG <- KCSG %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad() #%>%
# mutate('date' = make_date(year=Year, month = Month, day = Day))
KCSG <- KCSG%>%
  mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>%
  mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>%
  mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>%
  mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>%
  rename('Temp.F' = Temperature..F.)%>%
  rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>%
  rename('Heat.Index.F'=Heat.Index..F.)%>%
  rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>%
  mutate("Hour"=hour(datetime))%>%
  mutate("Day"=day(datetime))%>%
  mutate("Month"=month(datetime))%>%
  mutate("Year"=year(datetime))%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3))
##CREATE DECADE COLUMN
KCSG <- KCSG%>%
  mutate(Decade=floor(Year/10)*10)
```

## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
```{r eval=FALSE}
##Fill missing hourly WBGT.C/Temp.C values for one decade using the
##hour-of-day climatology of that same decade (unlike KLSF, the original
##KCSG chunks pooled only the decade itself). Replaces seven near-identical
##chunks; output columns/order match the originals:
##Hour, Day, Month, Year, Decade, Ave_WBGT.C, Date, count, Ave_Temp.C.
##FIX: per-decade row counts were hard-coded (87672 etc.); row_number() adapts.
kcsg_fill_decade <- function(df, decade) {
  ##average WBGT & Temp.C per hour-of-year, ignoring missing data
  hour_avg <- df%>%
    filter(Decade == decade)%>%
    group_by(Hour, Day, Month)%>%
    summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)
  df%>%
    filter(Decade == decade)%>%
    as.data.frame()%>%
    mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
    mutate(count=row_number())%>%
    full_join(hour_avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))%>%
    ##replace NA observations with the climatological average
    mutate(Ave_WBGT.C = coalesce(WBGT.C.x, WBGT.C.y))%>%
    mutate(Ave_Temp.C = coalesce(Temp.C.x, Temp.C.y))%>%
    dplyr::select(Hour, Day, Month, Year, Decade, Ave_WBGT.C, Date, count, Ave_Temp.C)
}

KCSG_60s <- kcsg_fill_decade(KCSG, 1960)
##1970s 20% missing (after maxgap=3)
KCSG_70s <- kcsg_fill_decade(KCSG, 1970)
##FIX: the original 1980s chunk computed its hourly averages from
##filter(Decade=="1970") — a copy-paste slip; it now uses the 1980s.
KCSG_80s <- kcsg_fill_decade(KCSG, 1980)
KCSG_90s <- kcsg_fill_decade(KCSG, 1990)
##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful.
KCSG_00s <- kcsg_fill_decade(KCSG, 2000)
KCSG_10s <- kcsg_fill_decade(KCSG, 2010)
##2020 decade is partial (2020-2022 observations)
KCSG_20s <- kcsg_fill_decade(KCSG, 2020)
```
###Combine all chunks to new KCSG data set with the interpolated values. Only 22 NAs now.
```{r eval=FALSE}
## Stack every imputed decade back into one hourly record; switch to a
## lower-case `date` column name used by the rest of the analysis.
KCSG_Wrangled <- rbind(KCSG_60s,KCSG_70s,KCSG_80s,KCSG_90s,KCSG_00s,KCSG_10s,KCSG_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
#sum(is.na(KCSG_Wrangled$Ave_Temp.C)) ##28 still missing
#sum(is.na(KCSG_Wrangled$Ave_WBGT.C)) ##22 still missing
##clear the environment of variables no longer needed
remove(KCSG_00s,KCSG_10s,KCSG_20s,KCSG_60s,KCSG_70s,KCSG_80s,KCSG_90s)
remove(KCSG_00s_Hour_Avg,KCSG_10s_Hour_Avg,KCSG_20s_Hour_Avg,KCSG_60s_Hour_Avg,KCSG_70s_Hour_Avg,KCSG_80s_Hour_Avg,KCSG_90s_Hour_Avg)
```

```{r eval=FALSE}
## Collapse the hourly record to one row per calendar day with high/low/mean
## of both dry-bulb temperature and WBGT (degrees C), from 1973 onward.
KCSG_Daily_Values.C <- KCSG_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C),daily_low = min(Ave_Temp.C),daily_avg = mean(Ave_Temp.C),daily_high_WBGT = max(Ave_WBGT.C),daily_low_WBGT = min(Ave_WBGT.C),daily_avg_WBGT = mean(Ave_WBGT.C))%>%
  filter(date>='1973-01-01')%>%
  as.data.frame()
##very small number of NAs removed here
## NOTE(review): remaining NAs are overwritten with the constant 18.8 across
## all six daily columns — presumably a long-run mean; confirm this fill
## value is intended for both the temperature and WBGT columns.
KCSG_Daily_Values.C <- replace(KCSG_Daily_Values.C,is.na(KCSG_Daily_Values.C),18.8)
```

###RED and BLACK Count
#### Start Red and Black Flag Day Analysis

```{r eval=FALSE}
## Hours per year at/above the RED WBGT threshold (Heat_Categories row 3);
## RED-or-above includes BLACK hours.
KCSG_REDorABOVEbyYear <- KCSG_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
## Hours per year at/above the BLACK WBGT threshold (Heat_Categories row 4).
KCSG_BLACKorABOVEbyYear <- KCSG_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)
##Filtering by Red or Above will include Black days - Hazard Day
#KCSG_AboveRed.last20 <- KCSG_Wrangled%>%
#  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
#  group_by(date)%>%
#  count(date)%>%
#  rename(Day_Above_Red = n)%>%
#  mutate(Day_Above_Red =ifelse(Day_Above_Red == '',1,1))%>%
#  mutate(Julian=yday(date))%>%
#  filter(date>="2002-01-01"&date<="2021-12-31")%>%
#  mutate(year=lubridate::year(date))
##Filters for Green and above - Caution Day
#KCSG_AboveGreen.last20 <- KCSG_Wrangled%>%
#  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[1],Heat_Categories$Category_Temp_Min.C[2]))%>%
#  group_by(date)%>%
#  count(date)%>%
#  rename(Day_Green_or_Yellow = n)%>%
#  mutate(Day_Green_or_Yellow =ifelse(Day_Green_or_Yellow == '',1,1))%>%
#  mutate(Julian=yday(date))%>%
#  filter(date>="2002-01-01"&date<="2021-12-31")%>%
#  mutate(year=lubridate::year(date))
```

##TIME SERIES
### Seasonal and trend plots

```{r eval=FALSE}
####TIME SERIES ANALYSIS ON THE DATASET
## Each daily series is turned into a ts object (daily, frequency 365,
## starting 1973), decomposed with STL, and tested for monotonic trend with
## the Seasonal Mann-Kendall test.
##Daily HIGH
KCSG_Daily_High.TS <- ts(KCSG_Daily_Values.C$daily_high, start = c(1973,01,01),frequency=365)
KCSG_Daily_High.decomp <- stl(KCSG_Daily_High.TS,s.window="periodic")
plot(KCSG_Daily_High.decomp)
KCSG_Daily_High_Trend <- Kendall::SeasonalMannKendall(KCSG_Daily_High.TS)
summary(KCSG_Daily_High_Trend)
##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value?).
##Daily LOW
KCSG_Daily_Low.TS <- ts(KCSG_Daily_Values.C$daily_low, start = c(1973,01,01),frequency=365)
KCSG_Daily_Low.decomp <- stl(KCSG_Daily_Low.TS,s.window="periodic")
plot(KCSG_Daily_Low.decomp)
KCSG_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KCSG_Daily_Low.TS)
summary(KCSG_Daily_Low_Trend)
##Daily AVG
KCSG_Daily_Avg.TS <- ts(KCSG_Daily_Values.C$daily_avg, start = c(1973,01,01),frequency=365)
KCSG_Daily_Avg.decomp <- stl(KCSG_Daily_Avg.TS,s.window="periodic")
plot(KCSG_Daily_Avg.decomp)
KCSG_Daily_Avg_Trend <- Kendall::SeasonalMannKendall(KCSG_Daily_Avg.TS)
summary(KCSG_Daily_Avg_Trend)
##WBGT
##WBGT.High
KCSG_Daily_High_WBGT.TS <- ts(KCSG_Daily_Values.C$daily_high_WBGT, start = c(1973,01,01),frequency=365)
KCSG_Daily_High_WBGT.decomp <- stl(KCSG_Daily_High_WBGT.TS,s.window="periodic")
plot(KCSG_Daily_High_WBGT.decomp)
KCSG_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KCSG_Daily_High_WBGT.TS)
summary(KCSG_Daily_High_WBGT_Trend)
##WBGT Low
KCSG_Daily_low_WBGT.TS <- ts(KCSG_Daily_Values.C$daily_low_WBGT, start = c(1973,01,01),frequency=365)
KCSG_Daily_low_WBGT.decomp <- stl(KCSG_Daily_low_WBGT.TS,s.window="periodic")
plot(KCSG_Daily_low_WBGT.decomp)
KCSG_Daily_low_WBGT_Trend <- Kendall::SeasonalMannKendall(KCSG_Daily_low_WBGT.TS)
summary(KCSG_Daily_low_WBGT_Trend)
##WBGT Avg
KCSG_Daily_avg_WBGT.TS <- ts(KCSG_Daily_Values.C$daily_avg_WBGT, start = c(1973,01,01),frequency=365)
KCSG_Daily_avg_WBGT.decomp <- stl(KCSG_Daily_avg_WBGT.TS,s.window="periodic")
plot(KCSG_Daily_avg_WBGT.decomp)
KCSG_Daily_avg_WBGT_Trend <- Kendall::SeasonalMannKendall(KCSG_Daily_avg_WBGT.TS)
summary(KCSG_Daily_avg_WBGT_Trend)
```

### Mann Kendall Seasonal Values

```{r eval=FALSE}
##MANUALLY RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH
#Set up month and year columns
KCSG_Daily_Values.C$month <- month(ymd(KCSG_Daily_Values.C$date))
KCSG_Daily_Values.C$year <- year(ymd(KCSG_Daily_Values.C$date))
###WBGT
##HIGH WBGT
KCSG_Daily_High_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_high_WBGT ~ month+year, data=KCSG_Daily_Values.C)
#return just tau, slope, and intercept
KCSG_Daily_High_WBGT.C_MannKendall$estimate
##LOW WBGT
KCSG_Daily_Low_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_low_WBGT ~ month+year, data=KCSG_Daily_Values.C)
KCSG_Daily_Low_WBGT.C_MannKendall$estimate
##AVG WBGT
KCSG_Daily_Avg_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_avg_WBGT ~ month+year, data=KCSG_Daily_Values.C)
KCSG_Daily_Avg_WBGT.C_MannKendall$estimate
##Build data frame with the needed information (tau, slope, intercept per series)
KCSG_Daily_TS_estimates.C <- KCSG_Daily_High_WBGT.C_MannKendall$estimate%>%
  as.data.frame()
KCSG_Daily_TS_estimates.C$Low_WBGT <- as.data.frame(KCSG_Daily_Low_WBGT.C_MannKendall$estimate)
KCSG_Daily_TS_estimates.C$Avg_WBGT <- as.data.frame(KCSG_Daily_Avg_WBGT.C_MannKendall$estimate)
KCSG_Daily_TS_estimates.C$High_WBGT <- as.data.frame(KCSG_Daily_High_WBGT.C_MannKendall$estimate)
KCSG_Daily_TS_estimates_WBGT.C <- KCSG_Daily_TS_estimates.C[2:4]
##Remove unneeded variables
remove(KCSG_Daily_High_WBGT.C_MannKendall,KCSG_Daily_Low_WBGT.C_MannKendall,KCSG_Daily_Avg_WBGT.C_MannKendall)
###Temperature
## Same seasonal Mann-Kendall procedure as above, applied to the dry-bulb
## temperature columns instead of WBGT.
##HIGH
KCSG_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(daily_high ~ month+year, data=KCSG_Daily_Values.C)
#return just tau, slope, and intercept
KCSG_Daily_High.C_MannKendall$estimate
##LOW
KCSG_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(daily_low ~ month+year, data=KCSG_Daily_Values.C)
KCSG_Daily_Low.C_MannKendall$estimate
##AVG
KCSG_Daily_Avg.C_MannKendall <- kendallSeasonalTrendTest(daily_avg ~ month+year, data=KCSG_Daily_Values.C)
KCSG_Daily_Avg.C_MannKendall$estimate
##Build data frame with the needed information
KCSG_Daily_TS_estimates.C <- KCSG_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KCSG_Daily_TS_estimates.C$Low <- as.data.frame(KCSG_Daily_Low.C_MannKendall$estimate)
KCSG_Daily_TS_estimates.C$Avg <- as.data.frame(KCSG_Daily_Avg.C_MannKendall$estimate)
KCSG_Daily_TS_estimates.C$High <- as.data.frame(KCSG_Daily_High.C_MannKendall$estimate)
KCSG_Daily_TS_estimates.C <- KCSG_Daily_TS_estimates.C[2:4]
##Remove unneeded variables
remove(KCSG_Daily_High.C_MannKendall,KCSG_Daily_Low.C_MannKendall,KCSG_Daily_Avg.C_MannKendall)
```

# KLSF vs KCSG COMPARISON

```{r eval=FALSE}
##CONDUCT A CORRELATION TEST BETWEEN THE TWO SITES TO SEE IF THEY MOVE IN THE SAME DIRECTION (AT LEAST STATISTICALLY)
##IF THEY DO, WE CAN SAY THAT THE TWO SITES ARE SHOWING THE SAME GENERAL CLIMATE TRENDS ACROSS YEARS
##NULL HYPOTHESIS IS THEY DO NOT MOVE IN THE SAME DIRECTION, AND THEREFORE WE HAVE LOWER CONFIDENCE IN USING THEM
KCSG_WBGT_REDorABOVE_Hourly_byYear <- KCSG%>%
  filter(Date>='1973-01-01')%>%
  subset(WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
KLSF_WBGT_REDorABOVE_Hourly_byYear <- KLSF%>%
  filter(Date>='1973-01-01')%>%
  subset(WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
##CHECK FOR NORMAL DISTRIBUTION TO THE POINTS
##Shapiro test, if P>0.05, then it IS normally distributed. In this case, hourly summary of KCSG is not normally distributed.
shapiro.test(KCSG_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)
shapiro.test(KLSF_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)
##Not normally distributed, so should not use Pearson's correlation tests
#ggplot(KCSG_WBGT_REDorABOVE_Hourly_byYear)+
#  geom_histogram(aes(y=KCSG_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)) ##Visualize normal distribution
#FBGA_Combined%>%dplyr::select(.,KLSF_RedorAbove_Hourly,KCSG_RedorAbove_Hourly)%>%map(~fBasics::dagoTest(.)) ##Omnibus test <0.05 would indicate that the set is NOT normally distributed.
##USE KENDALL RANK CORRELATION TEST - can be used if not from normal distribution
cor.test(KCSG_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KLSF_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="kendall")
##Shows positive correlation between the 2 sites (tau=0.279) and p-value < 0.05 (0.004) ; implies correlation
##USE SPEARMAN RANK CORRELATION COEFFICIENT - can be used if data is not normal
cor.test(KCSG_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KLSF_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="spearman")
##Shows rho = 0.416 and p-value <0.05 ; implies correlation
##All tests return positive correlation and low p-values, including tests robust to non-normal
ggplot()+
  geom_line(data=KLSF_Daily_Values.C,aes(x=date,y=daily_avg))+
  geom_line(data=KCSG_Daily_Values.C,aes(x=date,y=daily_avg),color='blue')
```

### Compare anomaly from KLSF to anomaly from NOAA
https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/divisional/time-series/0904/tavg/ann/5/1960-2022?base_prd=true&begbaseyear=1990&endbaseyear=2020

```{r}
##SET UP DATA
##manually load in the regional anomaly trend
WCGA_Avg_Temp_Anom <- read_csv('/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/R Files/JAN 2023 Data Projects/FBGA/FBGA-/WCGA_Avg_Temp_Anom.csv')
## Keep rows 2-64 (drop the extra header row kept by read_csv)
WCGA_Avg_Temp_Anom <- WCGA_Avg_Temp_Anom[2:64,]
## Hour/Day/Month climatology used to fill missing hourly temperatures
KLSF_Anom_Avg.C <- KLSF_Wrangled%>%
  filter(between(Year,1960,2021))%>%
  group_by(Hour, Day, Month)%>%
  dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C,na.rm=TRUE))
##create a column with the average values for that day, ignoring missing data in calculation
KLSF_Anomaly.C <- KLSF_Wrangled%>%
  filter(between(Year,1960,2021))%>%
  group_by(Hour, Day, Month, Year, Decade)%>%
  dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C))%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:543504))  ## NOTE(review): hard-coded row count; breaks if the filtered span changes
KLSF_Anomaly.C <- full_join(KLSF_Anomaly.C,KLSF_Anom_Avg.C,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KLSF_00s)
KLSF_Anomaly.C <- mutate(KLSF_Anomaly.C, Ave_Temp.C.x = case_when(
  is.na(Ave_Temp.C.x) ~Ave_Temp.C.y,
  TRUE~Ave_Temp.C.x
)) ##Replaces NA values in 'x' column with the averaged from 'y' column
##START ACTUAL PLOT
KLSF_Anomaly.C <- KLSF_Anomaly.C%>%
  dplyr::select(Hour,Day,Month,Year,Ave_Temp.C.x,Date,count)%>%
  rename(Ave_Temp.C=Ave_Temp.C.x)
## Reference mean over the 1990-2020 base period
KLSF_Ref.C <- KLSF_Anomaly.C%>%
  filter(between(Year,1990,2020))%>%
  summarise(mean=mean(Ave_Temp.C))
KLSF_Anomaly.C <- KLSF_Anomaly.C%>%
  group_by(Year)%>%
  dplyr::summarise(Avg_Year_Temp = mean(Ave_Temp.C))%>%
  mutate(Anomaly = Avg_Year_Temp-KLSF_Ref.C$mean)
#mutate(Date = make_date(year=Year,month=Month))
#KLSF_Anomaly <- KLSF_Anomaly %>% arrange(ymd(KLSF_Anomaly$Date)) ##puts DF in date order
#KLSF_Anomaly$count <- c(1:nrow(KLSF_Anomaly)) ##assigns date ordered number to DF
## Third (unnamed) column of the NOAA file holds the regional anomaly values
KLSF_Anomaly.C$Regional_Anom <- WCGA_Avg_Temp_Anom$...3[1:62]
KLSF_Anomaly.C$Regional_Anom <- as.numeric(KLSF_Anomaly.C$Regional_Anom)
KLSF_Monthly_Anomaly.C.plot <- ggplot(KLSF_Anomaly.C)+
  geom_line(aes(x=Year,y=Anomaly,color="Blue"))+
  geom_line(aes(x=Year,y=Regional_Anom,color="red"))+
  xlab("Year")+
  scale_x_continuous(breaks = c(1960,1970,1980,1990,2000,2010,2020),expand=c(0,0))+
  ylab("Average Anomaly\nDegrees C")+
  ggtitle("Yearly average temperature anomaly\nFt Moore, GA vs West-Central Georgia Average")+
  theme_classic()+
  # theme(axis.text.y=element_text(margin=margin(r=0)))+
  geom_hline(yintercept=0,linetype="dashed")+
  scale_color_identity(name="Location",breaks=c("Blue","red"), labels=c("FMGA","Regional Average\n(NOAA)"),guide="legend")+
  labs(caption = "Reference period = Yearly average temp, 1990-2020")
#annotate("text",x=2009,y=-2,label=("Reference period = Average Temperature, 1990-2020"),size=2)+
KLSF_Monthly_Anomaly.C.plot
## Pearson correlation of station anomaly vs NOAA regional anomaly
FBGAxRegional.cor <- cor.test(KLSF_Anomaly.C$Regional_Anom ,KLSF_Anomaly.C$Anomaly,method = "pearson")
FBGAxRegional.cor$estimate
FBGAxRegional.cor$p.value
FBGAxRegional.cor$conf.int
```

# KLSF ANALYSIS AND PLOT CREATION

```{r}
####FULL_JOIN ALL DECADES
## Join the per-decade frames side by side on the row counter `count`;
## rename the .x/.y suffixes produced by each join into decade-tagged columns.
Avg_Hourly_Decade <- full_join(KLSF_60s,KLSF_70s,by="count")%>%
  rename(Hour_1960=Hour.x,Day.1960=Day.x,Month.1960=Month.x,Year.1960=Year.x,Decade.1960=Decade.x,Ave_WBGT.C.1960=Ave_WBGT.C.x,Date.1960=Date.x)%>%
  rename(Hour_1970=Hour.y,Day.1970=Day.y,Month.1970=Month.y,Year.1970=Year.y,Decade.1970=Decade.y,Ave_WBGT.C.1970=Ave_WBGT.C.y,Date.1970=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KLSF_80s,by="count")
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KLSF_90s,by="count")%>%
  rename(Hour_1980=Hour.x,Day.1980=Day.x,Month.1980=Month.x,Year.1980=Year.x,Decade.1980=Decade.x,Ave_WBGT.C.1980=Ave_WBGT.C.x,Date.1980=Date.x)%>%
  rename(Hour_1990=Hour.y,Day.1990=Day.y,Month.1990=Month.y,Year.1990=Year.y,Decade.1990=Decade.y,Ave_WBGT.C.1990=Ave_WBGT.C.y,Date.1990=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KLSF_00s,by="count")
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KLSF_10s,by="count")%>%
  rename(Hour_2000=Hour.x,Day.2000=Day.x,Month.2000=Month.x,Year.2000=Year.x,Decade.2000=Decade.x,Ave_WBGT.C.2000=Ave_WBGT.C.x,Date.2000=Date.x)%>%
  rename(Hour_2010=Hour.y,Day.2010=Day.y,Month.2010=Month.y,Year.2010=Year.y,Decade.2010=Decade.y,Ave_WBGT.C.2010=Ave_WBGT.C.y,Date.2010=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KLSF_20s,by="count")%>%
  rename(Hour_2020=Hour,Day.2020=Day,Month.2020=Month,Year.2020=Year,Decade.2020=Decade,Ave_WBGT.C.2020=Ave_WBGT.C,Date.2020=Date)
#Avg_Hourly_Decade$Month.1960 <- paste0("1960s",Avg_Hourly_Decade$Month.1960)
Avg_Hourly_Decade$Month.1970 <- paste0("1970s",Avg_Hourly_Decade$Month.1970)
Avg_Hourly_Decade$Month.1980 <- paste0("1980s",Avg_Hourly_Decade$Month.1980)
Avg_Hourly_Decade$Month.1990 <- paste0("1990s",Avg_Hourly_Decade$Month.1990)
Avg_Hourly_Decade$Month.2000 <- paste0("2000s",Avg_Hourly_Decade$Month.2000)
Avg_Hourly_Decade$Month.2010 <- paste0("2010s",Avg_Hourly_Decade$Month.2010)
Avg_Hourly_Decade$Month.2020 <- paste0("2020s",Avg_Hourly_Decade$Month.2020)
KLSF_Avg_Summer_Hourly_Decade <- Avg_Hourly_Decade%>%
  filter(between(Month.1960,5,9)) ##Filters for just the summer months
#view(Avg_Summer_Hourly_Decade)
## "Last 10 years" = 2013-2019 from the 2010s columns plus 2020-2022 from the
## 2020s columns, stacked and reduced to summer months.
KLSF_Thirteen_to_Nineteen <- Avg_Hourly_Decade%>%
  filter(Year.2010>2012)%>%
  dplyr::select(Month.1960,Hour_2010:Ave_Temp.C.y.y.y)%>%
  rename(Hour=Hour_2010,Day=Day.2010,Month=Month.2010,Year=Year.2010,Decade=Decade.2010,Ave_WBGT.C=Ave_WBGT.C.2010,Date=Date.2010,Ave_Temp.C=Ave_Temp.C.y.y.y)
KLSF_Zero_to_Two <- Avg_Hourly_Decade%>%
  filter(Year.2020>0)%>%
  dplyr::select(Month.1960,Hour_2020:Ave_Temp.C)%>%
  rename(Hour=Hour_2020,Day=Day.2020,Month=Month.2020,Year=Year.2020,Decade=Decade.2020,Ave_WBGT.C=Ave_WBGT.C.2020,Date=Date.2020,Ave_Temp.C=Ave_Temp.C)
KLSF_Thirteen_to_Two <- rbind(KLSF_Thirteen_to_Nineteen,KLSF_Zero_to_Two)
KLSF_Thirteen_to_Two <- KLSF_Thirteen_to_Two%>%
  dplyr::select(Hour,Month.1960,Hour,Ave_WBGT.C:Ave_Temp.C)%>%
  rename(Ave_WBGT_last10 = Ave_WBGT.C ,Ave_Temp_last10=Ave_Temp.C,Month_last10= Month.1960 )%>%
  filter(between(Month_last10,5,9))
#%>%add_row(Hour = 21:23, Month_last10=9,Ave_WBGT_last10=NA,Date=NA,Ave_Temp_last10=NA)
KLSF_Avg_Summer_Hourly_Decade <- cbind(KLSF_Avg_Summer_Hourly_Decade,KLSF_Thirteen_to_Two)
```

####DENSITY PLOT AND HISTOGRAMS BY HOURLY AVERAGE BY DECADE
###DENSITY PLOT

```{r}
## Overlaid WBGT density curves, one per decade, summer (May-Sep) hours only.
## Colors are mapped through scale_color_discrete breaks so the legend shows
## decade labels instead of the raw color names.
KLSF_Summer_DensityPlot_decades <- ggplot(KLSF_Avg_Summer_Hourly_Decade)+
  geom_density(aes(x=Ave_WBGT.C.1960,color='white'),linetype=2,linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1970,color='purple'),linetype=2,linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1980,color='red'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1990,color='blue'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2000, color='yellow'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2010,color='green'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2020,color='black'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT_last10 ,color='pink'),alpha=0.7,linewidth=1)+
  scale_color_discrete(name="Hourly WBGT \n Grouped by decade",
                       breaks=c("white","purple","red", "blue", "yellow","green","black","pink"),
                       labels=c("1960s","1970s","1980s", "1990s", "2000s","2010s","2020s","Last 10 Years"))+
  xlab("WBGT")+
  scale_x_continuous(limits = c(20,38),expand=c(0,0))+
  ggtitle("May - September Hourly WBGT Density",subtitle="Ft Benning, GA")+
  # xlim(20,38)+
  theme_light()
KLSF_Summer_DensityPlot_decades
## 2000s is included here without modification to missing data points because this is a density plot, so sample size matters less, assuming the proportion of WBGT readings are accurate.
```

####ANOMALY PLOT SUMMER MONTHS - DAYTIME

```{r}
##SET UP DATA
## Summer (May-Sep) Hour/Day/Month climatology used to fill missing WBGT.
KLSF_Anom_Avg <- KLSF_Wrangled%>%
  filter(between(Year,1960,2022), between(Month,5,9))%>%
  group_by(Hour, Day, Month)%>%
  dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C,na.rm=TRUE))
##create a column with the average WBGT values for that day, ignoring missing data in calculation
## Daytime (0800-2000) summer hours only.
KLSF_Anomaly <- KLSF_Wrangled%>%
  filter(between(Year,1960,2022),between(Hour,8,20), between(Month,5,9))%>%
  group_by(Hour, Day, Month, Year, Decade)%>%
  dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C))%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:125307))  ## NOTE(review): hard-coded row count; breaks if the filters change
KLSF_Anomaly <- full_join(KLSF_Anomaly,KLSF_Anom_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KLSF_00s)
KLSF_Anomaly <- mutate(KLSF_Anomaly, Ave_WBGT.C.x = case_when(
  is.na(Ave_WBGT.C.x) ~Ave_WBGT.C.y,
  TRUE~Ave_WBGT.C.x
)) ##Replaces NA values in 'x' column with the averaged from 'y' column
##START ACTUAL PLOT
KLSF_Anomaly <- KLSF_Anomaly%>%
  dplyr::select(Hour,Day,Month,Year,Ave_WBGT.C.x,Date,count)%>%
  rename(Ave_WBGT.C=Ave_WBGT.C.x)
## Reference mean over the 1990-2020 base period
KLSF_Ref <- KLSF_Anomaly%>%
  filter(between(Year,1990,2020))%>%
  summarise(mean=mean(Ave_WBGT.C))
KLSF_Anomaly <- KLSF_Anomaly%>%
  group_by(Year)%>%
  dplyr::summarise(Avg_Year_WBGT = mean(Ave_WBGT.C))%>%
  mutate(Anomaly = Avg_Year_WBGT-KLSF_Ref$mean)
#mutate(Date = make_date(year=Year,month=Month))
#KLSF_Anomaly <- KLSF_Anomaly %>% arrange(ymd(KLSF_Anomaly$Date)) ##puts DF in date order
#KLSF_Anomaly$count <- c(1:nrow(KLSF_Anomaly)) ##assigns date ordered number to DF
KLSF_Monthly_Anomaly.plot <- ggplot(KLSF_Anomaly,aes(x=Year,y=Anomaly))+
  geom_line()+
  geom_smooth()+
  xlab("Year")+
  ylab("Monthly Average WBGT Anomaly\nDegrees C")+
  ggtitle("Yearly average WBGT anomaly, 1960-2022",subtitle="Ft Benning, GA")+
  labs(caption = "Reference period = Average WBGT, 1990-2020")+
  theme_classic()+
  geom_hline(yintercept=0,linetype="dashed")
#annotate("text",x=2010,y=-1.5,label=("Reference period = Average WBGT, 1990-2020"),size=2)+
```

####HISTOGRAMS FULL year data used

```{r}
## The five decade histograms were identical except for the column plotted
## and the title; build them with one helper instead of five copied blocks.
## `.data[[col]]` is the ggplot2/rlang pronoun that selects the column by
## its string name, so output matches the original aes(x=Ave_WBGT.C.1970)
## etc. exactly.
decade_histogram <- function(col, title) {
  ggplot(Avg_Hourly_Decade)+
    geom_histogram(aes(x=.data[[col]]),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
    ylab("Hours")+
    xlab("WBGT")+
    ggtitle(title)+
    ## Vertical reference lines at the four heat-category minimum thresholds
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
    scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
    theme_classic()
}

Summer_Histogram_1970s <- decade_histogram("Ave_WBGT.C.1970","1970-1979")
Summer_Histogram_1980s <- decade_histogram("Ave_WBGT.C.1980","1980-1989")
Summer_Histogram_1990s <- decade_histogram("Ave_WBGT.C.1990","1990-1999")
Summer_Histogram_2000s <- decade_histogram("Ave_WBGT.C.2000","2000-2009")
Summer_Histogram_2010s <- decade_histogram("Ave_WBGT.C.2010","2010-2019")
```

####BAR CHARTS (Full Year data used)
```{r}
##CREATE VECTORS OF HOURS ABOVE A CATEGORY
## Average hours per year at/above one WBGT threshold, for each decade.
## Full decades divide by 10 years of data; the 2020s frame holds roughly
## 2.75 years, hence the /2.75 to get a single-year average.
hours_at_or_above <- function(threshold) {
  c(sum(KLSF_70s$Ave_WBGT.C>=threshold)/10,
    sum(KLSF_80s$Ave_WBGT.C>=threshold)/10,
    sum(KLSF_90s$Ave_WBGT.C>=threshold)/10,
    sum(KLSF_00s$Ave_WBGT.C>=threshold)/10,
    sum(KLSF_10s$Ave_WBGT.C>=threshold)/10,
    sum(KLSF_20s$Ave_WBGT.C>=threshold)/2.75)
}
Green <- hours_at_or_above(Heat_Categories$Category_Temp_Min.C[1])
Yellow <- hours_at_or_above(Heat_Categories$Category_Temp_Min.C[2])
Red <- hours_at_or_above(Heat_Categories$Category_Temp_Min.C[3])
Black <- hours_at_or_above(Heat_Categories$Category_Temp_Min.C[4])
#/10 in order to get single year avg
KLSF_Hours_Flags <- as.data.frame(cbind(c("1970s","1980s","1990s","2000s","2010s","2020s"),Green,Yellow,Red,Black),stringsAsFactors = FALSE)
KLSF_Hours_Flags <- KLSF_Hours_Flags%>%
  pivot_longer(!V1,names_to = "Heat_Category",values_to = "Hours")%>%
  as.data.frame()
KLSF_Hours_Flags$Hours <- as.integer(KLSF_Hours_Flags$Hours)
#view(Hours_Flags)
##CREATE GRAPH
KLSF_Threshold_Barplot <- ggplot(data=KLSF_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) +
  geom_bar(stat="identity",position=position_dodge())+
  geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+
  scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+
  ylab("Average Hours per Year")+
  xlab("Heat Category")+
  theme_classic()+
  scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+
  coord_flip()+
  #annotate("text",x=4,y=150,label=c("Hours above Black threshold, average per Year \n2020s : 96 (projected)\n2010s : 124 \n2000s : 74\n1990s : 50 \n1980s : 121 "),hjust=0,size=2)+
  ## Title typo fixed: "Catergory" -> "Category"
  ggtitle("Average Hours at Each Heat Category per Year",subtitle="Ft Benning, GA")
KLSF_Threshold_Barplot
```

#### COMBINED PLOTS

```{r}
#bottom_row <- plot_grid(Summer_Histogram_2000s,Summer_Histogram_2010s,nrow=1)
#Middle_row <- plot_grid(Summer_Histogram_1980s,Summer_Histogram_1990s,nrow=1)
#top_row <- plot_grid(Summer_DensityPlot_decades, KLSF_Monthly_Anomaly.plot,nrow=2)
## Stack density plot, anomaly plot, and bar chart into one figure with a
## shared caption describing the gap-filling rules.
Summer_Months_KLSF.plot <- plot_grid(KLSF_Summer_DensityPlot_decades, KLSF_Monthly_Anomaly.plot, KLSF_Threshold_Barplot,nrow=3)
Summer_Months_KLSF.plot <- ggdraw(add_sub(Summer_Months_KLSF.plot,"Data gaps =< 3 hours interpolated. Longer gaps replaced with decade average for missing Hour, Day \n 2020s density plot for 2020-2022 only; Bar chart extrapolates full 2020s decade",size=8))
Summer_Months_KLSF.plot
```

#BCT Analysis
##Load Data FBGA BCT Wrangle

```{r}
#write.csv(FBGA_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Benning ATRRS Pull/FBGA_BCT_23.csv")
FBGA_BCT_23 <- read.csv("~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Benning ATRRS Pull/FBGA_BCT_23.csv",stringsAsFactors = FALSE)
## funs() is defunct in dplyr >= 1.0; across(everything(), ...) is the
## supported equivalent of the old mutate_all(.funs = funs(str_trim)).
FBGA_BCT_23 <- FBGA_BCT_23%>%
  mutate(across(everything(), str_trim))%>%
  dplyr::select(Class,X.Report.Date,X.Start.Date,X.End.Date,X.Capacity)
FBGA_BCT_23 <- na.omit(FBGA_BCT_23)
FBGA_BCT_23$Class <- as.numeric(FBGA_BCT_23$Class)
FBGA_BCT_23$X.Capacity <- as.numeric(FBGA_BCT_23$X.Capacity)
FBGA_BCT_23$X.Report.Date <- dmy(FBGA_BCT_23$X.Report.Date)
FBGA_BCT_23$X.Start.Date <- dmy(FBGA_BCT_23$X.Start.Date)
FBGA_BCT_23$X.End.Date <- dmy(FBGA_BCT_23$X.End.Date)
FBGA_BCT_23 <- FBGA_BCT_23%>%
  rename(Report_Date = X.Report.Date, Start_Date=X.Start.Date,End_Date=X.End.Date)%>%
  mutate(Julian_Start=yday(Start_Date),Julian_End=yday(End_Date))
FBGA_BCT_23 <- FBGA_BCT_23[apply(FBGA_BCT_23!=0,1,all),] #remove any row with a numerical '0' to get rid of classes with 0 capacity
#write.csv(FBGA_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Benning ATRRS Pull/FBGA_BCT_23_Wrangled.csv")
```

##KLSF BCT
##Assign clean history to KLSF data

```{r}
FBGA_BCT_23$Start_Month <- month(ymd(FBGA_BCT_23$Start_Date))
FBGA_BCT_23$End_Month <- month(ymd(FBGA_BCT_23$End_Date))
FBGA_BCT_23$Start_Day <- day(ymd(FBGA_BCT_23$Start_Date))
FBGA_BCT_23$End_Day <- day(ymd(FBGA_BCT_23$End_Date))
FBGA_BCT_23$Days_Long <- FBGA_BCT_23$End_Date-FBGA_BCT_23$Start_Date #counts number of days in each class
```

```{r}
##code below to get average hourly temp and WBGT for the last forty years for full data (not 2022 or 2023 due to lack of data)
lack of data) ###Averaging out the values left alone, per conversation with Luke KLSF_Hourly_Avg.last30 <- KLSF_Wrangled%>% filter(date>="1992-10-01"&date<="2022-09-30")%>% mutate(Julian=yday(date))%>% group_by(Hour,Julian) KLSF_Hourly_Avg.last10 <- KLSF_Wrangled%>% filter(date>="2012-10-01"&date<="2022-09-30")%>% filter(between(Month,5,9))%>% mutate(Julian=yday(date))%>% group_by(Hour,Julian) #Days just in Red category Last 30 KLSF_Cat4 <- KLSF_Hourly_Avg.last30%>% subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[3],Heat_Categories$Category_Temp_Min.C[4]))%>% group_by(date)%>% count(date)%>% rename(Day_Cat4 = n)%>% mutate(Day_Cat4 =ifelse(Day_Cat4 == '',1,1))%>% mutate(Julian=yday(date)) #Days just in Black category Last 30 KLSF_Cat5 <- KLSF_Hourly_Avg.last30%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4] )%>% group_by(date)%>% count(date)%>% rename(Day_Cat5 = n)%>% mutate(Day_Cat5 =ifelse(Day_Cat5 == '',1,1))%>% mutate(Julian=yday(date)) #Unfiltered Heat Cat 4 and Heat Cat 5 in 2022 HC4_2022 <- KLSF_Wrangled%>% filter(between(date,"2022-01-01","2022-12-31"))%>% filter(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3]&Ave_WBGT.C% count(Ave_WBGT.C) print(sum(HC4_2022$n)) #returns 187 hours at HC4, which is very close to the 180 hours that LTC DeGroot states in a 2023 paper HC5_2022 <- KLSF_Wrangled%>% filter(between(date,"2022-01-01","2022-12-31"))%>% filter(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>% count(Ave_WBGT.C) print(sum(HC5_2022$n)) #returns 133 hours at HC5, which is very close to the 128 hours that LTC DeGroot states in a 2023 paper ``` ##Create a data frame of Red and Black Flag Days - last 30 ```{r} ##Create a df that has all days at RED and all days at BLACK KLSF_RedorBlack_Flag_Days.last30 <- full_join(KLSF_Cat4,KLSF_Cat5,by="date") KLSF_RedorBlack_Flag_Days.last30 <- KLSF_RedorBlack_Flag_Days.last30%>% select(date,Day_Cat4,Day_Cat5)%>% mutate(Julian=yday(date))%>% mutate(year.y=lubridate::year(date)) 
KLSF_RedorBlack_Flag_Days.last30 <- replace(KLSF_RedorBlack_Flag_Days.last30,is.na(KLSF_RedorBlack_Flag_Days.last30),0) ``` ##Filter ```{r} ##Get ATRRS Data and re-wrangled FBGA_BCT_23_Wrangled <- read.csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Benning ATRRS Pull/FBGA_BCT_23_Wrangled.csv") FBGA_BCT_23_Wrangled$Start_Date <- ymd(FBGA_BCT_23_Wrangled$Start_Date) FBGA_BCT_23_Wrangled$End_Date <- ymd(FBGA_BCT_23_Wrangled$End_Date) ``` ##Filter with ATRRS Data for course dates ```{r} FBGA_BCT_23_Wrangled$Count <- c(1:nrow(FBGA_BCT_23_Wrangled)) #add a counter column for following loop ##Loop to count average hours in each flag condition by cohort. for (i in FBGA_BCT_23_Wrangled$Count){ Flags <- KLSF_Hourly_Avg.last30%>% filter(between(Julian,FBGA_BCT_23_Wrangled$Julian_Start[i],FBGA_BCT_23_Wrangled$Julian_End[i])) ##divide the below by 40 because there are 40 years in this filtered data set (changed to 10) FBGA_BCT_23_Wrangled$NoFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C=Heat_Categories$Category_Temp_Min.C[1]&Flags$Ave_WBGT.C=Heat_Categories$Category_Temp_Min.C[2]&Flags$Ave_WBGT.C=Heat_Categories$Category_Temp_Min.C[3]&Flags$Ave_WBGT.C=Heat_Categories$Category_Temp_Min.C[4])) /(30)) FBGA_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])) /(30)) } ##Loop to count up total hours from NoFlag:BlackFlag as a check that all hours are accounted for for (i in FBGA_BCT_23_Wrangled$Count){ FBGA_BCT_23_Wrangled$TotalCourseHours[i] <- FBGA_BCT_23_Wrangled$NoFlags_Avg[i]+FBGA_BCT_23_Wrangled$GreenFlags_Avg[i]+FBGA_BCT_23_Wrangled$YellowFlags_Avg[i]+FBGA_BCT_23_Wrangled$RedFlags_Avg[i]+FBGA_BCT_23_Wrangled$BlackFlags_Avg[i] } #view(FBGA_BCT_23_Wrangled) ``` ##Create a list of Green and Yellow days that follow Red or Black Days ```{r} ##Create a df that has all days at/above red OR days at/above green following a red+ day 
KLSF_Flag_Days.last40 <- full_join(KLSF_AboveRed.last40, KLSF_AboveGreen.last40, by = "date")
KLSF_Flag_Days.last40 <- KLSF_Flag_Days.last40 %>%
  dplyr::select(date, Day_Above_Red, Day_Green_or_Yellow, Julian.y, year.y) %>%
  mutate(Julian.y = yday(date)) %>%
  mutate(year.y = lubridate::year(date))
KLSF_Flag_Days.last40 <- replace(KLSF_Flag_Days.last40, is.na(KLSF_Flag_Days.last40), 0)
## date order matters for the lag() below
KLSF_Flag_Days.last40 <- KLSF_Flag_Days.last40 %>% arrange(ymd(KLSF_Flag_Days.last40$date))
#KLSF_Flag_Days.last40$count <- c(1:nrow(KLSF_Flag_Days.last40))
## column identifying days immediately AFTER a RED or BLACK day
KLSF_Flag_Days.last40$Lagged <- lag(KLSF_Flag_Days.last40$Day_Above_Red > 0, 1)

## Helper: average occurrence of a "caution" day (a red+ day, or any day that
## follows one), counted per calendar (month, day) and scaled by the number of
## years in the window. Grouping by (month, day) is intentionally retained so
## downstream frames keep the same columns as the original pipelines.
avg_caution_days <- function(flag_days, n_years) {
  out <- flag_days %>%
    filter(Lagged == TRUE | Day_Above_Red == 1) %>%
    mutate(day = day(date), month = month(date)) %>%
    group_by(month, day) %>%
    count(day) %>%
    rename("Caution Days" = n) %>%
    mutate(date = make_date(year = 2023, month = month, day = day)) %>%
    mutate(Julian = yday(date)) %>%
    dplyr::select("Caution Days", Julian, date)
  out$Avg_Count_Caution_Days <- out$`Caution Days` / n_years
  out
}

## Helper: average occurrence of a heat cat 4-5 "high heat" day.
avg_high_heat_days <- function(flag_days, n_years) {
  out <- flag_days %>%
    filter(Day_Above_Red == 1) %>%
    mutate(day = day(date), month = month(date)) %>%
    group_by(month, day) %>%
    count(day) %>%
    rename(High_Heat_Days = n) %>%
    mutate(date = make_date(year = 2023, month = month, day = day)) %>%
    mutate(Julian = yday(date)) %>%
    dplyr::select(High_Heat_Days, Julian, date)
  out$High_Heat_Days <- out$High_Heat_Days / n_years
  out
}

## Helper: combine caution + high-heat frames by Julian day; calendar days that
## never occurred in one frame come back NA from the full join, so zero them.
## Note "caution days" include both high heat days and caution days.
join_heat_days <- function(caution, high_heat) {
  out <- full_join(caution, high_heat, by = "Julian") %>%
    dplyr::select(High_Heat_Days, Avg_Count_Caution_Days, Julian, date.x) %>%
    rename(date = date.x)
  replace(out, is.na(out), 0)
}

## Last 40 years
KLSF_High_Heat_Days.last40 <- avg_high_heat_days(KLSF_Flag_Days.last40, 40)
KLSF_Caution_Days.last40 <- join_heat_days(avg_caution_days(KLSF_Flag_Days.last40, 40),
                                           KLSF_High_Heat_Days.last40)
#view(KLSF_Caution_Days.last40)

## Last 30 years of flag days
KLSF_Flag_Days.last30 <- KLSF_Flag_Days.last40 %>%
  filter(between(date, "1992-10-01", "2022-09-30"))
KLSF_High_Heat_Days.last30 <- avg_high_heat_days(KLSF_Flag_Days.last30, 30)
KLSF_Caution_Days.last30 <- join_heat_days(avg_caution_days(KLSF_Flag_Days.last30, 30),
                                           KLSF_High_Heat_Days.last30)

## Last 10 years of flag days
KLSF_Flag_Days.last10 <- KLSF_Flag_Days.last40 %>%
  filter(between(date, "2012-10-01", "2022-09-30"))
KLSF_High_Heat_Days.last10 <- avg_high_heat_days(KLSF_Flag_Days.last10, 10)
KLSF_Caution_Days.last10 <- join_heat_days(avg_caution_days(KLSF_Flag_Days.last10, 10),
                                           KLSF_High_Heat_Days.last10)
```
##Plot the training cycle vs red and black flag days (Dangerous Heat per Class)
```{r}
### this graph uses the most recent thirty-year period
FBGA_heat_hours.plot <- ggplot(FBGA_BCT_23_Wrangled, aes(x = Start_Date, y = Dangerous_Heat_Hours_Avg)) +
  geom_segment(aes(x = Start_Date, xend = Start_Date, y = 0, yend = Dangerous_Heat_Hours_Avg)) +
  geom_point(aes(size = Dangerous_Heat_Hours_Avg, fill = Dangerous_Heat_Hours_Avg),
             color = "red", fill = alpha("red", 0.3), alpha = 0.7, shape = 21, stroke = 2) +
  scale_size(range = c(0.01, 7), name = "Hours of Hazardous\nHeat per Class") +
  theme_light() +
  geom_text(aes(label = round(Dangerous_Heat_Hours_Avg, 0)),
            position = position_dodge(width = 0.9), vjust = -2, hjust = 0, size = 3) +
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(angle = 30)
  ) +
  scale_x_date(date_breaks = "1 month", date_labels = "%b", date_minor_breaks = "1 week",
               limits = as.Date(c("2023-03-01", "2023-10-01"))) +
  geom_text(aes(label = Class, y = -3),
            position = position_dodge(width = 0.9), hjust = 0, size = 4, fontface = 'bold') +
  xlab("FBGA BCT Class Number (FY23)\nVertical lines correspond to class start date") +
  ylab("'Red' or 'Black' Hours per Class") +
  ggtitle("Average (mean) hours of hazardous heat by basic training class",
          subtitle = "Ft Benning, GA") +
  labs(caption = "Hours averaged from last 30 year period (October 1992 - September 2022).\nClass dates from FY 2023 basic combat training schedule")
FBGA_heat_hours.plot
```
###PLOT COURSE NUMBER AND THE LIKELIHOOD THAT THE DAY IS DANGEROUS ON SAME GRAPH
```{r}
ggplot() +
  geom_smooth(data = KLSF_Caution_Days.last30,
              aes(x = date, y = Avg_Count_Caution_Days * 200), se = TRUE, fill = 'blue', alpha = 0.3) +
  ## the secondary axis divides by 200 because the smoothed probability was
  ## multiplied by 200 above to share the hours axis
  scale_y_continuous(limits = c(0, 200), name = "Hours of hazardous heat per class", expand = c(0, 0),
                     sec.axis = sec_axis(trans = ~ . / 200,
                                         name = "Probability of experiencing a cautionary heat day",
                                         labels = scales::percent, breaks = seq(0, 1, .1))) +
  geom_segment(data = FBGA_BCT_23_Wrangled,
               aes(x = Start_Date, xend = End_Date,
                   y = Dangerous_Heat_Hours_Avg, yend = Dangerous_Heat_Hours_Avg)) +
  geom_text(data = FBGA_BCT_23_Wrangled,
            aes(x = (Start_Date), y = Dangerous_Heat_Hours_Avg, label = round(Class, 0)),
            position = position_dodge(width = 0.9), vjust = -0.1, hjust = 1.5, size = 5, fontface = 'bold') +
  scale_x_date(date_breaks = "1 month", date_labels = "%b", date_minor_breaks = "1 week") +
  theme_light() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.border = element_blank(),
    axis.ticks.x = element_blank(),
    axis.text.x = element_text(angle = 30),
    axis.title.x = element_blank()
  ) +
  ggtitle("Ft Moore, GA")
```
###Rank days by high heat and by heat wave probability
```{r}
## Rank days by: probability of a high heat day, then probability of a
## cautionary heat day, then earlier in the year. rescale() maps the ordinal
## ranking onto [0, 1] so it can be plotted on a percent axis.
KLSF_Caution_Days.last40.sorted <- KLSF_Caution_Days.last40 %>%
  mutate(High_Heat_Ranked = rank(-High_Heat_Days)) %>%
  mutate(Avg_Count_Caution_Days_Ranked = rank(-Avg_Count_Caution_Days))
KLSF_Caution_Days.last40.sorted <- KLSF_Caution_Days.last40.sorted %>%
  arrange(KLSF_Caution_Days.last40.sorted$High_Heat_Ranked,
          KLSF_Caution_Days.last40.sorted$Avg_Count_Caution_Days_Ranked) %>%
  mutate(newcol = High_Heat_Ranked + Avg_Count_Caution_Days_Ranked) %>%
  mutate(ranking = c(1:nrow(KLSF_Caution_Days.last40.sorted))) %>%
  mutate(ranking_rescaled = rescale(-ranking))
#view(KLSF_Caution_Days.last40.sorted)
#write.csv(KLSF_Caution_Days.last40.sorted, "/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Dissertation/Data Frames/FBGA_last40_days_ranked.csv")
```
##Prior 30 year hazardous and caution probability
```{r}
KLSF_Flag_Days.1982.2011 <- KLSF_Flag_Days.last40 %>%
  filter(between(date, "1982-01-01", "2011-12-31"))
## Same caution / high-heat summaries for the previous 30-year window
KLSF_High_Heat_Days.prev30 <- avg_high_heat_days(KLSF_Flag_Days.1982.2011, 30)
KLSF_Caution_Days.prev30 <- join_heat_days(avg_caution_days(KLSF_Flag_Days.1982.2011, 30),
                                           KLSF_High_Heat_Days.prev30)
### Plot the likelihood that a day is a 'Cautionary' or 'Hazardous' Heat Day
```
##Plot cautionary and hazardous days
```{r}
##Cautionary Days
KLSF_Likelihood_cautionary_days.plot <- ggplot(KLSF_Caution_Days.prev30) +
  geom_smooth(aes(x = date, y = Avg_Count_Caution_Days, color = 'blue'),
              se = TRUE, fill = 'blue', alpha = 0.3) +
  geom_smooth(data = KLSF_Caution_Days.last10,
              aes(x = date, y = Avg_Count_Caution_Days, color = 'purple'),
              se = TRUE, fill = 'purple', alpha = 0.3) +
  scale_color_identity(name = "Years w/ 95%\nconfidence interval", breaks = c("purple", "blue"),
                       labels = c("Last 10-year Avg", "Previous 30-year Avg\n(1982-2011)"),
                       guide = "legend") +
  scale_y_continuous(limits = c(0.1, .9), labels = scales::percent, breaks = seq(0, 1, .1),
                     name = "Probability of experiencing Cautionary Heat day", expand = c(0, 0)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%B", date_minor_breaks = "1 week",
               name = "Date \n minor breaks = 1 week",
               limits = as.Date(c("2023-05-01", "2023-10-01")), expand = c(0, 0)) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 30),
        legend.background = element_rect(fill = "lightblue", size = 0.5, linetype = "solid"),
        plot.title = element_text(family = '', face = 'bold', colour = 'Purple', size = 16)) +
  #annotate("text",label="5% increase in likelihood of cautionary days in the last week of July",size=6,x=KLSF_Caution_Days.prev30$date[45],y=.9)+
  #geom_point(data=KLSF_Caution_Days.last40.sorted[1:10,],aes(x=date,y=ranking_rescaled))+
  #geom_text(data=KLSF_Caution_Days.last40.sorted[1:10,],aes(x=date,y=ranking_rescaled-.9,label=ranking))+
  ggtitle("Cautionary heat daily probability", subtitle = "Ft Benning, GA")
#geom_line(data=KLSF_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'))
KLSF_Likelihood_cautionary_days.plot

##Hazardous Days
KLSF_Likelihood_hazardous_days.plot <- ggplot(KLSF_Caution_Days.prev30) +
  geom_smooth(aes(x = date, y = High_Heat_Days, color = 'orange'),
              se = TRUE, fill = 'orange', alpha = 0.3) +
  geom_smooth(data = KLSF_Caution_Days.last10,
              aes(x = date, y = High_Heat_Days, color = 'red'),
              se = TRUE, fill = 'red', alpha = 0.3) +
  scale_color_identity(name = "Years w/ 95%\nconfidence interval", breaks = c("red", "orange"),
                       labels = c("Last 10-year Avg", "Previous 30-year Avg\n(1982-2011)"),
                       guide = "legend") +
  scale_y_continuous(limits = c(0.1, .9), labels = scales::percent, breaks = seq(0, 1, .1),
                     name = "Probability of experiencing a Hazardous Heat day", expand = c(0, 0)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%B", date_minor_breaks = "1 week",
               name = "Date \n minor breaks = 1 week",
               limits = as.Date(c("2023-05-01", "2023-10-01")), expand = c(0, 0)) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 30),
        legend.background = element_rect(fill = "lightblue", size = 0.5, linetype = "solid"),
        plot.title = element_text(family = '', face = 'bold', colour = 'Red', size = 16)) +
  #annotate("text",label="~10% increase in likelihood of hazardous days in the last week of July",size=6,x=KLSF_Caution_Days.last10$date[40],y=.9)+
  ggtitle("Hazardous heat (category 4 or 5) daily probability", subtitle = "Ft Benning, GA")
#geom_line(data=KLSF_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'))
KLSF_Likelihood_hazardous_days.plot
```
## [NEW] Last 30-year Cat 4 or Cat 5
```{r}
##Create the average occurrence of "cat 4" day
KLSF_Cat4_Days.last30 <- KLSF_RedorBlack_Flag_Days.last30 %>%
  filter(Day_Cat4 == 1) %>%
  mutate(day = day(date), month = month(date)) %>%
  group_by(month, day) %>%
  count(day) %>%
  rename("Heat Cat 4" = n) %>%
  mutate(date = make_date(year = 2023, month = month, day = day)) %>%
  mutate(Julian = yday(date)) %>%
  select("Heat Cat 4", Julian, date)
KLSF_Cat4_Days.last30$`Heat Cat 4` <- KLSF_Cat4_Days.last30$`Heat Cat 4` / 30
KLSF_Cat4_Days.last30 <- KLSF_Cat4_Days.last30 %>%
  rename(Likelihood_Cat4 = `Heat Cat 4`)

##Create the average occurrence of "cat 5" day
KLSF_Cat5_Days.last30 <- KLSF_RedorBlack_Flag_Days.last30 %>%
  filter(Day_Cat5 == 1) %>%
  mutate(day = day(date), month = month(date)) %>%
  group_by(month, day) %>%
  count(day) %>%
  rename("Heat Cat 5" = n) %>%
  mutate(date = make_date(year = 2023, month = month, day = day)) %>%
  mutate(Julian = yday(date)) %>%
  select("Heat Cat 5", Julian, date)
KLSF_Cat5_Days.last30$`Heat Cat 5` <- KLSF_Cat5_Days.last30$`Heat Cat 5` / 30
KLSF_Cat5_Days.last30 <- KLSF_Cat5_Days.last30 %>%
  rename(Likelihood_Cat5 = `Heat Cat 5`)

##Create the average occurrence of "hazardous heat" (cat 4 OR cat 5) day
## NOTE: vectorized `|` is required here — the scalar `||` errors on vector
## input in R >= 4.3 and silently used only the first element before that.
KLSF_Hazardous_Days.last30 <- KLSF_RedorBlack_Flag_Days.last30 %>%
  filter(Day_Cat5 == 1 | Day_Cat4 == 1) %>%
  mutate(day = day(date), month = month(date)) %>%
  group_by(month, day) %>%
  count(day) %>%
  rename("Hazardous Heat" = n) %>%
  mutate(date = make_date(year = 2023, month = month, day = day)) %>%
  mutate(Julian = yday(date)) %>%
  select("Hazardous Heat", Julian, date)
KLSF_Hazardous_Days.last30$`Hazardous Heat` <- KLSF_Hazardous_Days.last30$`Hazardous Heat` / 30
KLSF_Hazardous_Days.last30 <- KLSF_Hazardous_Days.last30 %>%
  rename(Likelihood_Hazardous_Heat = `Hazardous Heat`)

KLSF_Likelihood_Cat4xCat5.last30 <-
  full_join(KLSF_Cat4_Days.last30, KLSF_Cat5_Days.last30, by = "Julian") %>%
  select(Likelihood_Cat4, Likelihood_Cat5, Julian, date.x) %>%
  rename(date = date.x)
KLSF_Likelihood_Cat4xCat5.last30 <-
  full_join(KLSF_Likelihood_Cat4xCat5.last30, KLSF_Hazardous_Days.last30, by = "Julian") %>%
  select(Likelihood_Cat4, Likelihood_Cat5, Likelihood_Hazardous_Heat, Julian, date.x) %>%
  rename(date = date.x)
KLSF_Likelihood_Cat4xCat5.last30 <-
  replace(KLSF_Likelihood_Cat4xCat5.last30, is.na(KLSF_Likelihood_Cat4xCat5.last30), 0)
```
### [NEW] Plot the likelihood that a day is Cat4 or Cat5
```{r}
##Heat Cat 4 and Heat Cat 5, last 30
KLSF_Likelihood_Cat4xCat5.last30.plot <- ggplot(KLSF_Likelihood_Cat4xCat5.last30) +
  geom_smooth(aes(x = date, y = Likelihood_Cat4, color = 'orange'),
              se = TRUE, fill = 'orange', alpha = 0.4) +
  geom_smooth(aes(x = date, y = Likelihood_Cat5, color = 'red'),
              se = TRUE, fill = 'red', alpha = 0.4) +
  scale_color_identity(name = "Likelihood w/ 95%\nconfidence interval", breaks = c("orange", "red"),
                       labels = c("Heat Category 4 (Red flag)", "Heat Category 5 (Black flag)"),
                       guide = "legend") +
  scale_y_continuous(limits = c(0, .8), labels = scales::percent, breaks = seq(0, 1, .1),
                     name = "Chances a day experiences heat category 4 or 5", expand = c(0, 0)) +
  scale_x_date(date_breaks = "1 month", date_labels = "%B", date_minor_breaks = "1 week",
               name = "Date \n minor breaks = 1 week",
               limits = as.Date(c("2023-05-01", "2023-10-01")), expand = c(0, 0)) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 30),
        legend.background = element_rect(fill = "light grey", size = 0.5, linetype = "solid"),
        plot.title = element_text(family = '', face = 'bold', colour = 'Black', size = 16)) +
  ggtitle("Daily probability of experiencing hazardous heat",
          subtitle = "Ft Moore, GA\n(Oct 1992 - Sept 2022)")
KLSF_Likelihood_Cat4xCat5.last30.plot
```
##25th-75th Quartiles
```{r}
KLSF_Daily_Values.C <- KLSF_Daily_Values.C %>%
  mutate(day = day(date)) %>%
  mutate(Julian = yday(date))
KLSF_Daily_Values.C.previous30 <- KLSF_Daily_Values.C %>%
  filter(between(date, "1982-01-01", "2011-12-31"))
KLSF_Daily_Values.C.last10 <- KLSF_Daily_Values.C %>%
  filter(between(date, "2012-10-01", "2022-09-30"))
```
###- Previous 30 years - Find Quantiles and create data frame
```{r}
## find the quartiles for each Julian day
##WBGT
### daily avg WBGT
KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT <- do.call("rbind",
  tapply(KLSF_Daily_Values.C.previous30$daily_avg_WBGT,
         KLSF_Daily_Values.C.previous30$Julian, quantile))
KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT <-
  KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT %>%
  as.data.frame() ##create a new data frame
KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT$Julian <-
  c(1:nrow(KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT))
KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT$date <-
  as.Date(KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT$Julian,
          origin = as.Date("2022-12-31"))
#view(KLSF_Daily_Values.C.previous30.quantile.daily_avg_WBGT)

### daily high WBGT
KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT <- do.call("rbind",
  tapply(KLSF_Daily_Values.C.previous30$daily_high_WBGT,
         KLSF_Daily_Values.C.previous30$Julian, quantile))
KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT <- KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT%>%
as.data.frame() ##create a new data frame (completes the pipe on the previous line)
KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT$Julian <-
  c(1:nrow(KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT))
KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT$date <-
  as.Date(KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT$Julian,
          origin = as.Date("2022-12-31"))
#view(KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT)

## Helper: per-Julian-day quantile matrix -> data frame with a Julian counter
## and a display date in 2023 (origin 2022-12-31 so Julian 1 -> Jan 1).
## Replaces nine copy-pasted stanzas below.
daily_quantiles <- function(values, julian) {
  q <- as.data.frame(do.call("rbind", tapply(values, julian, quantile)))
  q$Julian <- seq_len(nrow(q))
  q$date <- as.Date(q$Julian, origin = as.Date("2022-12-31"))
  q
}

### daily low WBGT
KLSF_Daily_Values.C.previous30.quantile.daily_low_WBGT <-
  daily_quantiles(KLSF_Daily_Values.C.previous30$daily_low_WBGT,
                  KLSF_Daily_Values.C.previous30$Julian)

###temperature
KLSF_Daily_Values.C.previous30.quantile.daily_avg <-
  daily_quantiles(KLSF_Daily_Values.C.previous30$daily_avg,
                  KLSF_Daily_Values.C.previous30$Julian)
KLSF_Daily_Values.C.previous30.quantile.daily_high <-
  daily_quantiles(KLSF_Daily_Values.C.previous30$daily_high,
                  KLSF_Daily_Values.C.previous30$Julian)
KLSF_Daily_Values.C.previous30.quantile.daily_low <-
  daily_quantiles(KLSF_Daily_Values.C.previous30$daily_low,
                  KLSF_Daily_Values.C.previous30$Julian)
```
###- Last 10 years - Find Quantiles and create data frame
```{r}
## Same per-day quartile frames for the last 10 years
##WBGT
KLSF_Daily_Values.C.last10.quantile.daily_avg_WBGT <-
  daily_quantiles(KLSF_Daily_Values.C.last10$daily_avg_WBGT,
                  KLSF_Daily_Values.C.last10$Julian)
KLSF_Daily_Values.C.last10.quantile.daily_high_WBGT <-
  daily_quantiles(KLSF_Daily_Values.C.last10$daily_high_WBGT,
                  KLSF_Daily_Values.C.last10$Julian)
KLSF_Daily_Values.C.last10.quantile.daily_low_WBGT <-
  daily_quantiles(KLSF_Daily_Values.C.last10$daily_low_WBGT,
                  KLSF_Daily_Values.C.last10$Julian)
###temperature
KLSF_Daily_Values.C.last10.quantile.daily_avg <-
  daily_quantiles(KLSF_Daily_Values.C.last10$daily_avg,
                  KLSF_Daily_Values.C.last10$Julian)
KLSF_Daily_Values.C.last10.quantile.daily_high <-
  daily_quantiles(KLSF_Daily_Values.C.last10$daily_high,
                  KLSF_Daily_Values.C.last10$Julian)
KLSF_Daily_Values.C.last10.quantile.daily_low <-
  daily_quantiles(KLSF_Daily_Values.C.last10$daily_low,
                  KLSF_Daily_Values.C.last10$Julian)
```
###Plot the quartiles
```{r}
KLSF_Quantiles_high_WBGT.plot <- ggplot() +
  geom_ribbon(data = KLSF_Daily_Values.C.previous30.quantile.daily_high_WBGT,
              aes(x = date, ymin = `25%`, ymax = `75%`, fill = 'orange'),
              color = 'grey', alpha = 0.4) +
  geom_ribbon(data = KLSF_Daily_Values.C.last10.quantile.daily_high_WBGT,
              aes(x = date, ymin = `25%`, ymax = `75%`, fill = 'red'),
              color = 'red', alpha = 0.6) +
  scale_x_date(date_breaks = "1 month", date_labels = "%B", date_minor_breaks = "1 week",
               name = "Date \n minor breaks = 1 week",
               limits = as.Date(c("2023-05-01", "2023-10-01")), expand = c(0, 0)) +
  theme_light() +
  theme(axis.text.x = element_text(angle = 30),
        plot.title = element_text(family = '', face = 'bold', colour = 'Red', size = 16)) +
  scale_y_continuous(limits = c(8, 35), name = "WBGT") +
  geom_ribbon(data = KLSF_Daily_Values.C.previous30.quantile.daily_low_WBGT,
              aes(x = date, ymin = `25%`, ymax = `75%`, fill = 'blue'),
              color = 'grey', alpha = 0.4) +
  geom_ribbon(data = KLSF_Daily_Values.C.last10.quantile.daily_low_WBGT,
              aes(x = date, ymin = `25%`, ymax = `75%`, fill = 'purple'),
              color = 'purple', alpha = 0.6) +
  scale_fill_identity(name = "25th-75th Quartiles", breaks = c("red", "orange", "purple", "blue"),
                      labels = c("Last 10-year\nmaximum", "Previous 30-year\nmaximum",
                                 "Last 10-year\nminimum", "Previous 30-year\n minimum"),
                      guide = "legend") +
  ggtitle("Heat Season Maximum and Minimum WBGT", subtitle = "Ft Moore, GA")
KLSF_Quantiles_high_WBGT.plot
```
#KTBN (Ft Leonard Wood, MO)
#### INITIAL DATA LOAD AND SET UP
```{r}
#this dataset must be requested from the 14th weather squadron
KTBN <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KTBN_Raw.csv")
KTBN <- KTBN %>%
  mutate('datetime' = make_datetime(year = Year, month = Month, day = Day, hour = Hour..UTC.)) %>%
  pad()
## data-completeness counts (521873 = padded hourly rows in the record —
## TODO confirm against nrow(KTBN))
print(sum(is.na(KTBN$Temperature..F.)))
print(521873 - sum(is.na(KTBN$Temperature..F.)))
print(sum(is.na(KTBN$Derived.Wet.Bulb.Globe.Temperature..F.)))
521873 - sum(is.na(KTBN$Derived.Wet.Bulb.Globe.Temperature..F.))

KTBN_last10_raw <- KTBN %>%
  filter(between(datetime, "2012-10-01 00:00:00 UTC", "2022-09-30 23:00:00 UTC")) %>%
  filter(between(Hour..UTC., 8, 19))
print(43208 - (sum(is.na(KTBN_last10_raw$Derived.Wet.Bulb.Globe.Temperature..F.))))
```
###Data Table build
```{r}
## Prints row count, missing-WBGT count, and present-WBGT count, then returns
## the fraction of non-missing observations (auto-printed at top level) —
## mirrors the original print(print(...)-print(...))/nrow(...) idiom.
report_completeness <- function(df) {
  total <- print(nrow(df))
  missing <- print(sum(is.na(df$Derived.Wet.Bulb.Globe.Temperature..F.)))
  print(total - missing) / total
}

KTBN_raw_60s <- KTBN %>%
  filter(between(datetime, "1960-01-01 00:00:00", "1969-12-31 23:00:00"))
report_completeness(KTBN_raw_60s)
KTBN_raw_60s.daylight <- KTBN_raw_60s %>%
  mutate("Hour" = hour(datetime)) %>%
  filter(between(Hour, 8, 19))

## Prints row count, missing-WBGT count, and present-WBGT count, then returns
## the fraction of non-missing observations (auto-printed at top level) —
## mirrors the original print(print(...)-print(...))/nrow(...) idiom.
report_completeness <- function(df) {
  total <- print(nrow(df))
  missing <- print(sum(is.na(df$Derived.Wet.Bulb.Globe.Temperature..F.)))
  print(total - missing) / total
}
## Restrict an hourly frame to daylight hours (08:00-19:00)
daylight_hours <- function(df) {
  df %>%
    mutate("Hour" = hour(datetime)) %>%
    filter(between(Hour, 8, 19))
}

report_completeness(KTBN_raw_60s.daylight)

## Per-decade completeness, all hours then daylight hours
KTBN_raw_70s <- KTBN %>%
  filter(between(datetime, "1970-01-01 00:00:00", "1979-12-31 23:00:00"))
report_completeness(KTBN_raw_70s)
KTBN_raw_70s.daylight <- daylight_hours(KTBN_raw_70s)
report_completeness(KTBN_raw_70s.daylight)

KTBN_raw_80s <- KTBN %>%
  filter(between(datetime, "1980-01-01 00:00:00", "1989-12-31 23:00:00"))
report_completeness(KTBN_raw_80s)
KTBN_raw_80s.daylight <- daylight_hours(KTBN_raw_80s)
report_completeness(KTBN_raw_80s.daylight)

KTBN_raw_90s <- KTBN %>%
  filter(between(datetime, "1990-01-01 00:00:00", "1999-12-31 23:00:00"))
report_completeness(KTBN_raw_90s)
KTBN_raw_90s.daylight <- daylight_hours(KTBN_raw_90s)
report_completeness(KTBN_raw_90s.daylight)

KTBN_raw_00s <- KTBN %>%
  filter(between(datetime, "2000-01-01 00:00:00", "2009-12-31 23:00:00"))
report_completeness(KTBN_raw_00s)
KTBN_raw_00s.daylight <- daylight_hours(KTBN_raw_00s)
report_completeness(KTBN_raw_00s.daylight)

KTBN_raw_10s <- KTBN %>%
  filter(between(datetime, "2010-01-01 00:00:00", "2019-12-31 23:00:00"))
report_completeness(KTBN_raw_10s)
KTBN_raw_10s.daylight <- daylight_hours(KTBN_raw_10s)
report_completeness(KTBN_raw_10s.daylight)

KTBN_raw_20s <- KTBN %>%
  filter(between(datetime, "2020-01-01 00:00:00", "2022-12-31 23:00:00"))
report_completeness(KTBN_raw_20s)
KTBN_raw_20s.daylight <- daylight_hours(KTBN_raw_20s)
report_completeness(KTBN_raw_20s.daylight)

KTBN_raw_last30 <- KTBN %>%
  filter(datetime > "1992-10-01 00:00:00")
report_completeness(KTBN_raw_last30)
```
###Wrangle
```{r}
KTBN <- KTBN %>%
  mutate('Temp.C' = ((Temperature..F. - 32) * (5/9))) %>%
  mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F. - 32) * (5/9)) %>%
  mutate('Heat.Index.C' = ((Heat.Index..F. - 32) * (5/9))) %>%
  mutate('WBGT.C' = ((Derived.Wet.Bulb.Globe.Temperature..F. - 32) * (5/9))) %>%
  rename('Temp.F' = Temperature..F.) %>%
  rename('Dewpoint.Temp.F' = Dewpoint.Temperature..F.) %>%
  rename('Heat.Index.F' = Heat.Index..F.) %>%
  rename("WBGT.F" = Derived.Wet.Bulb.Globe.Temperature..F.) %>%
  mutate("Hour" = hour(datetime)) %>%
  mutate("Day" = day(datetime)) %>%
  mutate("Month" = month(datetime)) %>%
  mutate("Year" = year(datetime)) %>%
  mutate(Date = make_date(day = Day, month = Month, year = Year)) %>%
  ## interpolate short gaps only (<= 3 consecutive missing hours)
  mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap = 3)) %>%
  mutate(Temp.C = zoo::na.approx(Temp.C, maxgap = 3))

##CREATE DECADE COLUMN
KTBN <- KTBN %>%
  mutate(Decade = floor(Year/10)*10)
print(sum(is.na(KTBN$Temp.C)))
print(sum(is.na(KTBN$WBGT.C)))
521873 - sum(is.na(KTBN$WBGT.C))
```
## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
###1960-1970
```{r}
## Hour-of-day / day / month climatology from the 1960s-1970s, used to fill
## the remaining missing values in the sparse 1960s record.
KTBN_60s_Hour_Avg <- KTBN %>%
  filter(Decade == "1960" | Decade == "1970") %>%
  group_by(Hour, Day, Month) %>%
  summarise_at(c("WBGT.C", "Temp.C"), mean, na.rm = TRUE)

KTBN_60s <- KTBN %>%
  filter(Decade == "1960") %>%
  as.data.frame() %>%
  mutate(Date = make_date(day = Day, month = Month, year = Year)) %>%
  mutate(count = row_number())   # was c(1:59489); row_number() avoids the magic row count
KTBN_60s <- full_join(KTBN_60s, KTBN_60s_Hour_Avg,
                      by = c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KTBN_90s)
## Replace NA observations ('x' columns) with the climatology ('y' columns)
KTBN_60s <- mutate(KTBN_60s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~ WBGT.C.y,
  TRUE ~ WBGT.C.x
))
KTBN_60s <- mutate(KTBN_60s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~ Temp.C.y,
  TRUE ~ Temp.C.x
))
KTBN_60s <- KTBN_60s %>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x, Date, count, Temp.C.x) %>%
  rename(Ave_WBGT.C = WBGT.C.x) %>%
  rename(Ave_Temp.C = Temp.C.x)
#%>%
# rename(Decade=Decade.x)
```
####1970s
```{r}
##1970s: ~20% missing (after maxgap=3 interpolation)
##Data Prep
KTBN_70s_Hour_Avg <- KTBN %>%
  filter(Decade == "1970") %>%
  group_by(Hour, Day, Month) %>%
  summarise_at(c("WBGT.C", "Temp.C"), mean, na.rm = TRUE)

KTBN_70s <- KTBN %>%
  filter(Decade == "1970") %>%
  as.data.frame() %>%
  mutate(Date = make_date(day = Day, month = Month, year = Year)) %>%
  mutate(count = row_number())   # was c(1:87648)
KTBN_70s <- full_join(KTBN_70s,KTBN_70s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day",
"Month" = "Month")) ##Adds the averaged column in #view(KTBN_90s) KTBN_70s <- mutate(KTBN_70s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KTBN_70s <- mutate(KTBN_70s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KTBN_70s <- KTBN_70s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ####1980 note this uses a two decade average ```{r} ##1980s - no missing values KTBN_80s_Hour_Avg <- KTBN%>% filter(Decade=="1980"|Decade=="1970"|Decade=="1960")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation print(sum(is.na(KTBN_80s_Hour_Avg))) KTBN_80s <- KTBN%>% filter(Decade=="1980")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87672)) KTBN_80s <- full_join(KTBN_80s,KTBN_80s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KTBN_90s) KTBN_80s <- mutate(KTBN_80s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KTBN_80s <- mutate(KTBN_80s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KTBN_80s <- KTBN_80s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ####1990 note this uses a two decade average ```{r} ##1990s KTBN_90s_Hour_Avg <- KTBN%>% filter(Decade=="1990"|Decade=="2000"|Decade=="1980")%>% group_by(Month, Day, Hour)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that 
day, ignoring missing data in calculation #print(sum(is.na(KTBN_90s_Hour_Avg))) #view(KTBN_90s_Hour_Avg) NACheck <- KTBN_90s_Hour_Avg[rowSums(is.na(KTBN_90s_Hour_Avg))>0,] #view(NACheck) #print(sum(is.na(KTBN_90s))) ##create new hours column hours <- rep(c(0:23),times=3652) #avg_WBGT_90s <- rep(KTBN_90s_Hour_Avg$WBGT.C,times=10) #avg_Temp_90s <- rep(KTBN_90s_Hour_Avg$Temp.C,times=10) KTBN_90s <- KTBN%>% filter(Decade=="1990")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87648))%>% mutate(Hour=hours) KTBN_90s <- full_join(KTBN_90s,KTBN_90s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KTBN_90s) #print(sum(is.na(KTBN_90s$WBGT.C.x))) KTBN_90s <- mutate(KTBN_90s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KTBN_90s <- mutate(KTBN_90s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KTBN_90s <- KTBN_90s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) #print(sum(is.na(KTBN_90s$Ave_WBGT.C))) ``` ####2000s ```{r} ##2000 ##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful. 
# Hourly climatology for the 2000s: mean WBGT/Temp for each (Hour, Day, Month)
# slot over the 1990s-2010s, used to fill gaps the maxgap=3 interpolation missed.
KTBN_00s_Hour_Avg <- KTBN%>%
  filter(Decade=="2000"|Decade=="1990"|Decade=="2010")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)

# 2000s hourly observations. `count` is a plain row index; row_number() replaces
# the hard-coded c(1:87672), which errors if the padded decade length changes.
KTBN_00s <- KTBN%>%
  filter(Decade=="2000")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number())

# Attach climatology columns (suffix .x = observed, .y = climatology).
KTBN_00s <- full_join(KTBN_00s,KTBN_00s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))

# Fill remaining NAs in the observed series from the climatology.
KTBN_00s <- mutate(KTBN_00s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
))
KTBN_00s <- mutate(KTBN_00s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KTBN_00s <- KTBN_00s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####2010

```{r}
##2010s: same fill pattern, climatology averaged over the 2000s-2020s.
KTBN_10s_Hour_Avg <- KTBN%>%
  filter(Decade=="2010"|Decade=="2000"|Decade=="2020")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)

KTBN_10s <- KTBN%>%
  filter(Decade=="2010")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number())

KTBN_10s <- full_join(KTBN_10s,KTBN_10s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))

KTBN_10s <- mutate(KTBN_10s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
))
KTBN_10s <- mutate(KTBN_10s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KTBN_10s <- KTBN_10s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

#### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029

```{r}
##2020s (partial decade): climatology averaged over the 2010s-2020s.
KTBN_20s_Hour_Avg <- KTBN%>%
  filter(Decade=="2020"|Decade=="2010")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)

KTBN_20s <- KTBN%>%
  filter(Decade=="2020")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number())

KTBN_20s <- full_join(KTBN_20s,KTBN_20s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))

KTBN_20s <- mutate(KTBN_20s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
))
KTBN_20s <- mutate(KTBN_20s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KTBN_20s <- KTBN_20s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

###Combine all chunks to new KTBN data set with the interpolated values.
Only 22 NAs now.
```{r}
KTBN_Wrangled <- rbind(KTBN_60s,KTBN_70s,KTBN_80s,KTBN_90s,KTBN_00s,KTBN_10s,KTBN_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
sum(is.na(KTBN_Wrangled$Ave_Temp.C)) ##28 still missing
sum(is.na(KTBN_Wrangled$Ave_WBGT.C)) ##22 still missing
missing_values <- c(sum(is.na(KTBN_60s$Ave_WBGT.C)),sum(is.na(KTBN_70s$Ave_WBGT.C)),sum(is.na(KTBN_80s$Ave_WBGT.C)),sum(is.na(KTBN_90s$Ave_WBGT.C)),sum(is.na(KTBN_00s$Ave_WBGT.C)),sum(is.na(KTBN_10s$Ave_WBGT.C)))
```

###Create a single df with daily values

```{r}
KTBN_Daily_Values.C <- KTBN_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C),daily_low = min(Ave_Temp.C),daily_avg = mean(Ave_Temp.C),daily_high_WBGT = max(Ave_WBGT.C),daily_low_WBGT = min(Ave_WBGT.C),daily_avg_WBGT = mean(Ave_WBGT.C))%>%
  as.data.frame()
```

### Look for missing dates in the complete data set
No missing dates!

```{r}
FullSeq <- seq.Date(from = min(KTBN_Wrangled$date), to = max(KTBN_Wrangled$date), by = 1)
Missing <- FullSeq[!FullSeq %in% KTBN_Wrangled$date]
Missing
```

###RED and BLACK Count
#### Start Red and Black Flag Day Analysis

```{r}
KTBN_REDorABOVEbyYear <- KTBN_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
KTBN_BLACKorABOVEbyYear <- KTBN_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)
##Filtering by Red or Above will include Black days - Hazard Day
KTBN_AboveRed.last40 <- KTBN_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Above_Red = n)%>%
  mutate(Day_Above_Red =ifelse(Day_Above_Red == '',1,1))%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))
##Filters for Green and above - Caution Day
KTBN_AboveGreen.last40 <- KTBN_Wrangled%>%
subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[1],Heat_Categories$Category_Temp_Min.C[2]))%>% group_by(date)%>% count(date)%>% rename(Day_Green_or_Yellow = n)%>% mutate(Day_Green_or_Yellow =ifelse(Day_Green_or_Yellow == '',1,1))%>% mutate(Julian=yday(date))%>% filter(date>="1982-01-01"&date<="2021-12-31")%>% mutate(year=lubridate::year(date)) ``` ##Create DF with all Flag Day counts only these are days at are at RED OR ABOVE or are days following RED OR ABOVE that are also GREEN OR ABOVE ```{r} ##Create a df that has all days at/above red OR days at/above green following a red+ day KTBN_Flag_Days.last40 <- full_join(KTBN_AboveRed.last40,KTBN_AboveGreen.last40,by="date") KTBN_Flag_Days.last40 <- KTBN_Flag_Days.last40%>% select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>% mutate(Julian.y=yday(date))%>% mutate(year.y=lubridate::year(date)) KTBN_Flag_Days.last40 <- replace(KTBN_Flag_Days.last40,is.na(KTBN_Flag_Days.last40),0) #view(KTBN_Flag_Days.last40) ####YOU STOPPED HERE ON 28-MAR-2023 trying to make a script that counts days above red or days above green following a day above red KTBN_Flag_Days.last40 <- KTBN_Flag_Days.last40%>%arrange(ymd(KTBN_Flag_Days.last40$date)) ##this line gets the df in date order for the following loop KTBN_Flag_Days.last40$count <- c(1:nrow(KTBN_Flag_Days.last40)) KTBN_Flag_Days.last40$Lagged <- lag(KTBN_Flag_Days.last40$Day_Above_Red>0,1) KTBN_Caution_Days.last40 <- KTBN_Flag_Days.last40%>% filter(Lagged==TRUE | Day_Above_Red==1) #view(KTBN_Flag_Days.last40) #view(KTBN_Caution_Days.last40) ``` ##TIME SERIES ### Seasonal and trend plots ```{r} ####TIME SERIES ANALYSIS ON THE DATASET ##Daily HIGH print(sum(is.na(KTBN_Daily_Values.C))) KTBN_Daily_Values.C <- KTBN_Daily_Values.C%>% mutate(daily_high = zoo::na.approx(daily_high))%>% mutate(daily_avg_WBGT = zoo::na.approx(daily_avg_WBGT))%>% mutate(daily_low = zoo::na.approx(daily_low))%>% mutate(daily_low_WBGT = zoo::na.approx(daily_low_WBGT))%>% mutate(daily_avg 
= zoo::na.approx(daily_avg))%>% mutate(daily_avg_WBGT=zoo::na.approx(daily_avg_WBGT))%>% mutate(daily_high_WBGT=zoo::na.approx(daily_high_WBGT)) KTBN_Daily_High.TS <- ts(KTBN_Daily_Values.C$daily_high, start = c(1960,01,01),frequency=365) KTBN_Daily_High.decomp <- stl(KTBN_Daily_High.TS,s.window="periodic") plot(KTBN_Daily_High.decomp) KTBN_Daily_High_Trend <- Kendall::SeasonalMannKendall(KTBN_Daily_High.TS) summary(KTBN_Daily_High_Trend) ##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value?). ##Daily LOW KTBN_Daily_Low.TS <- ts(KTBN_Daily_Values.C$daily_low, start = c(1960,01,01),frequency=365) KTBN_Daily_Low.decomp <- stl(KTBN_Daily_Low.TS,s.window="periodic") plot(KTBN_Daily_Low.decomp) KTBN_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KTBN_Daily_Low.TS) summary(KTBN_Daily_Low_Trend) ##Daily AVG KTBN_Daily_Avg.TS <- ts(KTBN_Daily_Values.C$daily_avg, start = c(1960,01,01),frequency=365) KTBN_Daily_Avg.decomp <- stl(KTBN_Daily_Avg.TS,s.window="periodic") plot(KTBN_Daily_Avg.decomp) KTBN_Daily_Avg_Trend <- Kendall::SeasonalMannKendall(KTBN_Daily_Avg.TS) summary(KTBN_Daily_Avg_Trend) ##WBGT ##WBGT.High KTBN_Daily_High_WBGT.TS <- ts(KTBN_Daily_Values.C$daily_high_WBGT, start = c(1960,01,01),frequency=365) KTBN_Daily_High_WBGT.decomp <- stl(KTBN_Daily_High_WBGT.TS,s.window="periodic") plot(KTBN_Daily_High_WBGT.decomp) KTBN_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KTBN_Daily_High_WBGT.TS) summary(KTBN_Daily_High_WBGT_Trend) ##WBGT Low KTBN_Daily_low_WBGT.TS <- ts(KTBN_Daily_Values.C$daily_low_WBGT, start = c(1960,01,01),frequency=365) KTBN_Daily_low_WBGT.decomp <- stl(KTBN_Daily_low_WBGT.TS,s.window="periodic") plot(KTBN_Daily_low_WBGT.decomp) KTBN_Daily_low_WBGT_Trend <- Kendall::SeasonalMannKendall(KTBN_Daily_low_WBGT.TS) summary(KTBN_Daily_low_WBGT_Trend) ##WBGT Avg KTBN_Daily_avg_WBGT.TS <- 
ts(KTBN_Daily_Values.C$daily_avg_WBGT, start = c(1960,01,01),frequency=365) KTBN_Daily_avg_WBGT.decomp <- stl(KTBN_Daily_avg_WBGT.TS,s.window="periodic") plot(KTBN_Daily_avg_WBGT.decomp) KTBN_Daily_avg_WBGT_Trend <- Kendall::SeasonalMannKendall(KTBN_Daily_avg_WBGT.TS) summary(KTBN_Daily_avg_WBGT_Trend) ``` ### Mann Kendall Seasonal Values ```{r} ##MANUALLY RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH #Set up month and year columns KTBN_Daily_Values.C$month <- month(ymd(KTBN_Daily_Values.C$date)) KTBN_Daily_Values.C$year <- year(ymd(KTBN_Daily_Values.C$date)) ###WBGT ##HIGH WBGT KTBN_Daily_High_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_high_WBGT ~ month+year, data=KTBN_Daily_Values.C) #return just tau, slope, and intercept KTBN_Daily_High_WBGT.C_MannKendall$estimate ##LOW WBGT KTBN_Daily_Low_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_low_WBGT ~ month+year, data=KTBN_Daily_Values.C) KTBN_Daily_Low_WBGT.C_MannKendall$estimate ##AVG WBGT KTBN_Daily_Avg_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_avg_WBGT ~ month+year, data=KTBN_Daily_Values.C) KTBN_Daily_Avg_WBGT.C_MannKendall$estimate ##Build data frame with the needed information KTBN_Daily_TS_estimates.C <- KTBN_Daily_High_WBGT.C_MannKendall$estimate%>% as.data.frame() KTBN_Daily_TS_estimates.C$Low_WBGT <- as.data.frame(KTBN_Daily_Low_WBGT.C_MannKendall$estimate) KTBN_Daily_TS_estimates.C$Avg_WBGT <- as.data.frame(KTBN_Daily_Avg_WBGT.C_MannKendall$estimate) KTBN_Daily_TS_estimates.C$High_WBGT <- as.data.frame(KTBN_Daily_High_WBGT.C_MannKendall$estimate) KTBN_Daily_TS_estimates_WBGT.C <- KTBN_Daily_TS_estimates.C[2:4] ##Remove uneeded variables #remove(KTBN_Daily_High_WBGT.C_MannKendall,KTBN_Daily_Low_WBGT.C_MannKendall,KTBN_Daily_Avg_WBGT.C_MannKendall) ###Temperature ##HIGH KTBN_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(daily_high ~ month+year, data=KTBN_Daily_Values.C) #return just tau, slope, and intercept KTBN_Daily_High.C_MannKendall$estimate ##LOW WBGT 
# Seasonal Mann-Kendall on daily minimum temperature (deg C).
KTBN_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(daily_low ~ month+year, data=KTBN_Daily_Values.C)
KTBN_Daily_Low.C_MannKendall$estimate
##AVG temperature
KTBN_Daily_Avg.C_MannKendall <- kendallSeasonalTrendTest(daily_avg ~ month+year, data=KTBN_Daily_Values.C)
KTBN_Daily_Avg.C_MannKendall$estimate
##Build data frame with the needed information
# NOTE(review): each assignment below stores a whole one-column data.frame
# inside a column of KTBN_Daily_TS_estimates.C. Downstream code only prints
# these tables, so it works, but it is a fragile structure.
KTBN_Daily_TS_estimates.C <- KTBN_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KTBN_Daily_TS_estimates.C$Low <- as.data.frame(KTBN_Daily_Low.C_MannKendall$estimate)
KTBN_Daily_TS_estimates.C$Avg <- as.data.frame(KTBN_Daily_Avg.C_MannKendall$estimate)
KTBN_Daily_TS_estimates.C$High <- as.data.frame(KTBN_Daily_High.C_MannKendall$estimate)
KTBN_Daily_TS_estimates.C <- KTBN_Daily_TS_estimates.C[2:4]
##Remove unneeded variables
#remove(KTBN_Daily_High.C_MannKendall,KTBN_Daily_Low.C_MannKendall,KTBN_Daily_Avg.C_MannKendall)
```

###Bottom quarter trend test

```{r}
print(sum(is.na(KTBN_Wrangled))) ##24 missing values in data set, 12 each temp and WBGT
# Linear interpolation for the few remaining gaps before the quantile work.
KTBN_Wrangled <- KTBN_Wrangled%>%
  mutate(Ave_Temp.C = zoo::na.approx(Ave_Temp.C))%>%
  mutate(Ave_WBGT.C = zoo::na.approx(Ave_WBGT.C))
##Temp
# Per-day 25th percentile of the hourly temperatures.
KTBN_quartile_test_lower25 <- KTBN_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.25)
KTBN_quartile_test_lower25$month <- month(ymd(KTBN_quartile_test_lower25$date))
KTBN_quartile_test_lower25$year <- year(ymd(KTBN_quartile_test_lower25$date))
##Bottom Quarter Temp
KTBN_quartile_test_lower25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KTBN_quartile_test_lower25)
KTBN_quartile_test_lower25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature
##WBGT
KTBN_quartile_test_lower25_WBGT <- KTBN_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.25)
KTBN_quartile_test_lower25_WBGT$month <- month(ymd(KTBN_quartile_test_lower25_WBGT$date))
KTBN_quartile_test_lower25_WBGT$year <- year(ymd(KTBN_quartile_test_lower25_WBGT$date))
##Bottom Quarter Temp
# NOTE(review): this overwrites the WBGT quartile data frame with the test
# result object (same name reused). Later chunks only read $estimate and
# $seasonal.estimates so it works, but the underlying data frame is lost —
# consider a distinct name (e.g. ..._WBGT_MannKendall) if it is needed again.
KTBN_quartile_test_lower25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KTBN_quartile_test_lower25_WBGT)
KTBN_quartile_test_lower25_WBGT$estimate ##returns a small but not 0 positive trend in temperature
```

###Upper quarter trend test

```{r}
##Temp
# Per-day 75th percentile of the hourly temperatures.
KTBN_quartile_test_upper25 <- KTBN_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.75)
KTBN_quartile_test_upper25$month <- month(ymd(KTBN_quartile_test_upper25$date))
KTBN_quartile_test_upper25$year <- year(ymd(KTBN_quartile_test_upper25$date))
##Upper Quarter Temp
KTBN_quartile_test_upper25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KTBN_quartile_test_upper25)
KTBN_quartile_test_upper25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature
##WBGT
KTBN_quartile_test_upper25_WBGT <- KTBN_Wrangled%>%
  group_by(date)%>%
  reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>%
  filter(prob==0.75)
KTBN_quartile_test_upper25_WBGT$month <- month(ymd(KTBN_quartile_test_upper25_WBGT$date))
KTBN_quartile_test_upper25_WBGT$year <- year(ymd(KTBN_quartile_test_upper25_WBGT$date))
##Upper Quarter Temp
# NOTE(review): same data-frame-overwritten-by-test-result pattern as the
# bottom-quarter chunk above.
KTBN_quartile_test_upper25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KTBN_quartile_test_upper25_WBGT)
KTBN_quartile_test_upper25_WBGT$estimate ##returns a small but not 0 positive trend in temperature
```

##time series full year data tables

```{r}
##Build data frame with the trend information - temperature (WBGT table is below)
KTBN_Daily_TS_estimates.C <- KTBN_Daily_High.C_MannKendall$estimate%>%
  as.data.frame()
KTBN_Daily_TS_estimates.C$Low <- as.data.frame(KTBN_Daily_Low.C_MannKendall$estimate)
KTBN_Daily_TS_estimates.C$Avg <- as.data.frame(KTBN_Daily_Avg.C_MannKendall$estimate)
KTBN_Daily_TS_estimates.C$Upper <- as.data.frame(KTBN_quartile_test_upper25_MannKendall$estimate)
KTBN_Daily_TS_estimates.C$Lower <- as.data.frame(KTBN_quartile_test_lower25_MannKendall$estimate)
KTBN_Daily_TS_estimates.C$High <- as.data.frame(KTBN_Daily_High.C_MannKendall$estimate)
KTBN_Daily_TS_estimates.C[2:6]
##Build data frame with the trend information - WBGT
KTBN_Daily_TS_estimates_WBGT.C <- KTBN_Daily_High_WBGT.C_MannKendall$estimate%>%
  as.data.frame()
KTBN_Daily_TS_estimates_WBGT.C$Low_WBGT <- as.data.frame(KTBN_Daily_Low_WBGT.C_MannKendall$estimate)
KTBN_Daily_TS_estimates_WBGT.C$Avg_WBGT <- as.data.frame(KTBN_Daily_Avg_WBGT.C_MannKendall$estimate)
KTBN_Daily_TS_estimates_WBGT.C$Upper_WBGT <- as.data.frame(KTBN_quartile_test_upper25_WBGT$estimate)
KTBN_Daily_TS_estimates_WBGT.C$Lower_WBGT <- as.data.frame(KTBN_quartile_test_lower25_WBGT$estimate)
KTBN_Daily_TS_estimates_WBGT.C$High_WBGT <- as.data.frame(KTBN_Daily_High_WBGT.C_MannKendall$estimate)
KTBN_Daily_TS_estimates_WBGT.C[2:6]
```

##warm month season trends

```{r}
# Mean of the per-month slopes for rows 5:9 of seasonal.estimates —
# presumably May-September; TODO confirm rows are ordered Jan..Dec.
# Order of entries: Maximum / Upper 75% / Mean / Lower 25% / Minimum.
KTBN_warm_season.C <- c(mean(KTBN_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2]))
print(KTBN_warm_season.C)
KTBN_warm_season_WBGT.C <- c(mean(KTBN_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2]))
print(KTBN_warm_season_WBGT.C)
```

##Finalize trend tables for export

```{r}
# Full-year slope (element [2] of estimate: tau, slope, intercept) for the
# five temperature metrics.
KTBN_full_year_trends.C <- as.data.frame(c(KTBN_Daily_High.C_MannKendall$estimate[2],KTBN_quartile_test_upper25_MannKendall$estimate[2],KTBN_Daily_Avg.C_MannKendall$estimate[2],KTBN_quartile_test_lower25_MannKendall$estimate[2],KTBN_Daily_Low.C_MannKendall$estimate[2]))
# Label the full-year temperature trend table and print it.
KTBN_full_year_trends.C$installation <- "FLW"
KTBN_full_year_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KTBN_full_year_trends.C)[1] <- 'Temp.C'
print(KTBN_full_year_trends.C)
# Full-year WBGT slopes (element [2] of estimate) for the five metrics.
KTBN_full_year_trends_WBGT.C <- as.data.frame(c(KTBN_Daily_High_WBGT.C_MannKendall$estimate[2],KTBN_quartile_test_upper25_WBGT$estimate[2],KTBN_Daily_Avg_WBGT.C_MannKendall$estimate[2],KTBN_quartile_test_lower25_WBGT$estimate[2],KTBN_Daily_Low_WBGT.C_MannKendall$estimate[2]))
# NOTE(review): the next two assignments target KTBN_full_year_trends.C again
# rather than KTBN_full_year_trends_WBGT.C — apparent copy-paste slip, so the
# WBGT table never receives installation/Metric columns. The later table
# assembly (select(c(1:5,8)) on the cbind) silently compensates for the
# missing columns, so fixing these two lines requires updating that chunk's
# column indices at the same time — TODO fix both together.
KTBN_full_year_trends.C$installation <- "FLW"
KTBN_full_year_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KTBN_full_year_trends_WBGT.C)[1] <- 'WBGT.C'
print(KTBN_full_year_trends_WBGT.C)
```

##warm month season trends

```{r}
# Warm-season (rows 5:9 of seasonal.estimates — presumably May-September,
# TODO confirm) mean slopes, ordered Maximum / Upper 75% / Mean / Lower 25% /
# Minimum, for temperature...
KTBN_warm_season_trends.C <- as.data.frame(c(mean(KTBN_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2])))
KTBN_warm_season_trends.C$installation <- "FLW"
KTBN_warm_season_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KTBN_warm_season_trends.C)[1] <- 'Temp.C_Warm.season'
print(KTBN_warm_season_trends.C)
# ...and for WBGT.
KTBN_warm_season_trends_WBGT.C <- as.data.frame(c(mean(KTBN_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KTBN_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),mean(KTBN_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2])))
KTBN_warm_season_trends_WBGT.C$installation <- "FLW"
KTBN_warm_season_trends_WBGT.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KTBN_warm_season_trends_WBGT.C)[1] <- 'WBGT.C_Warm.season'
print(KTBN_warm_season_trends_WBGT.C)
```
##full installation trend table

```{r}
# Assemble the installation-level historical trend table from the four pieces.
# NOTE(review): select(c(1:5,8)) is position-based and only lines up because
# KTBN_full_year_trends_WBGT.C lacks installation/Metric columns (see the
# apparent copy-paste slip in the full-year trends chunk). If that slip is
# fixed, these indices must be updated in the same change.
FLW_Historical_Trends <- cbind(KTBN_full_year_trends.C,KTBN_full_year_trends_WBGT.C,KTBN_warm_season_trends.C,KTBN_warm_season_trends_WBGT.C)
FLW_Historical_Trends <- FLW_Historical_Trends%>%
  select(c(1:5,8))%>%
  select(installation,Metric,Temp.C,WBGT.C,Temp.C_Warm.season,WBGT.C_Warm.season)
view(FLW_Historical_Trends)
```

#KVIH (adjacent weather station)
#### INITIAL DATA LOAD AND SET UP

```{r eval=FALSE}
# Load the neighboring-station record (KVIH) used later for a comparison
# against KTBN; path must be adjusted by the user (see notes at top of file).
KVIH <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KVIH_Raw.csv")
# Build an hourly datetime and pad() to insert rows for any missing hours.
KVIH <- KVIH %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad() #%>%
# mutate('date' = make_date(year=Year, month = Month, day = Day))
# Convert F -> C, rename raw columns, derive calendar fields, and linearly
# interpolate WBGT gaps of up to 3 consecutive hours.
KVIH <- KVIH%>%
  mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>%
  mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>%
  mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>%
  mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>%
  rename('Temp.F' = Temperature..F.)%>%
  rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>%
  rename('Heat.Index.F'=Heat.Index..F.)%>%
  rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>%
  mutate("Hour"=hour(datetime))%>%
  mutate("Day"=day(datetime))%>%
  mutate("Month"=month(datetime))%>%
  mutate("Year"=year(datetime))%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3))
##CREATE DECADE COLUMN
KVIH <- KVIH%>%
  mutate(Decade=floor(Year/10)*10)
```

## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
###DECADES
####1970s

```{r eval=FALSE}
##1970s - no missing values
# NOTE(review): this climatology filters Decade=="1980" although it fills the
# 1970s — possibly intentional (borrowing the 1980s average), but it
# contradicts the label above and the KTBN pattern, which averages the decade
# itself plus neighbors. TODO confirm.
KVIH_70s_Hour_Avg <- KVIH%>%
  filter(Decade=="1980")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KVIH_70s <- KVIH%>%
  filter(Decade=="1970")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:61344))
KVIH_70s <- full_join(KVIH_70s,KVIH_70s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KVIH_90s)
KVIH_70s <- mutate(KVIH_70s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KVIH_70s <- mutate(KVIH_70s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KVIH_70s <- KVIH_70s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1980

```{r eval=FALSE}
##1980s - no missing values
KVIH_80s_Hour_Avg <- KVIH%>%
  filter(Decade=="1980")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KVIH_80s <- KVIH%>%
  filter(Decade=="1980")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672))
KVIH_80s <- full_join(KVIH_80s,KVIH_80s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KVIH_90s)
KVIH_80s <- mutate(KVIH_80s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KVIH_80s <- mutate(KVIH_80s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KVIH_80s <- KVIH_80s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1990

```{r eval=FALSE}
##1990s
KVIH_90s_Hour_Avg <- KVIH%>%
  filter(Decade=="1990")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KVIH_90s <- KVIH%>%
  filter(Decade=="1990")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
KVIH_90s <- full_join(KVIH_90s,KVIH_90s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KVIH_90s)
KVIH_90s <- mutate(KVIH_90s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KVIH_90s <- mutate(KVIH_90s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KVIH_90s <- KVIH_90s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####2000s

```{r eval=FALSE}
##2000
##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful.
# 2000s hourly climatology for KVIH: mean WBGT/Temp per (Hour, Day, Month),
# used to fill remaining NAs. `count` uses row_number() instead of the fragile
# hard-coded c(1:87672), which errors if the padded decade length changes.
KVIH_00s_Hour_Avg <- KVIH%>%
  filter(Decade=="2000")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)

KVIH_00s <- KVIH%>%
  filter(Decade=="2000")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number())

# Attach climatology (.y) and fill NAs in the observed (.x) series.
KVIH_00s <- full_join(KVIH_00s,KVIH_00s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))
KVIH_00s <- mutate(KVIH_00s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
))
KVIH_00s <- mutate(KVIH_00s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KVIH_00s <- KVIH_00s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####2010

```{r eval=FALSE}
##2010s: same fill pattern as the 2000s chunk.
KVIH_10s_Hour_Avg <- KVIH%>%
  filter(Decade=="2010")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)

KVIH_10s <- KVIH%>%
  filter(Decade=="2010")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number())

KVIH_10s <- full_join(KVIH_10s,KVIH_10s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))
KVIH_10s <- mutate(KVIH_10s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
))
KVIH_10s <- mutate(KVIH_10s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KVIH_10s <- KVIH_10s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

#### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029

```{r eval=FALSE}
##2020s (partial decade): same fill pattern.
KVIH_20s_Hour_Avg <- KVIH%>%
  filter(Decade=="2020")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)

KVIH_20s <- KVIH%>%
  filter(Decade=="2020")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number())

KVIH_20s <- full_join(KVIH_20s,KVIH_20s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month"))
KVIH_20s <- mutate(KVIH_20s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
))
KVIH_20s <- mutate(KVIH_20s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KVIH_20s <- KVIH_20s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

###Combine all chunks to new KVIH data set with the interpolated values.
```{r eval=FALSE}
# Stack the filled KVIH decades into one data set.
# NOTE(review): KVIH_70s is built in an earlier chunk but omitted here, so the
# combined record starts in the 1980s — confirm this is intentional.
KVIH_Wrangled <- rbind(KVIH_80s,KVIH_90s,KVIH_00s,KVIH_10s,KVIH_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
#sum(is.na(KVIH_Wrangled$Ave_Temp.C)) ##28 still missing
sum(is.na(KVIH_Wrangled$Ave_WBGT.C)) ##22 still missing
```

##CREATE SINGLE DAY HIGHS AND LOWS

```{r eval=FALSE}
#Create single day high temp
KVIH_DAILY_HIGH.C <- KVIH_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C))%>%
  as.data.frame()
#Create single day low temp
KVIH_DAILY_LOW.C <- KVIH_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_LOW = min(Ave_Temp.C))%>%
  as.data.frame()
#Create single day high WBGT temp
KVIH_DAILY_HIGH_WBGT.C <- KVIH_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high_WBGT = max(Ave_WBGT.C))%>%
  as.data.frame()
#Create single day low WBGT temp
```

### Look for missing dates in the complete data set
No missing dates!

```{r eval=FALSE}
FullSeq <- seq.Date(from = min(KVIH_Wrangled$date), to = max(KVIH_Wrangled$date), by = 1)
Missing <- FullSeq[!FullSeq %in% KVIH_Wrangled$date]
Missing
```

###RED and BLACK Count
#### Start Red and Black Flag Day Analysis

```{r eval=FALSE}
# Hours at/above Red (category 3) and Black (category 4) WBGT thresholds.
KVIH_REDorABOVEbyYear <- KVIH_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
KVIH_BLACKorABOVEbyYear <- KVIH_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)
##Plots hours Red or Above
KVIH_REDorABOVEbyYear.plot <- ggplot(KVIH_REDorABOVEbyYear, aes(x = Year, y=Hours_Above_Red))+
  geom_line()+
  geom_point()
```

##TIME SERIES
### Seasonal and trend plots

```{r eval=FALSE}
####TIME SERIES ANALYSIS ON THE DATASET
# Interpolate remaining gaps in the daily-high series before building the ts.
KVIH_Daily_High.C_Clean <- KVIH_DAILY_HIGH.C%>%
  mutate(Temp.C_Clean = zoo::na.approx(daily_high))%>%
  select(date,Temp.C_Clean)
# NOTE(review): ts start is hard-coded to 1985 — verify it matches the first
# date actually present in KVIH_DAILY_HIGH.C.
KVIH_Daily_High.TS <- ts(KVIH_Daily_High.C_Clean$Temp.C_Clean, start = c(1985,01,01),frequency=365)
KVIH_Daily_High.decomp <- stl(KVIH_Daily_High.TS,s.window="periodic")
plot(KVIH_Daily_High.decomp)
KVIH_Daily_High_Trend <- Kendall::SeasonalMannKendall(KVIH_Daily_High.TS)
summary(KVIH_Daily_High_Trend)
##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value?).
##WBGT - INSUFFICIENT DATA IN EARLY YEARS
#summary(KVIH_DAILY_HIGH_WBGT.C)
#KVIH_Daily_High_WBGT.C_Clean <- KVIH_DAILY_HIGH_WBGT.C%>%
# mutate(WBGT.C_Clean = zoo::na.approx(KVIH_DAILY_HIGH_WBGT.C$daily_high_WBGT))%>%
# select(date,WBGT.C_Clean)
#summary(KVIH_Daily_High_WBGT.C_Clean) ##No more NAs
##Temp Trend using Mann-Kendall Seasonal Analysis
#KVIH_Daily_High_WBGT.TS <- ts(KVIH_Daily_High_WBGT.C_Clean$WBGT.C_Clean, start = c(1973,01,01),frequency=365)
#KVIH_Daily_High_WBGT.decomp <- stl(KVIH_Daily_High_WBGT.TS,s.window="periodic")
#plot(KVIH_Daily_High_WBGT.decomp)
#KVIH_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KVIH_Daily_High_WBGT.TS)
#summary(KVIH_Daily_High_WBGT_Trend) ##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value).
##Daily LOW KVIH_Daily_Low.C_Clean <- KVIH_DAILY_LOW.C%>% mutate(Temp.C_Clean = zoo::na.approx(daily_LOW))%>% select(date,Temp.C_Clean) KVIH_Daily_Low.TS <- ts(KVIH_Daily_Low.C_Clean$Temp.C_Clean, start = c(1990,01,01),frequency=365) KVIH_Daily_Low.decomp <- stl(KVIH_Daily_Low.TS,s.window="periodic") plot(KVIH_Daily_Low.decomp) KVIH_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KVIH_Daily_Low.TS) summary(KVIH_Daily_Low_Trend) ``` ### Mann Kendall Seasonal Values ```{r eval=FALSE} ##MANUALLY RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH #Set up month and year columns #KVIH_Daily_High_WBGT.C_Clean$month <- month(ymd(KVIH_Daily_High_WBGT.C_Clean$date)) #KVIH_Daily_High_WBGT.C_Clean$year <- year(ymd(KVIH_Daily_High_WBGT.C_Clean$date)) #KVIH_Daily_High_WBGT.C_MannKendall <- kendallSeasonalTrendTest(WBGT.C_Clean ~ month+year, data=KVIH_Daily_High_WBGT.C_Clean) #return just tau, slope, and intercept #KVIH_Daily_High_WBGT.C_MannKendall$estimate ##MANN KENDALL TEST - HIGH TEMPERATURE - MAX DAILY HIGH #Set up month and year columns KVIH_Daily_High.C_Clean$month <- month(ymd(KVIH_Daily_High.C_Clean$date)) KVIH_Daily_High.C_Clean$year <- year(ymd(KVIH_Daily_High.C_Clean$date)) KVIH_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(Temp.C_Clean ~ month+year, data=KVIH_Daily_High.C_Clean) #return just tau, slope, and intercept KVIH_Daily_High.C_MannKendall$estimate ##MANN KENDALL TEST - LOW TEMPERATURE - MIN DAILY LOW KVIH_Daily_Low.C_Clean$month <- month(ymd(KVIH_Daily_Low.C_Clean$date)) KVIH_Daily_Low.C_Clean$year <- year(ymd(KVIH_Daily_Low.C_Clean$date)) KVIH_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(Temp.C_Clean ~ month+year, data=KVIH_Daily_Low.C_Clean) #return just tau, slope, and intercept KVIH_Daily_Low.C_MannKendall$estimate ``` # KTBN vs KVIH COMPARISON ```{r eval=FALSE} ##CONDUCT A CORRELATION TEST BETWEEN THE TWO SITES TO SEE IF THEY MOVE IN THE SAME DIRECTION (AT LEAST STATISTICALLY) ##IF THEY DO, WE CAN SAY THAT THE TWO SITES ARE 
SHOWING THE SAME GENERAL CLIMATE TRENDS ACROSS YEARS ##NULL HYPOTHESIS IS THEY DO NOT MOVE IN THE SAME DIRECTION, AND THEREFORE WE HAVE LOWER CONFIDENCE IN USING THEM KVIH_WBGT_REDorABOVE_Hourly_byYear <- KVIH_Wrangled%>% filter(date>='1993-01-01')%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>% group_by(Year)%>% count(Year)%>% rename(Hours_Above_Red = n) KTBN_WBGT_REDorABOVE_Hourly_byYear <- KTBN_Wrangled%>% filter(date>='1993-01-01')%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>% group_by(Year)%>% count(Year)%>% rename(Hours_Above_Red = n) ##CHECK FOR NORMAL DISTRIBUTION TO THE POINTS ##Shapiro test, if P>0.05, then it IS normally distributed. In this case, hourly summary of KVIH is not normally distributed. shapiro.test(KVIH_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red) shapiro.test(KTBN_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red) ##Not normally distributed, so should not sure Pearson's correlation tests #ggplot(KVIH_WBGT_REDorABOVE_Hourly_byYear)+ # geom_histogram(aes(y=KVIH_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)) ##Visualize normal distribution #FLW_Combined%>%select(.,KTBN_RedorAbove_Hourly,KVIH_RedorAbove_Hourly)%>%map(~fBasics::dagoTest(.)) ##Omnibus test <0.05 would indicate that the set is NOT normally distributed. 
##USE KENDALL RANK CORRELATION TEST - can be used if not from normal distribution
cor.test(KVIH_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KTBN_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="kendall")
##Shows positive correlation between the 2 sites (tau=0.279) and p-value < 0.05 (0.004) ; implies correlation
##USE SPEARMAN RANK CORRELATION COEFFICIENT - can be used if data is not normal
cor.test(KVIH_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KTBN_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="spearman")
##Shows rho = 0.416 and p-value <0.05 ; implies correlation
##All tests return positive correlation and low p-values, including tests robust to non-normal
```
# KTBN ANALYSIS AND PLOT CREATION
```{r}
####FULL_JOIN ALL DECADES
## Join the per-decade frames side-by-side on the hour counter, renaming the .x/.y
## suffix columns after each pair of joins.
## NOTE(review): this depends on dplyr's default .x/.y (and later .y.y.y) suffixing
## and on every decade frame having identical column order -- fragile; verify after
## any dplyr upgrade or upstream column change.
Avg_Hourly_Decade <- full_join(KTBN_60s,KTBN_70s,by="count")%>%
  rename(Hour_1960=Hour.x,Day.1960=Day.x,Month.1960=Month.x,Year.1960=Year.x,Decade.1960=Decade.x,Ave_WBGT.C.1960=Ave_WBGT.C.x,Date.1960=Date.x)%>%
  rename(Hour_1970=Hour.y,Day.1970=Day.y,Month.1970=Month.y,Year.1970=Year.y,Decade.1970=Decade.y,Ave_WBGT.C.1970=Ave_WBGT.C.y,Date.1970=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KTBN_80s,by="count")
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KTBN_90s,by="count")%>%
  rename(Hour_1980=Hour.x,Day.1980=Day.x,Month.1980=Month.x,Year.1980=Year.x,Decade.1980=Decade.x,Ave_WBGT.C.1980=Ave_WBGT.C.x,Date.1980=Date.x)%>%
  rename(Hour_1990=Hour.y,Day.1990=Day.y,Month.1990=Month.y,Year.1990=Year.y,Decade.1990=Decade.y,Ave_WBGT.C.1990=Ave_WBGT.C.y,Date.1990=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KTBN_00s,by="count")
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KTBN_10s,by="count")%>%
  rename(Hour_2000=Hour.x,Day.2000=Day.x,Month.2000=Month.x,Year.2000=Year.x,Decade.2000=Decade.x,Ave_WBGT.C.2000=Ave_WBGT.C.x,Date.2000=Date.x)%>%
  rename(Hour_2010=Hour.y,Day.2010=Day.y,Month.2010=Month.y,Year.2010=Year.y,Decade.2010=Decade.y,Ave_WBGT.C.2010=Ave_WBGT.C.y,Date.2010=Date.y)
Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KTBN_20s,by="count")%>%
  rename(Hour_2020=Hour,Day.2020=Day,Month.2020=Month,Year.2020=Year,Decade.2020=Decade,Ave_WBGT.C.2020=Ave_WBGT.C,Date.2020=Date)
## Prefix month columns with the decade label (turns them into character).
## NOTE(review): the 1960 line is deliberately commented out, so Month.1960 stays
## numeric -- the between() filters below rely on that.
#Avg_Hourly_Decade$Month.1960 <- paste0("1960s",Avg_Hourly_Decade$Month.1960)
Avg_Hourly_Decade$Month.1970 <- paste0("1970s",Avg_Hourly_Decade$Month.1970)
Avg_Hourly_Decade$Month.1980 <- paste0("1980s",Avg_Hourly_Decade$Month.1980)
Avg_Hourly_Decade$Month.1990 <- paste0("1990s",Avg_Hourly_Decade$Month.1990)
Avg_Hourly_Decade$Month.2000 <- paste0("2000s",Avg_Hourly_Decade$Month.2000)
Avg_Hourly_Decade$Month.2010 <- paste0("2010s",Avg_Hourly_Decade$Month.2010)
Avg_Hourly_Decade$Month.2020 <- paste0("2020s",Avg_Hourly_Decade$Month.2020)
KTBN_Avg_Summer_Hourly_Decade <- Avg_Hourly_Decade%>%
  filter(between(Month.1960,5,9)) ##Filters for just the summer months
#view(Avg_Summer_Hourly_Decade)
## Assemble the "last 10 years" slice: 2013-2019 from the 2010s columns plus 2020-2022.
## NOTE(review): Ave_Temp.C.y.y.y is a join-suffix artifact -- confirm it is the 2010s
## temperature column before trusting this selection.
KTBN_Thirteen_to_Nineteen <- Avg_Hourly_Decade%>%
  filter(Year.2010>2012)%>%
  select(Month.1960,Hour_2010:Ave_Temp.C.y.y.y)%>%
  rename(Hour=Hour_2010,Day=Day.2010,Month=Month.2010,Year=Year.2010,Decade=Decade.2010,Ave_WBGT.C=Ave_WBGT.C.2010,Date=Date.2010,Ave_Temp.C=Ave_Temp.C.y.y.y)
KTBN_Zero_to_Two <- Avg_Hourly_Decade%>%
  filter(Year.2020>0)%>%
  select(Month.1960,Hour_2020:Ave_Temp.C)%>%
  rename(Hour=Hour_2020,Day=Day.2020,Month=Month.2020,Year=Year.2020,Decade=Decade.2020,Ave_WBGT.C=Ave_WBGT.C.2020,Date=Date.2020,Ave_Temp.C=Ave_Temp.C)
KTBN_Thirteen_to_Two <- rbind(KTBN_Thirteen_to_Nineteen,KTBN_Zero_to_Two)
KTBN_Thirteen_to_Two <- KTBN_Thirteen_to_Two%>%
  select(Hour,Month.1960,Hour,Ave_WBGT.C:Ave_Temp.C)%>%
  rename(Ave_WBGT_last10 = Ave_WBGT.C ,Ave_Temp_last10=Ave_Temp.C,Month_last10= Month.1960 )%>%
  filter(between(Month_last10,5,9))
#%>%add_row(Hour = 21:23, Month_last10=9,Ave_WBGT_last10=NA,Date=NA,Ave_Temp_last10=NA)
## NOTE(review): cbind() requires the two frames to have identical row counts and row
## order; there is no key check here -- verify before relying on the combined frame.
KTBN_Avg_Summer_Hourly_Decade <- cbind(KTBN_Avg_Summer_Hourly_Decade,KTBN_Thirteen_to_Two)
```
####DENSITY PLOT AND HISTORGRAMS BY HOURLY AVERAGE BY DECADE
###DENSITY PLOT
````{r}
## Overlaid May-September hourly WBGT density curves, one per decade, plus the most
## recent 10 years. The quoted colour names inside aes() act as *discrete level
## labels*, not literal colours; scale_color_discrete() then maps each level to a
## decade name in the legend. Fragile trick: breaks= must list the aes() strings in
## the same order as labels=.
KTBN_Summer_DensityPlot_decades <- ggplot(KTBN_Avg_Summer_Hourly_Decade)+
  geom_density(aes(x=Ave_WBGT.C.1960,color='white'),linetype=2,linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1970,color='purple'),linetype=2,linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1980,color='red'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.1990,color='blue'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2000, color='yellow'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2010,color='green'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT.C.2020,color='black'),linewidth=0.3)+
  geom_density(aes(x=Ave_WBGT_last10 ,color='pink'),alpha=0.7,linewidth=1)+
  scale_color_discrete(name="Hourly WBGT \n Grouped by decade", breaks=c("white","purple","red", "blue", "yellow","green","black","pink"), labels=c("1960s","1970s","1980s", "1990s", "2000s","2010s","2020s","Last 10 Years"))+
  xlab("WBGT")+
  scale_x_continuous(limits = c(20,38),expand=c(0,0))+
  ggtitle("May - September Hourly WBGT Density",subtitle="Ft Leonard Wood, MO")+
  # xlim(20,38)+
  theme_light()
KTBN_Summer_DensityPlot_decades
## 2000s is included here without modification to missing data points because this is a density plot, so sample size matters less, assuming the proportion of WBGT readings are accurate.
```
####ANOMALY PLOT SUMMER MONTHS - DAYTIME
```{r}
##SET UP DATA
## Climatological mean WBGT for each (Hour, Day, Month) across 1960-2022 summers;
## used below to back-fill missing hourly readings.
KTBN_Anom_Avg <- KTBN_Wrangled%>%
  filter(between(Year,1960,2022), between(Month,5,9))%>%
  group_by(Hour, Day, Month)%>%
  dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C,na.rm=TRUE)) ##create a column with the average WBGT values for that day, ignoring missing data in calculation
## Daytime (0800-2000) summer observations, one row per hour.
KTBN_Anomaly <- KTBN_Wrangled%>%
  filter(between(Year,1960,2022),between(Hour,8,20), between(Month,5,9))%>%
  group_by(Hour, Day, Month, Year, Decade)%>%
  dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C))%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number()) ## FIX: was c(1:119340) -- hard-coded row count errored whenever the filtered slice changed size; row_number() adapts
KTBN_Anomaly <- full_join(KTBN_Anomaly,KTBN_Anom_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KTBN_00s)
KTBN_Anomaly <- mutate(KTBN_Anomaly, Ave_WBGT.C.x = case_when( is.na(Ave_WBGT.C.x) ~Ave_WBGT.C.y, TRUE~Ave_WBGT.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column
##START ACTUAL PLOT
KTBN_Anomaly <- KTBN_Anomaly%>%
  select(Hour,Day,Month,Year,Ave_WBGT.C.x,Date,count)%>%
  rename(Ave_WBGT.C=Ave_WBGT.C.x)
## Reference climate: mean WBGT over the 1990-2020 base period.
KTBN_Ref <- KTBN_Anomaly%>%
  filter(between(Year,1990,2020))%>%
  summarise(mean=mean(Ave_WBGT.C))
## Anomaly = yearly mean WBGT minus the reference mean.
KTBN_Anomaly <- KTBN_Anomaly%>%
  group_by(Year)%>%
  dplyr::summarise(Avg_Year_WBGT = mean(Ave_WBGT.C))%>%
  mutate(Anomaly = Avg_Year_WBGT-KTBN_Ref$mean)
#mutate(Date = make_date(year=Year,month=Month))
#KTBN_Anomaly <- KTBN_Anomaly %>% arrange(ymd(KTBN_Anomaly$Date)) ##puts DF in date order
#KTBN_Anomaly$count <- c(1:nrow(KTBN_Anomaly)) ##assigns date ordered number to DF
## NOTE(review): the title says 1963-2022 but the filter above selects 1960-2022 --
## confirm the intended span (the record may simply begin in 1963).
KTBN_Monthly_Anomaly.plot <- ggplot(KTBN_Anomaly,aes(x=Year,y=Anomaly))+
  geom_line()+
  geom_smooth()+
  xlab("Year")+
  ylab("Monthly Average WBGT Anomaly\nDegrees C")+
  ggtitle("Yearly average WBGT anomaly, 1963-2022",subtitle="Ft Leonard Wood, MO")+
  labs(caption = "Reference period = Average WBGT, 1990-2020")+
  theme_classic()+
  geom_hline(yintercept=0,linetype="dashed")
#annotate("text",x=2010,y=-1.5,label=("Reference period = Average WBGT, 1990-2020"),size=2)+
KTBN_Monthly_Anomaly.plot
```
### Compare anomaly from KTBN to anomaly from NOAA
https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/divisional/time-series/0904/tavg/ann/5/1960-2022?base_prd=true&begbaseyear=1990&endbaseyear=2020
```{r}
##SET UP DATA
##manually load in the regional anomaly trend
Avg_Temp_Anom <- read.csv ('/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/R Files/JAN 2023 Data Projects/NOAA Climate Anomaly Data.csv')
EOMO_Avg_Temp_Anom <- Avg_Temp_Anom$Anomaly.EO.MO
## Keep rows 4-61 of the NOAA series.
## NOTE(review): hard-coded row window -- verify against the downloaded CSV layout.
EOMO_Avg_Temp_Anom <- EOMO_Avg_Temp_Anom[4:61]
## Hour/day/month climatology of dry-bulb temperature, used to back-fill gaps.
KTBN_Anom_Avg.C <- KTBN_Wrangled%>%
  filter(between(Year,1960,2021))%>%
  group_by(Hour, Day, Month)%>%
  dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C,na.rm=TRUE)) ##create a column with the average temperature values for that day, ignoring missing data in calculation
KTBN_Anomaly.C <- KTBN_Wrangled%>%
  filter(between(Year,1960,2021))%>%
  group_by(Hour, Day, Month, Year, Decade)%>%
  dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C))%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=row_number()) ## FIX: was c(1:515321) -- hard-coded row count; row_number() adapts to the actual slice size
KTBN_Anomaly.C <- full_join(KTBN_Anomaly.C,KTBN_Anom_Avg.C,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KTBN_00s)
KTBN_Anomaly.C <- mutate(KTBN_Anomaly.C, Ave_Temp.C.x = case_when( is.na(Ave_Temp.C.x) ~Ave_Temp.C.y, TRUE~Ave_Temp.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column
##START ACTUAL PLOT
KTBN_Anomaly.C <- KTBN_Anomaly.C%>%
  select(Hour,Day,Month,Year,Ave_Temp.C.x,Date,count)%>%
  rename(Ave_Temp.C=Ave_Temp.C.x)
KTBN_Ref.C <- KTBN_Anomaly.C%>%
  filter(between(Year,1990,2020))%>%
  summarise(mean=mean(Ave_Temp.C))
KTBN_Anomaly.C <- KTBN_Anomaly.C%>%
  group_by(Year)%>%
  dplyr::summarise(Avg_Year_Temp = mean(Ave_Temp.C))%>%
  mutate(Anomaly = Avg_Year_Temp-KTBN_Ref.C$mean)
#mutate(Date = make_date(year=Year,month=Month))
#KTBN_Anomaly <- KTBN_Anomaly %>% arrange(ymd(KTBN_Anomaly$Date)) ##puts DF in date order
#KTBN_Anomaly$count <- c(1:nrow(KTBN_Anomaly)) ##assigns date ordered number to DF
## Attach the NOAA regional anomaly series.
## NOTE(review): [1:59] rows are paired positionally with the KTBN yearly rows --
## confirm the two series cover the same years before trusting the comparison.
KTBN_Anomaly.C$Regional_Anom <- EOMO_Avg_Temp_Anom[1:59]
KTBN_Anomaly.C$Regional_Anom <- as.numeric(KTBN_Anomaly.C$Regional_Anom)
KTBN_Monthly_Anomaly.C.plot <- ggplot(KTBN_Anomaly.C)+
  geom_line(aes(x=Year,y=Anomaly,color="Blue"))+
  geom_line(aes(x=Year,y=Regional_Anom,color="red"))+
  xlab("Year")+
  scale_x_continuous(breaks = c(1960,1970,1980,1990,2000,2010,2020),expand=c(0,0))+
  ylab("Average Anomaly\nDegrees C")+
  ggtitle("Yearly average temperature anomaly\nFt Leonard Wood, MO vs East-Ozarks Missouri Average")+ ## FIX: corrected title typo "Leaonard"
  theme_classic()+
  # theme(axis.text.y=element_text(margin=margin(r=0)))+
  geom_hline(yintercept=0,linetype="dashed")+
  scale_color_identity(name="Location",breaks=c("Blue","red"), labels=c("FLW","Regional Average\n(NOAA)"),guide="legend")+
  labs(caption = "Reference period = Yearly average temp, 1990-2020")
#annotate("text",x=2009,y=-2,label=("Reference period = Average Temperature, 1990-2020"),size=2)+
KTBN_Monthly_Anomaly.C.plot
## Pearson correlation between the installation and regional anomaly series.
FLWxRegional.cor <- cor.test(KTBN_Anomaly.C$Regional_Anom ,KTBN_Anomaly.C$Anomaly,method = "pearson")
FLWxRegional.cor$estimate
FLWxRegional.cor$p.value
FLWxRegional.cor$conf.int
```
####HISTOGRAMS FULL year data used
```{r}
## Helper: WBGT histogram for one decade column of Avg_Hourly_Decade, with the four
## heat-category thresholds marked. Replaces five near-identical copy-pasted plots.
decade_wbgt_histogram <- function(column, title) {
  ggplot(Avg_Hourly_Decade)+
    geom_histogram(aes(x={{column}}),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
    ylab("Hours")+
    xlab("WBGT")+
    ggtitle(title)+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
    geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
    scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
    theme_classic()
}
## FIX: the 1970s plot previously drew Ave_WBGT.C.1980 under a "1970-1979" title.
Summer_Histogram_1970s <- decade_wbgt_histogram(Ave_WBGT.C.1970, "1970-1979")
Summer_Histogram_1980s <- decade_wbgt_histogram(Ave_WBGT.C.1980, "1980-1989")
Summer_Histogram_1990s <- decade_wbgt_histogram(Ave_WBGT.C.1990, "1990-1999")
Summer_Histogram_2000s <- decade_wbgt_histogram(Ave_WBGT.C.2000, "2000-2009")
Summer_Histogram_2010s <- decade_wbgt_histogram(Ave_WBGT.C.2010, "2010-2019")
```
####BAR CHARTS (Full Year data used)
```{r}
##CREATE VECTORS OF HOURS ABOVE A CATEGORY
## Helper: average hours per year at/above a heat-category threshold, one value per
## decade. The 2020s record covers only ~2.75 years, hence its different divisor.
## FIX: the original vectors applied na.rm=TRUE only to the 1970s term (so any NA
## elsewhere propagated), and the Yellow vector omitted the KTBN_10s term entirely,
## misaligning the cbind() below.
hours_at_or_above <- function(cat) {
  threshold <- Heat_Categories$Category_Temp_Min.C[cat]
  c(sum(KTBN_70s$Ave_WBGT.C>=threshold,na.rm=TRUE)/10,
    sum(KTBN_80s$Ave_WBGT.C>=threshold,na.rm=TRUE)/10,
    sum(KTBN_90s$Ave_WBGT.C>=threshold,na.rm=TRUE)/10,
    sum(KTBN_00s$Ave_WBGT.C>=threshold,na.rm=TRUE)/10,
    sum(KTBN_10s$Ave_WBGT.C>=threshold,na.rm=TRUE)/10,
    sum(KTBN_20s$Ave_WBGT.C>=threshold,na.rm=TRUE)/2.75)
}
Green <- hours_at_or_above(1)
Yellow <- hours_at_or_above(2)
Red <- hours_at_or_above(3)
Black <- hours_at_or_above(4)
#/10 in order to get single year avg
KTBN_Hours_Flags <- as.data.frame(cbind(c("1970s","1980s","1990s","2000s","2010s","2020s"),Green,Yellow,Red,Black),stringsAsFactors = FALSE)
KTBN_Hours_Flags <- KTBN_Hours_Flags%>%
  pivot_longer(!V1,names_to = "Heat_Category",values_to = "Hours")%>%
  as.data.frame()
KTBN_Hours_Flags$Hours <- as.integer(KTBN_Hours_Flags$Hours)
#view(KTBN_Hours_Flags)
##CREATE GRAPH
KTBN_Threshold_Barplot <- ggplot(data=KTBN_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) +
  geom_bar(stat="identity",position=position_dodge())+
  geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+
  scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+
  ylab("Average Hours per Year")+
  xlab("Heat Category")+
  theme_classic()+
  scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+
  coord_flip()+
  #annotate("text",x=4,y=150,label=c("Hours above Black threshold, average per Year \n2020s : 96 (projected)\n2010s : 124 \n2000s : 74\n1990s : 50 \n1980s : 121 "),hjust=0,size=2)+
  ggtitle("Average Hours at Each Heat Category per Year",subtitle="Ft Leonard Wood, MO") ## FIX: corrected title typo "Catergory"
KTBN_Threshold_Barplot
```
#### COMBINED PLOTS
```{r}
#bottom_row <- plot_grid(Summer_Histogram_2000s,Summer_Histogram_2010s,nrow=1)
#Middle_row <- plot_grid(Summer_Histogram_1980s,Summer_Histogram_1990s,nrow=1)
#top_row <- plot_grid(Summer_DensityPlot_decades, KTBN_Monthly_Anomaly.plot,nrow=2)
## Stack the density, anomaly, and bar plots into the publication figure.
Summer_Months_KTBN.plot <- plot_grid(KTBN_Summer_DensityPlot_decades, KTBN_Monthly_Anomaly.plot, KTBN_Threshold_Barplot,nrow=3)
Summer_Months_KTBN.plot <- ggdraw(add_sub(Summer_Months_KTBN.plot,"Data gaps =< 3 hours interpolated. Longer gaps replaced with decade average for missing Hour, Day \n 2020s density plot for 2020-2022 only; Bar chart extrapolates full 2020s decade",size=8))
Summer_Months_KTBN.plot
```
#BCT Analysis
##Load Data FLW BCT Wrangle
```{r}
#write.csv(FLW_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Leonard Wood ATRRS Pull/FLW_BCT_23.csv")
FLW_BCT_23 <- read.csv("~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Leonard Wood ATRRS Pull/FLW_BCT_23.csv",stringsAsFactors = FALSE)
FLW_BCT_23 <- FLW_BCT_23%>%
  mutate(across(everything(), str_trim))%>% ## FIX: mutate_all(.funs=funs(...)) is defunct in current dplyr; across() is the supported equivalent
  select(Class,X.Report.Date,X.Start.Date,X.End.Date,X.Capacity)
FLW_BCT_23 <- na.omit(FLW_BCT_23)
FLW_BCT_23$Class <- as.numeric(FLW_BCT_23$Class)
FLW_BCT_23$X.Capacity <- as.numeric(FLW_BCT_23$X.Capacity)
FLW_BCT_23$X.Report.Date <- dmy(FLW_BCT_23$X.Report.Date)
FLW_BCT_23$X.Start.Date <- dmy(FLW_BCT_23$X.Start.Date)
FLW_BCT_23$X.End.Date <- dmy(FLW_BCT_23$X.End.Date)
FLW_BCT_23 <- FLW_BCT_23%>%
  rename(Report_Date = X.Report.Date, Start_Date=X.Start.Date,End_Date=X.End.Date)%>%
  mutate(Julian_Start=yday(Start_Date),Julian_End=yday(End_Date))
FLW_BCT_23 <- FLW_BCT_23[apply(FLW_BCT_23!=0,1,all),] #remove any row with a numerical '0' to get rid of classes with 0 capacity
#write.csv(FLW_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Leonard Wood ATRRS Pull/FLW_BCT_23_Wrangled.csv")
```
##KTBN BCT
##Assign clean history to KTBN data
```{r}
FLW_BCT_23$Start_Month <- month(ymd(FLW_BCT_23$Start_Date))
FLW_BCT_23$End_Month <- month(ymd(FLW_BCT_23$End_Date))
FLW_BCT_23$Start_Day <- day(ymd(FLW_BCT_23$Start_Date))
FLW_BCT_23$End_Day <- day(ymd(FLW_BCT_23$End_Date))
FLW_BCT_23$Days_Long <- FLW_BCT_23$End_Date-FLW_BCT_23$Start_Date #counts number of days in each class
```
```{r}
##code below to get average hourly temp and WBGT for last forty years for full data (not 2022 or 2023 due to lack of data)
###Averaging out the values left alone, per conversation with Luke
## NOTE(review): these pipelines end at group_by() with no summarise, so they remain
## hourly rows grouped by (Hour, Julian); the commented summarize shows the abandoned
## aggregation.
KTBN_Hourly_Avg.last30 <- KTBN_Wrangled%>%
  filter(date>="1992-10-01"&date<="2022-09-30")%>%
  mutate(Julian=yday(date))%>%
  group_by(Hour,Julian)
KTBN_Hourly_Avg.last10 <- KTBN_Wrangled%>%
  filter(date>="2012-10-01"&date<="2022-09-30")%>%
  filter(between(Month,5,9))%>%
  mutate(Julian=yday(date))%>%
  group_by(Hour,Julian)
# dplyr::summarize(Avg_Hourly_Low_WBGT.C=min(Ave_WBGT.C),Avg_Hourly_Avg_WBGT.C=mean(Ave_WBGT.C),Avg_Hourly_High_WBGT.C=max(Ave_WBGT.C),Avg_Hourly_Low_Temp.C=min(Ave_Temp.C),Avg_Hourly_Avg_Temp.C=mean(Ave_Temp.C),Avg_Hourly_High_Temp.C=max(Ave_Temp.C))
#Days just in Red category Last 30
KTBN_Cat4 <- KTBN_Hourly_Avg.last30%>%
  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[3],Heat_Categories$Category_Temp_Min.C[4]))%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Cat4 = n)%>%
  mutate(Day_Cat4 = 1)%>% ## FIX: was ifelse(Day_Cat4=='',1,1), which always yields 1; set the day-flag directly
  mutate(Julian=yday(date))
#Days just in Black category Last 30
KTBN_Cat5 <- KTBN_Hourly_Avg.last30%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4] )%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Cat5 = n)%>%
  mutate(Day_Cat5 = 1)%>% ## FIX: same always-1 ifelse simplification as Day_Cat4
  mutate(Julian=yday(date))
```
##Filter
```{r}
##Get ATRRS Data and re-wrangled
#FLW_BCT_23_Wrangled <- read.csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Leonard Wood ATRRS Pull/FLW_BCT_23_Wrangled.csv")
FLW_BCT_23_Wrangled <- FLW_BCT_23
FLW_BCT_23_Wrangled$Start_Date <- ymd(FLW_BCT_23_Wrangled$Start_Date)
FLW_BCT_23_Wrangled$End_Date <- ymd(FLW_BCT_23_Wrangled$End_Date)
## Keep classes starting March-December 2023 (strings are coerced to Date here).
FLW_BCT_23_Wrangled <- FLW_BCT_23_Wrangled%>%
  filter(between(Start_Date,"2023-03-01","2023-12-31"))
```
##Filter with ATRRS Data for course dates
```{r}
FLW_BCT_23_Wrangled$Count <- c(1:nrow(FLW_BCT_23_Wrangled)) #add a counter column for following loop
##Loop to count average hours in each flag condition by cohort.
## NOTE(review): this loop was corrupted in the source (comparison chains written
## with '=' and only NoFlags/Dangerous assigned); reconstructed so each flag band is
## [min of band, min of next band), matching the Heat_Categories thresholds, and so
## the GreenFlags_Avg..BlackFlags_Avg columns read by the check loop below exist.
for (i in FLW_BCT_23_Wrangled$Count){
  Flags <- KTBN_Hourly_Avg.last30%>%
    filter(between(Julian,FLW_BCT_23_Wrangled$Julian_Start[i],FLW_BCT_23_Wrangled$Julian_End[i]))
  ##divide the below by 30 because there are 30 years in this filtered data set
  FLW_BCT_23_Wrangled$NoFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[1])) /(30))
  FLW_BCT_23_Wrangled$GreenFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[2])) /(30))
  FLW_BCT_23_Wrangled$YellowFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[3])) /(30))
  FLW_BCT_23_Wrangled$RedFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[4])) /(30))
  FLW_BCT_23_Wrangled$BlackFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])) /(30))
  FLW_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])) /(30))
}
##Loop to count up total hours from NoFlag:BlackFlag as a check that all hours are accounted for
for (i in FLW_BCT_23_Wrangled$Count){
  FLW_BCT_23_Wrangled$TotalCourseHours[i] <- FLW_BCT_23_Wrangled$NoFlags_Avg[i]+FLW_BCT_23_Wrangled$GreenFlags_Avg[i]+FLW_BCT_23_Wrangled$YellowFlags_Avg[i]+FLW_BCT_23_Wrangled$RedFlags_Avg[i]+FLW_BCT_23_Wrangled$BlackFlags_Avg[i]
}
#view(FLW_BCT_23_Wrangled)
```
##Create a list of Green and Yellow days that follow Red or Black Days
```{r}
##Create a df that has all days at/above red OR days at/above green following a red+ day
## NOTE(review): KTBN_AboveRed.last40 / KTBN_AboveGreen.last40 are built outside this
## excerpt -- confirm their column names (Day_Above_Red, Day_Green_or_Yellow) there.
## NOTE(review): the last40/last30/last10/prev30 blocks below repeat the same pipeline
## with only the date window and year divisor changed; a helper parameterized on
## (window, n_years) would remove the duplication.
KTBN_Flag_Days.last40 <- full_join(KTBN_AboveRed.last40,KTBN_AboveGreen.last40,by="date")
KTBN_Flag_Days.last40 <- KTBN_Flag_Days.last40%>%
  select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>%
  mutate(Julian.y=yday(date))%>%
  mutate(year.y=lubridate::year(date))
KTBN_Flag_Days.last40 <- replace(KTBN_Flag_Days.last40,is.na(KTBN_Flag_Days.last40),0)
KTBN_Flag_Days.last40 <- KTBN_Flag_Days.last40%>%arrange(ymd(KTBN_Flag_Days.last40$date)) ##this line gets the df in date order for the following loop
#KTBN_Flag_Days.last40$count <- c(1:nrow(KTBN_Flag_Days.last40))
KTBN_Flag_Days.last40$Lagged <- lag(KTBN_Flag_Days.last40$Day_Above_Red>0,1) ##create a column identifying days after RED or BLACK
##Create the average occurrence of "caution" day
KTBN_Caution_Days.last40 <- KTBN_Flag_Days.last40%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>% ## project each (month, day) onto the 2023 calendar for plotting
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KTBN_Caution_Days.last40$Avg_Count_Caution_Days <- KTBN_Caution_Days.last40$`Caution Days`/40
##Create the average occurrence of heat cat 4-5 "high heat" days
KTBN_High_Heat_Days.last40 <- KTBN_Flag_Days.last40%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KTBN_High_Heat_Days.last40$High_Heat_Days <- KTBN_High_Heat_Days.last40$High_Heat_Days/40
#"Caution Days" include both high heat days and caution days
KTBN_Caution_Days.last40 <- full_join(KTBN_Caution_Days.last40,KTBN_High_Heat_Days.last40,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KTBN_Caution_Days.last40 <- replace(KTBN_Caution_Days.last40,is.na(KTBN_Caution_Days.last40),0)
##Create last 30 years of flag days
## (character bounds are coerced to Date by between())
KTBN_Flag_Days.last30 <- KTBN_Flag_Days.last40%>%
  filter(between(date,"1992-10-01","2022-09-30"))
KTBN_Caution_Days.last30 <- KTBN_Flag_Days.last30%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KTBN_Caution_Days.last30$Avg_Count_Caution_Days <- KTBN_Caution_Days.last30$`Caution Days`/30
##Create the average occurrence of heat cat 4-5 "high heat" days
KTBN_High_Heat_Days.last30<- KTBN_Flag_Days.last30%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KTBN_High_Heat_Days.last30$High_Heat_Days <- KTBN_High_Heat_Days.last30$High_Heat_Days/30
KTBN_Caution_Days.last30 <- full_join(KTBN_Caution_Days.last30,KTBN_High_Heat_Days.last30,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KTBN_Caution_Days.last30 <- replace(KTBN_Caution_Days.last30,is.na(KTBN_Caution_Days.last30),0)
##Create last 10 years of flag days
KTBN_Flag_Days.last10 <- KTBN_Flag_Days.last40%>%
  filter(between(date,"2012-10-01","2022-09-30"))
KTBN_Caution_Days.last10 <- KTBN_Flag_Days.last10%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KTBN_Caution_Days.last10$Avg_Count_Caution_Days <- KTBN_Caution_Days.last10$`Caution Days`/10
##Create the average occurrence of heat cat 4-5 "high heat" days
KTBN_High_Heat_Days.last10<- KTBN_Flag_Days.last10%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KTBN_High_Heat_Days.last10$High_Heat_Days <- KTBN_High_Heat_Days.last10$High_Heat_Days/10
KTBN_Caution_Days.last10 <- full_join(KTBN_Caution_Days.last10,KTBN_High_Heat_Days.last10,by="Julian")%>%
  select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>%
  rename(date=date.x)
KTBN_Caution_Days.last10 <- replace(KTBN_Caution_Days.last10,is.na(KTBN_Caution_Days.last10),0)
```
##Create a data frame of Red and Black Flag Days - last 30
```{r}
##Create a df that has all days at RED and all days at BLACK
KTBN_RedorBlack_Flag_Days.last30 <- full_join(KTBN_Cat4,KTBN_Cat5,by="date")
KTBN_RedorBlack_Flag_Days.last30 <- KTBN_RedorBlack_Flag_Days.last30%>%
  select(date,Day_Cat4,Day_Cat5)%>%
  mutate(Julian=yday(date))%>%
  mutate(year.y=lubridate::year(date))
KTBN_RedorBlack_Flag_Days.last30 <- replace(KTBN_RedorBlack_Flag_Days.last30,is.na(KTBN_RedorBlack_Flag_Days.last30),0)
```
##Plot the training cycle vs red and black flag days (Dangerous Heat per Class)
```{r}
## Lollipop chart: one stem per BCT class at its start date, sized/labelled by the
## 30-year average count of RED-or-above hours falling within the class window.
FLW_heat_hours.plot <- ggplot (FLW_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+
  geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+
  geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Hours of Hazardous\nHeat per Class")+
  theme_light() +
  geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-2,hjust=0,size=3)+
  theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30) )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+
  geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=4,fontface='bold')+
  xlab("FLW BCT Class Number (FY23)\nLine corresponds to class start date")+ylab("'Red' or 'Black' Hours per Class")+
  ggtitle("Average (mean) hours of hazardous heat by basic training class",subtitle="Ft Leonard Wood, MO")+
  labs(caption = "Hours averaged from 30 year period (October 1992 - September 2022).\nClass dates from FY 2023 basic combat training schedule")
FLW_heat_hours.plot
```
###PLOT COURSE NUMBER AND THE LIKLIHOOD THAT THE DAY IS DANGEROUS ON SAME GRAPH
```{r}
ggplot()+
  #geom_line(data=KTBN_Caution_Days.last40,aes(x=date,y=Avg_Count_Caution_Days*250))+
  geom_smooth(data=KTBN_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200))+
  scale_y_continuous(limits=c(0,200), name="Average Hazardous Heat Hours per Class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of Cautionary Heat Day"))+ ##second axis is divided by 200, since to plot the geom_smooth line we multiplied by 200
  geom_segment(data=FLW_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+
  geom_text(data=FLW_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=5,fontface='bold')+
  theme_light()+
  theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank() )+
  scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+
  xlab("FLW BCT Class Number (FY23 example)")+
  ggtitle("Ft Leonard Wood, MO")
```
###Rank days by high heat and by heat wave probability
```{r}
## Rank each calendar day by (descending) high-heat probability, then caution-day
## probability; rescale the combined order to [0, 1] for plotting.
KTBN_Caution_Days.last40.sorted <- KTBN_Caution_Days.last40%>%
  mutate(High_Heat_Ranked=rank(-High_Heat_Days))%>%
  mutate(Avg_Count_Caution_Days_Ranked=rank(-Avg_Count_Caution_Days))
## NOTE(review): ranking=c(1:nrow(...)) reads the pre-mutate object by name -- valid
## only because the row count is unchanged; High_Heat_Ranked[] is an unusual but
## harmless full-vector subset.
KTBN_Caution_Days.last40.sorted <- KTBN_Caution_Days.last40.sorted%>%
  arrange(KTBN_Caution_Days.last40.sorted$High_Heat_Ranked,KTBN_Caution_Days.last40.sorted$Avg_Count_Caution_Days_Ranked)%>%
  mutate(newcol=High_Heat_Ranked[]+Avg_Count_Caution_Days_Ranked)%>%
  mutate(ranking=c(1:nrow(KTBN_Caution_Days.last40.sorted)))%>%
  mutate(ranking_rescaled=rescale(-ranking))
##this ranking is based on the following - probability of a high heat day, probability of a cautionary heat day, and then earlier in the year. scale is used to transform the individual day to something that can be plotted on a % axis.
#view(KTBN_Caution_Days.last40.sorted)
```
##Prior 30 year hazardous and caution probability
```{r}
KTBN_Flag_Days.1982.2011 <- KTBN_Flag_Days.last40%>%
  filter(between(date,"1982-01-01","2011-12-31"))
##Create the average occurrence of "caution" day
KTBN_Caution_Days.prev30 <- KTBN_Flag_Days.1982.2011%>%
  filter(Lagged==TRUE | Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename("Caution Days" = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select("Caution Days",Julian,date)
KTBN_Caution_Days.prev30$Avg_Count_Caution_Days <- KTBN_Caution_Days.prev30$`Caution Days`/30
##Create the average occurrence of heat cat 4-5 "high heat" days
KTBN_High_Heat_Days.prev30<- KTBN_Flag_Days.1982.2011%>%
  filter(Day_Above_Red==1)%>%
  mutate(day=day(date),month=month(date))%>%
  group_by(month,day)%>%
  count(day)%>%
  rename(High_Heat_Days = n)%>%
  mutate(date=make_date(year=2023,month=month,day=day))%>%
  mutate(Julian=yday(date))%>%
  select(High_Heat_Days,Julian,date)
KTBN_High_Heat_Days.prev30$High_Heat_Days <- KTBN_High_Heat_Days.prev30$High_Heat_Days/30
KTBN_Caution_Days.prev30 <- full_join(KTBN_Caution_Days.prev30,KTBN_High_Heat_Days.prev30,by="Julian")%>%
select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>% rename(date=date.x) KTBN_Caution_Days.prev30 <- replace(KTBN_Caution_Days.prev30,is.na(KTBN_Caution_Days.prev30),0) ``` ###Plot the likelihood that a day is a 'Cautionary' or 'Hazardous' Heat Day ```{r} ##Cautionary Days KTBN_Likelihood_cautionary_days.plot <- ggplot(KTBN_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KTBN_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Purple', size=16))+ #annotate("text",label="5% increase in likelihood of cautionary days in the last week of July",size=6,x=KTBN_Caution_Days.prev30$date[45] ,y=.9)+ # geom_point(data=KTBN_Caution_Days.last40.sorted[1:10,],aes(x=date,y=ranking_rescaled))+ #geom_text(data=KTBN_Caution_Days.last40.sorted[1:10,],aes(x=date,y=ranking_rescaled-.9,label=ranking))+ ggtitle("Cautionary heat daily probability",subtitle="Ft Leonard Wood, MO") #geom_line(data=KTBN_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red')) KTBN_Likelihood_cautionary_days.plot ##Hazardous Days KTBN_Likelihood_hazardous_days.plot <- ggplot(KTBN_Caution_Days.prev30)+ 
geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KTBN_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Red', size=16))+ #annotate("text",label="~10% increase in likelihood of hazardous days in the last week of July",size=6,x=KTBN_Caution_Days.last10$date[40] ,y=.9)+ ggtitle("Hazardous heat (category 4 or 5) daily probability",subtitle="Ft Leonard Wood, MO") #geom_line(data=KTBN_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red')) KTBN_Likelihood_hazardous_days.plot ``` ## [NEW] Last 30-year Cat 4 or Cat 5 ```{r} ##Create the average occurrence of "cat 4" day KTBN_Cat4_Days.last30 <- KTBN_RedorBlack_Flag_Days.last30%>% filter(Day_Cat4==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Heat Cat 4" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Heat Cat 4",Julian,date) KTBN_Cat4_Days.last30$`Heat Cat 4` <- KTBN_Cat4_Days.last30$`Heat Cat 4`/30 KTBN_Cat4_Days.last30 <- KTBN_Cat4_Days.last30%>% rename(Likelihood_Cat4 = `Heat Cat 4`) ##Create the average occurrence of "cat 5" day KTBN_Cat5_Days.last30 <- KTBN_RedorBlack_Flag_Days.last30%>% filter(Day_Cat5==1)%>% 
mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Heat Cat 5" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Heat Cat 5",Julian,date) KTBN_Cat5_Days.last30$`Heat Cat 5` <- KTBN_Cat5_Days.last30$`Heat Cat 5`/30 KTBN_Cat5_Days.last30 <- KTBN_Cat5_Days.last30%>% rename(Likelihood_Cat5 = `Heat Cat 5`) ##Create the average occurrence of "hazardous heat" day KTBN_Hazardous_Days.last30 <- KTBN_RedorBlack_Flag_Days.last30%>% filter(Day_Cat5==1 || Day_Cat4==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Hazardous Heat" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Hazardous Heat",Julian,date) KTBN_Hazardous_Days.last30$`Hazardous Heat` <- KTBN_Hazardous_Days.last30$`Hazardous Heat`/30 KTBN_Hazardous_Days.last30 <- KTBN_Hazardous_Days.last30%>% rename(Likelihood_Hazardous_Heat = `Hazardous Heat`) KTBN_Likelihood_Cat4xCat5.last30 <- full_join(KTBN_Cat4_Days.last30,KTBN_Cat5_Days.last30,by="Julian")%>% select(Likelihood_Cat4,Likelihood_Cat5,Julian,date.x)%>% rename(date=date.x) KTBN_Likelihood_Cat4xCat5.last30 <- full_join(KTBN_Likelihood_Cat4xCat5.last30,KTBN_Hazardous_Days.last30,by="Julian")%>% select(Likelihood_Cat4,Likelihood_Cat5,Likelihood_Hazardous_Heat,Julian,date.x)%>% rename(date=date.x) KTBN_Likelihood_Cat4xCat5.last30 <- replace(KTBN_Likelihood_Cat4xCat5.last30,is.na(KTBN_Likelihood_Cat4xCat5.last30),0) ``` ### [NEW] Plot the likelihood that a day is Cat4 or Cat5 ```{r} ##Heat Cat 4 and Heat Cat 5, last 30 KTBN_Likelihood_Cat4xCat5.last30.plot <- ggplot(KTBN_Likelihood_Cat4xCat5.last30)+ geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+ geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+ scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat 
Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+ scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Black', size=16))+ ggtitle("Daily probability of experiencing hazardous heat",subtitle="Ft Leonard Wood, MO\n(Oct 1992 - Sept 2022)") KTBN_Likelihood_Cat4xCat5.last30.plot ``` ##25th-75th Quartiles ```{r} KTBN_Daily_Values.C <- KTBN_Daily_Values.C%>% mutate(day=day(date))%>% mutate(Julian=yday(date)) KTBN_Daily_Values.C.previous30 <- KTBN_Daily_Values.C%>% filter(between(date,"1982-01-01","2011-12-31")) KTBN_Daily_Values.C.last10 <- KTBN_Daily_Values.C%>% filter(between(date,"2012-10-01","2022-09-30")) ``` ###- Previous 30 years - Find Quantiles and create data frame ```{r} ##fidn the quartile for each day ##WBGT ###dail avg wbgt KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT <- do.call("rbind", tapply(KTBN_Daily_Values.C.previous30$daily_avg_WBGT, KTBN_Daily_Values.C.previous30$Julian, quantile)) KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT <- KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT%>% as.data.frame(KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT) ##create a new data frame KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT$Julian <- c(1:nrow(KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT)) KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT$date <- as.Date(KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT$Julian, origin=as.Date("2022-12-31")) #view(KTBN_Daily_Values.C.previous30.quantile.daily_avg_WBGT) 
##Helper: per-calendar-day (Julian) quantile summary of a measurement series.
##Returns a data frame with the quantile() columns `0%`,`25%`,`50%`,`75%`,`100%`,
##plus a sequential Julian index and a date aligned to the 2023 calendar for
##plotting. Replaces twelve copy-pasted stanzas; also fixes the latent
##`x %>% as.data.frame(x)` defect (the matrix was passed a second time into
##the row.names argument).
daily_quantile_frame <- function(values, julian) {
  q <- as.data.frame(do.call("rbind", tapply(values, julian, quantile)))
  q$Julian <- c(1:nrow(q)) ##assumes every Julian day is present, as the originals did
  q$date <- as.Date(q$Julian, origin=as.Date("2022-12-31"))
  q
}

KTBN_Daily_Values.C.previous30.quantile.daily_high_WBGT <- daily_quantile_frame(KTBN_Daily_Values.C.previous30$daily_high_WBGT, KTBN_Daily_Values.C.previous30$Julian)
#view(KTBN_Daily_Values.C.previous30.quantile.daily_high_WBGT)

KTBN_Daily_Values.C.previous30.quantile.daily_low_WBGT <- daily_quantile_frame(KTBN_Daily_Values.C.previous30$daily_low_WBGT, KTBN_Daily_Values.C.previous30$Julian)
#view(KTBN_Daily_Values.C.previous30.quantile.daily_low_WBGT)

###temperature
KTBN_Daily_Values.C.previous30.quantile.daily_avg <- daily_quantile_frame(KTBN_Daily_Values.C.previous30$daily_avg, KTBN_Daily_Values.C.previous30$Julian)
#view(KTBN_Daily_Values.C.previous30.quantile.daily_avg)

KTBN_Daily_Values.C.previous30.quantile.daily_high <- daily_quantile_frame(KTBN_Daily_Values.C.previous30$daily_high, KTBN_Daily_Values.C.previous30$Julian)
#view(KTBN_Daily_Values.C.previous30.quantile.daily_high)

KTBN_Daily_Values.C.previous30.quantile.daily_low <- daily_quantile_frame(KTBN_Daily_Values.C.previous30$daily_low, KTBN_Daily_Values.C.previous30$Julian)
```

###- Last 10 years - Find Quantiles and create data frame
```{r}
##find the quartile for each day
##WBGT
###daily avg wbgt
KTBN_Daily_Values.C.last10.quantile.daily_avg_WBGT <- daily_quantile_frame(KTBN_Daily_Values.C.last10$daily_avg_WBGT, KTBN_Daily_Values.C.last10$Julian)
#view(KTBN_Daily_Values.C.last10.quantile.daily_avg_WBGT)

KTBN_Daily_Values.C.last10.quantile.daily_high_WBGT <- daily_quantile_frame(KTBN_Daily_Values.C.last10$daily_high_WBGT, KTBN_Daily_Values.C.last10$Julian)
#view(KTBN_Daily_Values.C.last10.quantile.daily_high_WBGT)

KTBN_Daily_Values.C.last10.quantile.daily_low_WBGT <- daily_quantile_frame(KTBN_Daily_Values.C.last10$daily_low_WBGT, KTBN_Daily_Values.C.last10$Julian)
#view(KTBN_Daily_Values.C.last10.quantile.daily_low_WBGT)

###temperature
KTBN_Daily_Values.C.last10.quantile.daily_avg <- daily_quantile_frame(KTBN_Daily_Values.C.last10$daily_avg, KTBN_Daily_Values.C.last10$Julian)
#view(KTBN_Daily_Values.C.last10.quantile.daily_avg)

KTBN_Daily_Values.C.last10.quantile.daily_high <- daily_quantile_frame(KTBN_Daily_Values.C.last10$daily_high, KTBN_Daily_Values.C.last10$Julian)
#view(KTBN_Daily_Values.C.last10.quantile.daily_high)

KTBN_Daily_Values.C.last10.quantile.daily_low <- daily_quantile_frame(KTBN_Daily_Values.C.last10$daily_low, KTBN_Daily_Values.C.last10$Julian)
#view(KTBN_Daily_Values.C.last10.quantile.daily_low)
```

###Plot the quartiles
```{r}
KTBN_Quantiles_high_WBGT.plot <- ggplot()+
  geom_ribbon(data=KTBN_Daily_Values.C.previous30.quantile.daily_high_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='orange'),color='grey',alpha=0.4)+
  geom_ribbon(data=KTBN_Daily_Values.C.last10.quantile.daily_high_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='red'),color='red',alpha=0.6)+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+theme(axis.text.x = element_text(angle = 30),plot.title=element_text(family='', face='bold', colour='Red', size=16))+
  scale_y_continuous(limits=c(8,35), name="WBGT")+
  geom_ribbon(data=KTBN_Daily_Values.C.previous30.quantile.daily_low_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='blue'),color='grey',alpha=0.4)+
  geom_ribbon(data=KTBN_Daily_Values.C.last10.quantile.daily_low_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='purple'),color='purple',alpha=0.6)+
  scale_fill_identity(name="25th-75th Quartiles",breaks=c("red","orange","purple","blue"),labels=c("Last 10-year\nmaximum","Previous 30-year\nmaximum","Last 10-year\nminimum","Previous 30-year\n minimum"),guide="legend")+
  ggtitle("Heat Season Maximum and Minimum WBGT",subtitle="Ft Leonard Wood, MO")
KTBN_Quantiles_high_WBGT.plot
```

#KFSI (Ft Sill, OK)
#### INITIAL DATA LOAD AND SET UP
```{r}
##load the 14th weather squadron file and pad missing hours with NA rows
KFSI <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KFSI_Raw.csv")
KFSI <- KFSI %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad()
print(sum(is.na(KFSI$Temperature..F.)))
print(sum(is.na(KFSI$Derived.Wet.Bulb.Globe.Temperature..F.)))
550056-sum(is.na(KFSI$Derived.Wet.Bulb.Globe.Temperature..F.))

KFSI_last10_raw <- KFSI%>%
  filter(between(datetime,"2012-10-01 00:00:00 UTC","2022-09-30 23:00:00 UTC"))%>%
  filter(between(Hour..UTC.,8,19))
print(43208-(sum(is.na(KFSI_last10_raw$Derived.Wet.Bulb.Globe.Temperature..F.))))
```

###Data Table build
```{r}
##Per-decade data completeness accounting. The nested print() calls are
##deliberate side effects: each line prints the row count and the WBGT NA
##count, and the chunk output then shows the share of non-missing WBGT
##observations for the decade (and for its 0800-1900 "daylight" subset).
KFSI_raw_60s <- KFSI%>%
  filter(between(datetime,"1960-01-01 00:00:00","1969-12-31 23:00:00"))
print((print((nrow(KFSI_raw_60s)))-print(sum(is.na(KFSI_raw_60s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_60s)
KFSI_raw_60s.daylight <- KFSI_raw_60s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_60s.daylight)))-print(sum(is.na(KFSI_raw_60s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_60s.daylight)

KFSI_raw_70s <- KFSI%>%
  filter(between(datetime,"1970-01-01 00:00:00","1979-12-31 23:00:00"))
print((print((nrow(KFSI_raw_70s)))-print(sum(is.na(KFSI_raw_70s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_70s)
KFSI_raw_70s.daylight <- KFSI_raw_70s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_70s.daylight)))-print(sum(is.na(KFSI_raw_70s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_70s.daylight)

KFSI_raw_80s <- KFSI%>%
  filter(between(datetime,"1980-01-01 00:00:00","1989-12-31 23:00:00"))
print((print((nrow(KFSI_raw_80s)))-print(sum(is.na(KFSI_raw_80s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_80s)
KFSI_raw_80s.daylight <- KFSI_raw_80s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_80s.daylight)))-print(sum(is.na(KFSI_raw_80s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_80s.daylight)

KFSI_raw_90s <- KFSI%>%
  filter(between(datetime,"1990-01-01 00:00:00","1999-12-31 23:00:00"))
print((print((nrow(KFSI_raw_90s)))-print(sum(is.na(KFSI_raw_90s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_90s)
KFSI_raw_90s.daylight <- KFSI_raw_90s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_90s.daylight)))-print(sum(is.na(KFSI_raw_90s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_90s.daylight)

KFSI_raw_00s <- KFSI%>%
  filter(between(datetime,"2000-01-01 00:00:00","2009-12-31 23:00:00"))
print((print((nrow(KFSI_raw_00s)))-print(sum(is.na(KFSI_raw_00s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_00s)
KFSI_raw_00s.daylight <- KFSI_raw_00s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_00s.daylight)))-print(sum(is.na(KFSI_raw_00s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_00s.daylight)

KFSI_raw_10s <- KFSI%>%
  filter(between(datetime,"2010-01-01 00:00:00","2019-12-31 23:00:00"))
print((print((nrow(KFSI_raw_10s)))-print(sum(is.na(KFSI_raw_10s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_10s)
KFSI_raw_10s.daylight <- KFSI_raw_10s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_10s.daylight)))-print(sum(is.na(KFSI_raw_10s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_10s.daylight)

##partial decade: 2020 through the September 2022 end of record
KFSI_raw_20s <- KFSI%>%
  filter(between(datetime,"2020-01-01 00:00:00","2022-12-31 23:00:00"))
print((print((nrow(KFSI_raw_20s)))-print(sum(is.na(KFSI_raw_20s$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_20s)
KFSI_raw_20s.daylight <- KFSI_raw_20s%>%
  mutate("Hour"=hour(datetime))%>%
  filter(between(Hour,8,19))
print((print((nrow(KFSI_raw_20s.daylight)))-print(sum(is.na(KFSI_raw_20s.daylight$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_20s.daylight)

KFSI_raw_last30 <- KFSI%>%
  filter(datetime>"1992-10-01 00:00:00")
print((print((nrow(KFSI_raw_last30)))-print(sum(is.na(KFSI_raw_last30$Derived.Wet.Bulb.Globe.Temperature..F.)))))/nrow(KFSI_raw_last30)
```

###Wrangle
```{r}
##convert Fahrenheit columns to Celsius, rename to short names, derive
##calendar fields, and linearly interpolate WBGT across gaps of <= 3 hours
KFSI <- KFSI%>%
  mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>%
  mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>%
  mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>%
  mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>%
  rename('Temp.F' = Temperature..F.)%>%
  rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>%
  rename('Heat.Index.F'=Heat.Index..F.)%>%
  rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>%
  mutate("Hour"=hour(datetime))%>%
  mutate("Day"=day(datetime))%>%
  mutate("Month"=month(datetime))%>%
  mutate("Year"=year(datetime))%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3))
##CREATE DECADE COLUMN
KFSI <- KFSI%>%
  mutate(Decade=floor(Year/10)*10)
print(sum(is.na(KFSI$Temp.F)))
print(sum(is.na(KFSI$WBGT.F)))
550056-sum(is.na(KFSI$WBGT.F))
```

## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES
###1960-1970
```{r}
##NOTE(review): Decade is numeric; comparisons like Decade=="1960" rely on
##implicit coercion — confirm intended.
KFSI_60s_Hour_Avg <- KFSI%>%
  filter(Decade=="1960"|Decade=="1970")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KFSI_60s <- KFSI%>%
  filter(Decade=="1960")%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672)) ##87,672 = hours in 1960-1969 (three leap years)
KFSI_60s <- full_join(KFSI_60s,KFSI_60s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KFSI_90s)
KFSI_60s <- mutate(KFSI_60s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces NA values in 'x' column with the averaged from 'y' column
KFSI_60s <- mutate(KFSI_60s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KFSI_60s <- KFSI_60s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
#%>%
# rename(Decade=Decade.x)
```

####1970s
```{r}
##1970s 20% missing (after maxgap=3)
##Data Prep — climatology drawn from the surrounding decades (60s-80s)
KFSI_70s_Hour_Avg <- KFSI%>%
  filter(Decade=="1970"|Decade=="1960"|Decade=="1980")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KFSI_70s <- KFSI%>%
  filter(Decade=="1970")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648)) ##87,648 = hours in 1970-1979 (two leap years)
KFSI_70s <- full_join(KFSI_70s,KFSI_70s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KFSI_90s)
KFSI_70s <- mutate(KFSI_70s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KFSI_70s <- mutate(KFSI_70s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KFSI_70s <- KFSI_70s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1980
```{r}
##1980s - no missing values
KFSI_80s_Hour_Avg <- KFSI%>%
  filter(Decade=="1970"|Decade=="1980"|Decade=="1990")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KFSI_80s <- KFSI%>%
  filter(Decade=="1980")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87672))
KFSI_80s <- full_join(KFSI_80s,KFSI_80s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KFSI_90s)
KFSI_80s <- mutate(KFSI_80s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KFSI_80s <- mutate(KFSI_80s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KFSI_80s <- KFSI_80s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####1990
```{r}
##1990s
KFSI_90s_Hour_Avg <- KFSI%>%
  filter(Decade=="1990"|Decade=="1980"|Decade=="2000")%>%
  group_by(Hour, Day, Month)%>%
  summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation
KFSI_90s <- KFSI%>%
  filter(Decade=="1990")%>%
  #group_by(Hour, Day, Month, Year, Decade)%>%
  #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>%
  as.data.frame()%>%
  mutate(Date=make_date(day=Day,month=Month,year=Year))%>%
  mutate(count=c(1:87648))
KFSI_90s <- full_join(KFSI_90s,KFSI_90s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in
#view(KFSI_90s)
KFSI_90s <- mutate(KFSI_90s, WBGT.C.x = case_when(
  is.na(WBGT.C.x) ~WBGT.C.y,
  TRUE~WBGT.C.x
)) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column
KFSI_90s <- mutate(KFSI_90s, Temp.C.x = case_when(
  is.na(Temp.C.x) ~Temp.C.y,
  TRUE~Temp.C.x
))
KFSI_90s <- KFSI_90s%>%
  select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>%
  rename(Ave_WBGT.C=WBGT.C.x)%>%
  rename(Ave_Temp.C=Temp.C.x)
```

####2000s
```{r}
##2000
##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful.
KFSI_00s_Hour_Avg <- KFSI%>% filter(Decade=="2000"|Decade=="1990"|Decade=="2010")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KFSI_00s <- KFSI%>% filter(Decade=="2000")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87672)) KFSI_00s <- full_join(KFSI_00s,KFSI_00s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KFSI_00s) KFSI_00s <- mutate(KFSI_00s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KFSI_00s <- mutate(KFSI_00s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KFSI_00s <- KFSI_00s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ####2010 ```{r} ##2010 KFSI_10s_Hour_Avg <- KFSI%>% filter(Decade=="2010"|Decade=="2000"|Decade=="2020")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KFSI_10s <- KFSI%>% filter(Decade=="2010")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87648)) #view(KFSI_10s) KFSI_10s <- full_join(KFSI_10s,KFSI_10s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KFSI_00s) KFSI_10s <- mutate(KFSI_10s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KFSI_10s <- 
mutate(KFSI_10s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KFSI_10s <- KFSI_10s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` #### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029 ```{r} KFSI_20s_Hour_Avg <- KFSI%>% filter(Decade=="2020"|Decade=="2010")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KFSI_20s <- KFSI%>% filter(Decade=="2020")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:24096)) KFSI_20s <- full_join(KFSI_20s,KFSI_20s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KFSI_20s) KFSI_20s <- mutate(KFSI_20s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KFSI_20s <- mutate(KFSI_20s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KFSI_20s <- KFSI_20s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ###Combine all chunks to new KFSI data set with the interpolated values. Only 22 NAs now. 
```{r}
## Stack the seven gap-filled decade chunks back into one continuous hourly record.
KFSI_Wrangled <- rbind(KFSI_60s,KFSI_70s,KFSI_80s,KFSI_90s,KFSI_00s,KFSI_10s,KFSI_20s) %>%
  #rename(WBGT.C = Ave_WBGT.C)%>%
  rename(date=Date)
sum(is.na(KFSI_Wrangled$Ave_Temp.C)) ##28 still missing
sum(is.na(KFSI_Wrangled$Ave_WBGT.C)) ##22 still missing
```
###Create a single df with daily values
```{r}
## Collapse the hourly record to one row per day: max/min/mean for both
## air temperature and WBGT (all in deg C).
KFSI_Daily_Values.C <- KFSI_Wrangled%>%
  group_by(date)%>%
  dplyr::summarise(daily_high = max(Ave_Temp.C),daily_low = min(Ave_Temp.C),daily_avg = mean(Ave_Temp.C),daily_high_WBGT = max(Ave_WBGT.C),daily_low_WBGT = min(Ave_WBGT.C),daily_avg_WBGT = mean(Ave_WBGT.C))%>%
  as.data.frame()
```
### Look for missing dates in the complete data set
No missing dates!
```{r}
## FIX: this chunk was fenced with `` (two backticks) instead of ```,
## so knitr treated it as inline text and the completeness check never ran.
FullSeq <- seq.Date(from = min(KFSI_Wrangled$date), to = max(KFSI_Wrangled$date), by = 1)
Missing <- FullSeq[!FullSeq %in% KFSI_Wrangled$date]
Missing
```
###RED and BLACK Count
#### Start Red and Black Flag Day Analysis
```{r}
## Hours per year at/above the red ([3]) and black ([4]) WBGT category minimums.
KFSI_REDorABOVEbyYear <- KFSI_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Red = n)
KFSI_BLACKorABOVEbyYear <- KFSI_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>%
  group_by(Year)%>%
  count(Year)%>%
  rename(Hours_Above_Black = n)
##Filtering by Red or Above will include Black days - Hazard Day
KFSI_AboveRed.last40 <- KFSI_Wrangled%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Above_Red = n)%>%
  ## FIX: was ifelse(Day_Above_Red == '',1,1), which evaluates to 1 on every
  ## branch (count() never yields ''); the intent is a presence flag, so
  ## assign the constant directly.
  mutate(Day_Above_Red = 1)%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))
##Filters for Green and above - Caution Day
## NOTE(review): between() is inclusive at both ends, so an hour exactly at the
## red minimum lands in both the red and the green/yellow sets - confirm intended.
KFSI_AboveGreen.last40 <- KFSI_Wrangled%>%
  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[1],Heat_Categories$Category_Temp_Min.C[2]))%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Green_or_Yellow = n)%>%
  ## FIX: same always-1 ifelse() as above; replaced with the constant it produced.
  mutate(Day_Green_or_Yellow = 1)%>%
  mutate(Julian=yday(date))%>%
  filter(date>="1982-01-01"&date<="2021-12-31")%>%
  mutate(year=lubridate::year(date))
```
##Create DF with all Flag Day counts only these are days at are at RED OR ABOVE or are days following RED OR ABOVE that are also GREEN OR ABOVE
```{r}
##Create a df that has all days at/above red OR days at/above green following a red+ day
KFSI_Flag_Days.last40 <- full_join(KFSI_AboveRed.last40,KFSI_AboveGreen.last40,by="date")
KFSI_Flag_Days.last40 <- KFSI_Flag_Days.last40%>%
  select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>%
  mutate(Julian.y=yday(date))%>%
  mutate(year.y=lubridate::year(date))
KFSI_Flag_Days.last40 <- replace(KFSI_Flag_Days.last40,is.na(KFSI_Flag_Days.last40),0)
#view(KFSI_Flag_Days.last40)
####YOU STOPPED HERE ON 28-MAR-2023 trying to make a script that counts days above red or days above green following a day above red
KFSI_Flag_Days.last40 <- KFSI_Flag_Days.last40%>%arrange(ymd(KFSI_Flag_Days.last40$date)) ##this line gets the df in date order for the lag() below
KFSI_Flag_Days.last40$count <- c(1:nrow(KFSI_Flag_Days.last40))
## TRUE when the PREVIOUS day was at/above red.
KFSI_Flag_Days.last40$Lagged <- lag(KFSI_Flag_Days.last40$Day_Above_Red>0,1)
## Caution days: red+ days plus any day immediately following a red+ day.
KFSI_Caution_Days.last40 <- KFSI_Flag_Days.last40%>%
  filter(Lagged==TRUE | Day_Above_Red==1)
#view(KFSI_Flag_Days.last40)
#view(KFSI_Caution_Days.last40)
```
##TIME SERIES
### Seasonal and trend plots
```{r}
####TIME SERIES ANALYSIS ON THE DATASET
##Daily HIGH
KFSI_Daily_High.TS <- ts(KFSI_Daily_Values.C$daily_high, start = c(1960,01,01),frequency=365)
KFSI_Daily_High.decomp <- stl(KFSI_Daily_High.TS,s.window="periodic")
plot(KFSI_Daily_High.decomp)
KFSI_Daily_High_Trend <- Kendall::SeasonalMannKendall(KFSI_Daily_High.TS)
summary(KFSI_Daily_High_Trend)
##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value?).
##Daily LOW KFSI_Daily_Low.TS <- ts(KFSI_Daily_Values.C$daily_low, start = c(1960,01,01),frequency=365) KFSI_Daily_Low.decomp <- stl(KFSI_Daily_Low.TS,s.window="periodic") plot(KFSI_Daily_Low.decomp) KFSI_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KFSI_Daily_Low.TS) summary(KFSI_Daily_Low_Trend) ##Daily AVG KFSI_Daily_Avg.TS <- ts(KFSI_Daily_Values.C$daily_avg, start = c(1960,01,01),frequency=365) KFSI_Daily_Avg.decomp <- stl(KFSI_Daily_Avg.TS,s.window="periodic") plot(KFSI_Daily_Avg.decomp) KFSI_Daily_Avg_Trend <- Kendall::SeasonalMannKendall(KFSI_Daily_Avg.TS) summary(KFSI_Daily_Avg_Trend) ##WBGT ##WBGT.High KFSI_Daily_High_WBGT.TS <- ts(KFSI_Daily_Values.C$daily_high_WBGT, start = c(1960,01,01),frequency=365) KFSI_Daily_High_WBGT.decomp <- stl(KFSI_Daily_High_WBGT.TS,s.window="periodic") plot(KFSI_Daily_High_WBGT.decomp) KFSI_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KFSI_Daily_High_WBGT.TS) summary(KFSI_Daily_High_WBGT_Trend) ##WBGT Low KFSI_Daily_low_WBGT.TS <- ts(KFSI_Daily_Values.C$daily_low_WBGT, start = c(1960,01,01),frequency=365) KFSI_Daily_low_WBGT.decomp <- stl(KFSI_Daily_low_WBGT.TS,s.window="periodic") plot(KFSI_Daily_low_WBGT.decomp) KFSI_Daily_low_WBGT_Trend <- Kendall::SeasonalMannKendall(KFSI_Daily_low_WBGT.TS) summary(KFSI_Daily_low_WBGT_Trend) ##WBGT Avg KFSI_Daily_avg_WBGT.TS <- ts(KFSI_Daily_Values.C$daily_avg_WBGT, start = c(1960,01,01),frequency=365) KFSI_Daily_avg_WBGT.decomp <- stl(KFSI_Daily_avg_WBGT.TS,s.window="periodic") plot(KFSI_Daily_avg_WBGT.decomp) KFSI_Daily_avg_WBGT_Trend <- Kendall::SeasonalMannKendall(KFSI_Daily_avg_WBGT.TS) summary(KFSI_Daily_avg_WBGT_Trend) ``` ### Mann Kendall Seasonal Values ```{r} ##MANUALLY RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH #Set up month and year columns KFSI_Daily_Values.C$month <- month(ymd(KFSI_Daily_Values.C$date)) KFSI_Daily_Values.C$year <- year(ymd(KFSI_Daily_Values.C$date)) ###WBGT ##HIGH WBGT KFSI_Daily_High_WBGT.C_MannKendall <- 
kendallSeasonalTrendTest(daily_high_WBGT ~ month+year, data=KFSI_Daily_Values.C) #return just tau, slope, and intercept KFSI_Daily_High_WBGT.C_MannKendall$estimate ##LOW WBGT KFSI_Daily_Low_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_low_WBGT ~ month+year, data=KFSI_Daily_Values.C) KFSI_Daily_Low_WBGT.C_MannKendall$estimate ##AVG WBGT KFSI_Daily_Avg_WBGT.C_MannKendall <- kendallSeasonalTrendTest(daily_avg_WBGT ~ month+year, data=KFSI_Daily_Values.C) KFSI_Daily_Avg_WBGT.C_MannKendall$estimate ##Build data frame with the needed information KFSI_Daily_TS_estimates.C <- KFSI_Daily_High_WBGT.C_MannKendall$estimate%>% as.data.frame() KFSI_Daily_TS_estimates.C$Low_WBGT <- as.data.frame(KFSI_Daily_Low_WBGT.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C$Avg_WBGT <- as.data.frame(KFSI_Daily_Avg_WBGT.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C$High_WBGT <- as.data.frame(KFSI_Daily_High_WBGT.C_MannKendall$estimate) KFSI_Daily_TS_estimates_WBGT.C <- KFSI_Daily_TS_estimates.C[2:4] ##Remove uneeded variables #remove(KFSI_Daily_High_WBGT.C_MannKendall,KFSI_Daily_Low_WBGT.C_MannKendall,KFSI_Daily_Avg_WBGT.C_MannKendall) ###Temperature ##HIGH KFSI_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(daily_high ~ month+year, data=KFSI_Daily_Values.C) #return just tau, slope, and intercept KFSI_Daily_High.C_MannKendall$estimate ##LOW WBGT KFSI_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(daily_low ~ month+year, data=KFSI_Daily_Values.C) KFSI_Daily_Low.C_MannKendall$estimate ##AVG WBGT KFSI_Daily_Avg.C_MannKendall <- kendallSeasonalTrendTest(daily_avg ~ month+year, data=KFSI_Daily_Values.C) KFSI_Daily_Avg.C_MannKendall$estimate ##Build data frame with the needed information KFSI_Daily_TS_estimates.C <- KFSI_Daily_High.C_MannKendall$estimate%>% as.data.frame() KFSI_Daily_TS_estimates.C$Low <- as.data.frame(KFSI_Daily_Low.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C$Avg <- as.data.frame(KFSI_Daily_Avg.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C$High <- 
as.data.frame(KFSI_Daily_High.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C <- KFSI_Daily_TS_estimates.C[2:4] ##Remove unneeded variables #remove(KFSI_Daily_High.C_MannKendall,KFSI_Daily_Low.C_MannKendall,KFSI_Daily_Avg.C_MannKendall) ``` ###Bottom quarter trend test ```{r} ##Temp KFSI_quartile_test_lower25 <- KFSI_Wrangled%>% group_by(date)%>% reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>% filter(prob==0.25) KFSI_quartile_test_lower25$month <- month(ymd(KFSI_quartile_test_lower25$date)) KFSI_quartile_test_lower25$year <- year(ymd(KFSI_quartile_test_lower25$date)) ##Bottom Quarter Temp KFSI_quartile_test_lower25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KFSI_quartile_test_lower25) KFSI_quartile_test_lower25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature ##WBGT KFSI_quartile_test_lower25_WBGT <- KFSI_Wrangled%>% group_by(date)%>% reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>% filter(prob==0.25) KFSI_quartile_test_lower25_WBGT$month <- month(ymd(KFSI_quartile_test_lower25_WBGT$date)) KFSI_quartile_test_lower25_WBGT$year <- year(ymd(KFSI_quartile_test_lower25_WBGT$date)) ##Bottom Quarter Temp KFSI_quartile_test_lower25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KFSI_quartile_test_lower25_WBGT) KFSI_quartile_test_lower25_WBGT$estimate ``` ###Upper quarter trend test ```{r} ##Temp KFSI_quartile_test_upper25 <- KFSI_Wrangled%>% group_by(date)%>% reframe(qs=quantile(Ave_Temp.C,c(0.25,0.75)),prob=c(0.25,0.75))%>% filter(prob==0.75) KFSI_quartile_test_upper25$month <- month(ymd(KFSI_quartile_test_upper25$date)) KFSI_quartile_test_upper25$year <- year(ymd(KFSI_quartile_test_upper25$date)) ##Bottom Quarter Temp KFSI_quartile_test_upper25_MannKendall <- kendallSeasonalTrendTest(qs ~ month+year, data=KFSI_quartile_test_upper25) KFSI_quartile_test_upper25_MannKendall$estimate ##returns a small but not 0 positive trend in temperature ##WBGT KFSI_quartile_test_upper25_WBGT 
<- KFSI_Wrangled%>% group_by(date)%>% reframe(qs=quantile(Ave_WBGT.C,c(0.25,0.75)),prob=c(0.25,0.75))%>% filter(prob==0.75) KFSI_quartile_test_upper25_WBGT$month <- month(ymd(KFSI_quartile_test_upper25_WBGT$date)) KFSI_quartile_test_upper25_WBGT$year <- year(ymd(KFSI_quartile_test_upper25_WBGT$date)) ##Upper Quarter WBGT KFSI_quartile_test_upper25_WBGT <- kendallSeasonalTrendTest(qs ~ month+year, data=KFSI_quartile_test_upper25_WBGT) KFSI_quartile_test_upper25_WBGT$estimate ##returns a small but not 0 positive trend in temperature ``` ##time series full year data tables ```{r} ##Build data frame with the trend information - WBGT KFSI_Daily_TS_estimates.C <- KFSI_Daily_High.C_MannKendall$estimate%>% as.data.frame() KFSI_Daily_TS_estimates.C$Low <- as.data.frame(KFSI_Daily_Low.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C$Avg <- as.data.frame(KFSI_Daily_Avg.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C$Upper <- as.data.frame(KFSI_quartile_test_upper25_MannKendall$estimate) KFSI_Daily_TS_estimates.C$Lower <- as.data.frame(KFSI_quartile_test_lower25_MannKendall$estimate) KFSI_Daily_TS_estimates.C$High <- as.data.frame(KFSI_Daily_High.C_MannKendall$estimate) KFSI_Daily_TS_estimates.C[2:6] ##Build data frame with the trend information - WBGT KFSI_Daily_TS_estimates_WBGT.C <- KFSI_Daily_High_WBGT.C_MannKendall$estimate%>% as.data.frame() KFSI_Daily_TS_estimates_WBGT.C$Low_WBGT <- as.data.frame(KFSI_Daily_Low_WBGT.C_MannKendall$estimate) KFSI_Daily_TS_estimates_WBGT.C$Avg_WBGT <- as.data.frame(KFSI_Daily_Avg_WBGT.C_MannKendall$estimate) KFSI_Daily_TS_estimates_WBGT.C$Upper_WBGT <- as.data.frame(KFSI_quartile_test_upper25_WBGT$estimate) KFSI_Daily_TS_estimates_WBGT.C$Lower_WBGT <- as.data.frame(KFSI_quartile_test_lower25_WBGT$estimate) KFSI_Daily_TS_estimates_WBGT.C$High_WBGT <- as.data.frame(KFSI_Daily_High_WBGT.C_MannKendall$estimate) KFSI_Daily_TS_estimates_WBGT.C[2:6] ``` ##Summer season trends ```{r} KFSI_warm_season_trends.C <- 
c(mean(KFSI_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),
    mean(KFSI_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),
    mean(KFSI_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),
    mean(KFSI_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),
    mean(KFSI_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2]))
print(KFSI_warm_season_trends.C)
## Same May-September (seasonal.estimates rows 5:9) mean slopes, WBGT metrics.
KFSI_warm_season_trends_WBGT.C <-
  c(mean(KFSI_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),
    mean(KFSI_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),
    mean(KFSI_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),
    mean(KFSI_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),
    mean(KFSI_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2]))
print(KFSI_warm_season_trends_WBGT.C)
```
##Finalize trend tables for export
```{r}
## Full-year slope table (estimate[2] from the seasonal Kendall test),
## air temperature, one row per metric.
KFSI_full_year_trends.C <- as.data.frame(c(KFSI_Daily_High.C_MannKendall$estimate[2],KFSI_quartile_test_upper25_MannKendall$estimate[2],KFSI_Daily_Avg.C_MannKendall$estimate[2],KFSI_quartile_test_lower25_MannKendall$estimate[2],KFSI_Daily_Low.C_MannKendall$estimate[2]))
KFSI_full_year_trends.C$installation <- "FSOK"
KFSI_full_year_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KFSI_full_year_trends.C)[1] <- 'Temp.C'
print(KFSI_full_year_trends.C)
## Full-year slope table, WBGT.
KFSI_full_year_trends_WBGT.C <- as.data.frame(c(KFSI_Daily_High_WBGT.C_MannKendall$estimate[2],KFSI_quartile_test_upper25_WBGT$estimate[2],KFSI_Daily_Avg_WBGT.C_MannKendall$estimate[2],KFSI_quartile_test_lower25_WBGT$estimate[2],KFSI_Daily_Low_WBGT.C_MannKendall$estimate[2]))
## FIX: the two label assignments below previously targeted
## KFSI_full_year_trends.C (a copy/paste no-op re-assignment), which left
## this WBGT table without installation/Metric columns.
KFSI_full_year_trends_WBGT.C$installation <- "FSOK"
KFSI_full_year_trends_WBGT.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KFSI_full_year_trends_WBGT.C)[1] <- 'WBGT.C'
print(KFSI_full_year_trends_WBGT.C)
```
##warm month season trends
```{r}
## May-September mean slopes, air temperature, labeled for export.
KFSI_warm_season_trends.C <- as.data.frame(c(mean(KFSI_Daily_High.C_MannKendall$seasonal.estimates[5:9,2]),mean(KFSI_quartile_test_upper25_MannKendall$seasonal.estimates[5:9,2]),mean(KFSI_Daily_Avg.C_MannKendall$seasonal.estimates[5:9,2]),mean(KFSI_quartile_test_lower25_MannKendall$seasonal.estimates[5:9,2]),mean(KFSI_Daily_Low.C_MannKendall$seasonal.estimates[5:9,2])))
KFSI_warm_season_trends.C$installation <- "FSOK"
KFSI_warm_season_trends.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KFSI_warm_season_trends.C)[1] <- 'Temp.C_Warm.season'
print(KFSI_warm_season_trends.C)
## May-September mean slopes, WBGT, labeled for export.
KFSI_warm_season_trends_WBGT.C <- as.data.frame(c(mean(KFSI_Daily_High_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KFSI_quartile_test_upper25_WBGT$seasonal.estimates[5:9,2]),mean(KFSI_Daily_Avg_WBGT.C_MannKendall$seasonal.estimates[5:9,2]),mean(KFSI_quartile_test_lower25_WBGT$seasonal.estimates[5:9,2]),mean(KFSI_Daily_Low_WBGT.C_MannKendall$seasonal.estimates[5:9,2])))
KFSI_warm_season_trends_WBGT.C$installation <- "FSOK"
KFSI_warm_season_trends_WBGT.C$Metric <- c("Maximum","Upper 75%","Mean","Lower 25%","Minimum")
colnames(KFSI_warm_season_trends_WBGT.C)[1] <- 'WBGT.C_Warm.season'
print(KFSI_warm_season_trends_WBGT.C)
```
##full installation trend table
```{r}
## Assemble the per-installation export table by NAME rather than by position:
## the previous cbind + select(c(1:5,8)) silently depended on the unlabeled
## WBGT table bug fixed above, and would pick wrong columns once it is fixed.
## Row order is identical across all four tables (Maximum..Minimum).
FSOK_Historical_Trends <- cbind(KFSI_warm_season_trends.C[c("installation","Metric","Temp.C_Warm.season")],Temp.C = KFSI_full_year_trends.C$Temp.C,WBGT.C = KFSI_full_year_trends_WBGT.C$WBGT.C,WBGT.C_Warm.season = KFSI_warm_season_trends_WBGT.C$WBGT.C_Warm.season)
FSOK_Historical_Trends <- FSOK_Historical_Trends%>%
  select(installation,Metric,Temp.C,WBGT.C,Temp.C_Warm.season,WBGT.C_Warm.season)
#view(FSOK_Historical_Trends)
```
#KLAW (Adjacent weather station)
#### INITIAL DATA LOAD AND SET UP
```{r eval=FALSE}
KLAW <- read_csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Raw Data/KLAW_Raw.csv")
## Build a POSIX datetime and pad() to insert rows for any missing hours.
KLAW <- KLAW %>%
  mutate('datetime' = make_datetime(year=Year, month = Month, day = Day, hour = Hour..UTC.))%>%
  pad() #%>%
# mutate('date' = make_date(year=Year, month = Month, day = Day))
KLAW <-
KLAW%>% mutate('Temp.C' = ((Temperature..F.-32)*(5/9)))%>% mutate('Dewpoint.Temp.C' = (Dewpoint.Temperature..F.-32)*(5/9))%>% mutate('Heat.Index.C'=((Heat.Index..F.-32)*(5/9)))%>% mutate('WBGT.C'=((Derived.Wet.Bulb.Globe.Temperature..F.-32)*(5/9)))%>% rename('Temp.F' = Temperature..F.)%>% rename('Dewpoint.Temp.F'=Dewpoint.Temperature..F.)%>% rename('Heat.Index.F'=Heat.Index..F.)%>% rename("WBGT.F"=Derived.Wet.Bulb.Globe.Temperature..F.)%>% mutate("Hour"=hour(datetime))%>% mutate("Day"=day(datetime))%>% mutate("Month"=month(datetime))%>% mutate("Year"=year(datetime))%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(WBGT.C = zoo::na.approx(WBGT.C, maxgap=3)) ##CREATE DECADE COLUMN KLAW <- KLAW%>% mutate(Decade=floor(Year/10)*10) ``` ## WRANGLE DATA & REPLACE WITH AVERAGE DATA VALUES ###DECADES ####1980 ```{r eval=FALSE} ##1980s - no missing values KLAW_80s_Hour_Avg <- KLAW%>% filter(Decade=="1980")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KLAW_80s <- KLAW%>% filter(Decade=="1980")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:43109)) KLAW_80s <- full_join(KLAW_80s,KLAW_80s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KLAW_90s) KLAW_80s <- mutate(KLAW_80s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KLAW_80s <- mutate(KLAW_80s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KLAW_80s <- KLAW_80s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ####1990 ```{r eval=FALSE} ##1990s KLAW_90s_Hour_Avg 
<- KLAW%>% filter(Decade=="1990")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KLAW_90s <- KLAW%>% filter(Decade=="1990")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87648)) KLAW_90s <- full_join(KLAW_90s,KLAW_90s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KLAW_90s) KLAW_90s <- mutate(KLAW_90s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KLAW_90s <- mutate(KLAW_90s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KLAW_90s <- KLAW_90s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ####2000s ```{r eval=FALSE} ##2000 ##Almost all the NAs are in 2002 and 2003, with 2001 having some and 2008 having a handful. 
KLAW_00s_Hour_Avg <- KLAW%>% filter(Decade=="2000")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KLAW_00s <- KLAW%>% filter(Decade=="2000")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87672)) KLAW_00s <- full_join(KLAW_00s,KLAW_00s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KLAW_00s) KLAW_00s <- mutate(KLAW_00s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KLAW_00s <- mutate(KLAW_00s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KLAW_00s <- KLAW_00s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ####2010 ```{r eval=FALSE} ##2010 KLAW_10s_Hour_Avg <- KLAW%>% filter(Decade=="2010")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KLAW_10s <- KLAW%>% filter(Decade=="2010")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:87648)) KLAW_10s <- full_join(KLAW_10s,KLAW_10s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KLAW_10s) KLAW_10s <- mutate(KLAW_10s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KLAW_10s <- mutate(KLAW_10s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, 
TRUE~Temp.C.x )) KLAW_10s <- KLAW_10s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` #### 2020 DECADE, EXTRAPOLATED OUT FOR 2020-2029 ```{r eval=FALSE} KLAW_20s_Hour_Avg <- KLAW%>% filter(Decade=="2020")%>% group_by(Hour, Day, Month)%>% summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE) ##create a column with the average WBGT & Temp.C values for that day, ignoring missing data in calculation KLAW_20s <- KLAW%>% filter(Decade=="2020")%>% #group_by(Hour, Day, Month, Year, Decade)%>% #summarise_at(c("WBGT.C", "Temp.C"), mean,na.rm=TRUE)%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:24096)) KLAW_20s <- full_join(KLAW_20s,KLAW_20s_Hour_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KLAW_20s) KLAW_20s <- mutate(KLAW_20s, WBGT.C.x = case_when( is.na(WBGT.C.x) ~WBGT.C.y, TRUE~WBGT.C.x )) ##Replaces WBGT NA values in 'x' column with the averaged from 'y' column KLAW_20s <- mutate(KLAW_20s, Temp.C.x = case_when( is.na(Temp.C.x) ~Temp.C.y, TRUE~Temp.C.x )) KLAW_20s <- KLAW_20s%>% select(Hour, Day, Month, Year, Decade, WBGT.C.x,Date,count, Temp.C.x)%>% rename(Ave_WBGT.C=WBGT.C.x)%>% rename(Ave_Temp.C=Temp.C.x) ``` ###Combine all chunks to new KLAW data set with the interpolated values. 
```{r eval=FALSE} KLAW_Wrangled <- rbind(KLAW_80s,KLAW_90s,KLAW_00s,KLAW_10s,KLAW_20s) %>% #rename(WBGT.C = Ave_WBGT.C)%>% rename(date=Date) #sum(is.na(KLAW_Wrangled$Ave_Temp.C)) ##28 still missing #sum(is.na(KLAW_Wrangled$Ave_WBGT.C)) ##22 still missing ``` ##CREATE SINGLE DAY HIGHS AND LOWS ```{r eval=FALSE} #Create single day high temp KLAW_DAILY_HIGH.C <- KLAW_Wrangled%>% group_by(date)%>% dplyr::summarise(daily_high = max(Ave_Temp.C))%>% as.data.frame() #Create single day low temp KLAW_DAILY_LOW.C <- KLAW_Wrangled%>% group_by(date)%>% dplyr::summarise(daily_LOW = min(Ave_Temp.C))%>% as.data.frame() #Create single day high WBGT temp KLAW_DAILY_HIGH_WBGT.C <- KLAW_Wrangled%>% group_by(date)%>% dplyr::summarise(daily_high_WBGT = max(Ave_WBGT.C))%>% as.data.frame() #Create single day low WBGT temp ``` ### Look for missing dates in the complete data set No missing dates! ```{r eval=FALSE} FullSeq <- seq.Date(from = min(KLAW_Wrangled$date), to = max(KLAW_Wrangled$date), by = 1) Missing <- FullSeq[!FullSeq %in% KLAW_Wrangled$date] Missing ``` 1965-1967 missing data gaps ###RED and BLACK Count #### Start Red and Black Flag Day Analysis ```{r eval=FALSE} KLAW_REDorABOVEbyYear <- KLAW_Wrangled%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>% group_by(Year)%>% count(Year)%>% rename(Hours_Above_Red = n) KLAW_BLACKorABOVEbyYear <- KLAW_Wrangled%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])%>% group_by(Year)%>% count(Year)%>% rename(Hours_Above_Black = n) ##Plots hours Red or Above KLAW_REDorABOVEbyYear.plot <- ggplot(KLAW_REDorABOVEbyYear, aes(x = Year, y=Hours_Above_Red))+ geom_line()+ geom_point() ``` ##TIME SERIES ### Seasonal and trend plots ```{r eval=FALSE} ####TIME SERIES ANALYSIS ON THE DATASET KLAW_Daily_High.C_Clean <- KLAW_DAILY_HIGH.C%>% mutate(Temp.C_Clean = zoo::na.approx(daily_high))%>% select(date,Temp.C_Clean) KLAW_Daily_High.TS <- ts(KLAW_Daily_High.C_Clean$Temp.C_Clean, start = c(1985,01,01),frequency=365) 
KLAW_Daily_High.decomp <- stl(KLAW_Daily_High.TS,s.window="periodic") plot(KLAW_Daily_High.decomp) KLAW_Daily_High_Trend <- Kendall::SeasonalMannKendall(KLAW_Daily_High.TS) summary(KLAW_Daily_High_Trend) ##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value?). ##WBGT - INSUFFICIENT DATA IN EARLY YEARS #summary(KLAW_DAILY_HIGH_WBGT.C) #KLAW_Daily_High_WBGT.C_Clean <- KLAW_DAILY_HIGH_WBGT.C%>% # mutate(WBGT.C_Clean = zoo::na.approx(KLAW_DAILY_HIGH_WBGT.C$daily_high_WBGT))%>% # select(date,WBGT.C_Clean) #summary(KLAW_Daily_High_WBGT.C_Clean) ##No more NAs ##Temp Trend using Mann-Kendall Seasonal Analysis #KLAW_Daily_High_WBGT.TS <- ts(KLAW_Daily_High_WBGT.C_Clean$WBGT.C_Clean, start = c(1973,01,01),frequency=365) #KLAW_Daily_High_WBGT.decomp <- stl(KLAW_Daily_High_WBGT.TS,s.window="periodic") #plot(KLAW_Daily_High_WBGT.decomp) #KLAW_Daily_High_WBGT_Trend <- Kendall::SeasonalMannKendall(KLAW_Daily_High_WBGT.TS) #summary(KLAW_Daily_High_WBGT_Trend) ##This returns a very small p=value, also a small tau, so we can state the trend is STRONGLY statistically significant even though the magnitude of the temperature increase is small (the tau value). 
##Daily LOW KLAW_Daily_Low.C_Clean <- KLAW_DAILY_LOW.C%>% mutate(Temp.C_Clean = zoo::na.approx(daily_LOW))%>% select(date,Temp.C_Clean) KLAW_Daily_Low.TS <- ts(KLAW_Daily_Low.C_Clean$Temp.C_Clean, start = c(1990,01,01),frequency=365) KLAW_Daily_Low.decomp <- stl(KLAW_Daily_Low.TS,s.window="periodic") plot(KLAW_Daily_Low.decomp) KLAW_Daily_Low_Trend <- Kendall::SeasonalMannKendall(KLAW_Daily_Low.TS) summary(KLAW_Daily_Low_Trend) ``` ### Mann Kendall Seasonal Values ```{r eval=FALSE} ##MANUALLY RUN THE MANN KENDALL TEST - WBGT TEMPERATURE - MAX DAILY HIGH #Set up month and year columns #KLAW_Daily_High_WBGT.C_Clean$month <- month(ymd(KLAW_Daily_High_WBGT.C_Clean$date)) #KLAW_Daily_High_WBGT.C_Clean$year <- year(ymd(KLAW_Daily_High_WBGT.C_Clean$date)) #KLAW_Daily_High_WBGT.C_MannKendall <- kendallSeasonalTrendTest(WBGT.C_Clean ~ month+year, data=KLAW_Daily_High_WBGT.C_Clean) #return just tau, slope, and intercept #KLAW_Daily_High_WBGT.C_MannKendall$estimate ##MANN KENDALL TEST - HIGH TEMPERATURE - MAX DAILY HIGH #Set up month and year columns KLAW_Daily_High.C_Clean$month <- month(ymd(KLAW_Daily_High.C_Clean$date)) KLAW_Daily_High.C_Clean$year <- year(ymd(KLAW_Daily_High.C_Clean$date)) KLAW_Daily_High.C_MannKendall <- kendallSeasonalTrendTest(Temp.C_Clean ~ month+year, data=KLAW_Daily_High.C_Clean) #return just tau, slope, and intercept KLAW_Daily_High.C_MannKendall$estimate ##MANN KENDALL TEST - LOW TEMPERATURE - MIN DAILY LOW KLAW_Daily_Low.C_Clean$month <- month(ymd(KLAW_Daily_Low.C_Clean$date)) KLAW_Daily_Low.C_Clean$year <- year(ymd(KLAW_Daily_Low.C_Clean$date)) KLAW_Daily_Low.C_MannKendall <- kendallSeasonalTrendTest(Temp.C_Clean ~ month+year, data=KLAW_Daily_Low.C_Clean) #return just tau, slope, and intercept KLAW_Daily_Low.C_MannKendall$estimate ``` # KFSI vs KLAW COMPARISON ```{r eval=FALSE} ##CONDUCT A CORRELATION TEST BETWEEN THE TWO SITES TO SEE IF THEY MOVE IN THE SAME DIRECTION (AT LEAST STATISTICALLY) ##IF THEY DO, WE CAN SAY THAT THE TWO SITES ARE 
SHOWING THE SAME GENERAL CLIMATE TRENDS ACROSS YEARS ##NULL HYPOTHESIS IS THEY DO NOT MOVE IN THE SAME DIRECTION, AND THEREFORE WE HAVE LOWER CONFIDENCE IN USING THEM KLAW_WBGT_REDorABOVE_Hourly_byYear <- KLAW_Wrangled%>% filter(date>='1985-01-01')%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>% group_by(Year)%>% count(Year)%>% rename(Hours_Above_Red = n) KFSI_WBGT_REDorABOVE_Hourly_byYear <- KFSI_Wrangled%>% filter(date>='1985-01-01')%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>% group_by(Year)%>% count(Year)%>% rename(Hours_Above_Red = n) ##CHECK FOR NORMAL DISTRIBUTION TO THE POINTS ##Shapiro test, if P>0.05, then it IS normally distributed. In this case, hourly summary of KLAW is not normally distributed. shapiro.test(KLAW_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red) shapiro.test(KFSI_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red) ##Not normally distributed, so should not sure Pearson's correlation tests #ggplot(KLAW_WBGT_REDorABOVE_Hourly_byYear)+ # geom_histogram(aes(y=KLAW_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red)) ##Visualize normal distribution #FSOK_Combined%>%select(.,KFSI_RedorAbove_Hourly,KLAW_RedorAbove_Hourly)%>%map(~fBasics::dagoTest(.)) ##Omnibus test <0.05 would indicate that the set is NOT normally distributed. 
##USE KENDALL RANK CORRELATION TEST - can be used if not from normal distribution cor.test(KLAW_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KFSI_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="kendall") ##Shows positive correlation between the 2 sites (tau=0.279) and p-value < 0.05 (0.004) ; implies correlation ##USE SPEARMAN RANK CORRELATION COEFFICIENT - can be used if data is not norma; cor.test(KLAW_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,KFSI_WBGT_REDorABOVE_Hourly_byYear$Hours_Above_Red,method="spearman") ##Shows rho = 0.416 and p-value <0.05 ; implies correlation ##All tests return positive correlation and low p-values, including tests robust to non-normal ``` # KFSI ANALYSIS AND PLOT CREATION ```{r} ####FULL_JOIN ALL DECADES Avg_Hourly_Decade <- full_join(KFSI_60s,KFSI_70s,by="count")%>% rename(Hour_1960=Hour.x,Day.1960=Day.x,Month.1960=Month.x,Year.1960=Year.x,Decade.1960=Decade.x,Ave_WBGT.C.1960=Ave_WBGT.C.x,Date.1960=Date.x)%>% rename(Hour_1970=Hour.y,Day.1970=Day.y,Month.1970=Month.y,Year.1970=Year.y,Decade.1970=Decade.y,Ave_WBGT.C.1970=Ave_WBGT.C.y,Date.1970=Date.y) Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KFSI_80s,by="count") Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KFSI_90s,by="count")%>% rename(Hour_1980=Hour.x,Day.1980=Day.x,Month.1980=Month.x,Year.1980=Year.x,Decade.1980=Decade.x,Ave_WBGT.C.1980=Ave_WBGT.C.x,Date.1980=Date.x)%>% rename(Hour_1990=Hour.y,Day.1990=Day.y,Month.1990=Month.y,Year.1990=Year.y,Decade.1990=Decade.y,Ave_WBGT.C.1990=Ave_WBGT.C.y,Date.1990=Date.y) Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KFSI_00s,by="count") Avg_Hourly_Decade <- full_join(Avg_Hourly_Decade,KFSI_10s,by="count")%>% rename(Hour_2000=Hour.x,Day.2000=Day.x,Month.2000=Month.x,Year.2000=Year.x,Decade.2000=Decade.x,Ave_WBGT.C.2000=Ave_WBGT.C.x,Date.2000=Date.x)%>% rename(Hour_2010=Hour.y,Day.2010=Day.y,Month.2010=Month.y,Year.2010=Year.y,Decade.2010=Decade.y,Ave_WBGT.C.2010=Ave_WBGT.C.y,Date.2010=Date.y) Avg_Hourly_Decade <- 
full_join(Avg_Hourly_Decade,KFSI_20s,by="count")%>% rename(Hour_2020=Hour,Day.2020=Day,Month.2020=Month,Year.2020=Year,Decade.2020=Decade,Ave_WBGT.C.2020=Ave_WBGT.C,Date.2020=Date) #Avg_Hourly_Decade$Month.1960 <- paste0("1960s",Avg_Hourly_Decade$Month.1960) Avg_Hourly_Decade$Month.1970 <- paste0("1970s",Avg_Hourly_Decade$Month.1970) Avg_Hourly_Decade$Month.1980 <- paste0("1980s",Avg_Hourly_Decade$Month.1980) Avg_Hourly_Decade$Month.1990 <- paste0("1990s",Avg_Hourly_Decade$Month.1990) Avg_Hourly_Decade$Month.2000 <- paste0("2000s",Avg_Hourly_Decade$Month.2000) Avg_Hourly_Decade$Month.2010 <- paste0("2010s",Avg_Hourly_Decade$Month.2010) Avg_Hourly_Decade$Month.2020 <- paste0("2020s",Avg_Hourly_Decade$Month.2020) KFSI_Avg_Summer_Hourly_Decade <- Avg_Hourly_Decade%>% filter(between(Month.1960,5,9)) ##Filters for just the summer months #view(Avg_Summer_Hourly_Decade) KFSI_Thirteen_to_Nineteen <- Avg_Hourly_Decade%>% filter(Year.2010>2012)%>% select(Month.1960,Hour_2010:Ave_Temp.C.y.y.y)%>% rename(Hour=Hour_2010,Day=Day.2010,Month=Month.2010,Year=Year.2010,Decade=Decade.2010,Ave_WBGT.C=Ave_WBGT.C.2010,Date=Date.2010,Ave_Temp.C=Ave_Temp.C.y.y.y) KFSI_Zero_to_Two <- Avg_Hourly_Decade%>% filter(Year.2020>0)%>% select(Month.1960,Hour_2020:Ave_Temp.C)%>% rename(Hour=Hour_2020,Day=Day.2020,Month=Month.2020,Year=Year.2020,Decade=Decade.2020,Ave_WBGT.C=Ave_WBGT.C.2020,Date=Date.2020,Ave_Temp.C=Ave_Temp.C) KFSI_Thirteen_to_Two <- rbind(KFSI_Thirteen_to_Nineteen,KFSI_Zero_to_Two) KFSI_Thirteen_to_Two <- KFSI_Thirteen_to_Two%>% select(Hour,Month.1960,Hour,Ave_WBGT.C:Ave_Temp.C)%>% rename(Ave_WBGT_last10 = Ave_WBGT.C ,Ave_Temp_last10=Ave_Temp.C,Month_last10= Month.1960 )%>% filter(between(Month_last10,5,9)) #%>%add_row(Hour = 21:23, Month_last10=9,Ave_WBGT_last10=NA,Date=NA,Ave_Temp_last10=NA) KFSI_Avg_Summer_Hourly_Decade <- cbind(KFSI_Avg_Summer_Hourly_Decade,KFSI_Thirteen_to_Two) ``` ####DENSITY PLOT AND HISTORGRAMS BY HOURLY AVERAGE BY DECADE ###DENSITY PLOT ````{r} 
KFSI_Summer_DensityPlot_decades <- ggplot(KFSI_Avg_Summer_Hourly_Decade)+ geom_density(aes(x=Ave_WBGT.C.1960,color='white'),linetype=2,linewidth=0.3)+ geom_density(aes(x=Ave_WBGT.C.1970,color='purple'),linetype=2,linewidth=0.3)+ geom_density(aes(x=Ave_WBGT.C.1980,color='red'),linewidth=0.3)+ geom_density(aes(x=Ave_WBGT.C.1990,color='blue'),linewidth=0.3)+ geom_density(aes(x=Ave_WBGT.C.2000, color='yellow'),linewidth=0.3)+ geom_density(aes(x=Ave_WBGT.C.2010,color='green'),linewidth=0.3)+ geom_density(aes(x=Ave_WBGT.C.2020,color='black'),linewidth=0.3)+ geom_density(aes(x=Ave_WBGT_last10 ,color='pink'),alpha=0.7,linewidth=1)+ scale_color_discrete(name="Hourly WBGT \n Grouped by decade", breaks=c("white","purple","red", "blue", "yellow","green","black","pink"), labels=c("1960s","1970s","1980s", "1990s", "2000s","2010s","2020s","Last 10 Years"))+ xlab("WBGT")+ scale_x_continuous(limits = c(20,38),expand=c(0,0))+ ggtitle("May - September Hourly WBGT Density",subtitle="Ft Sill, OK")+ # xlim(20,38)+ theme_light() KFSI_Summer_DensityPlot_decades ## 2000s is included here without modification to missing data points because this is a density plot, so sample size matters less, assuming the proporation of WBGT readings are accurate. 
``` ####ANOMALY PLOT SUMMER MONTHS - DAYTIME ```{r} ##SET UP DATA KFSI_Anom_Avg <- KFSI_Wrangled%>% filter(between(Year,1960,2022), between(Month,5,9))%>% group_by(Hour, Day, Month)%>% dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C,na.rm=TRUE)) ##create a column with the average WBGT values for that day, ignoring missing data in calculation KFSI_Anomaly <- KFSI_Wrangled%>% filter(between(Year,1960,2022),between(Hour,8,20), between(Month,5,9))%>% group_by(Hour, Day, Month, Year, Decade)%>% dplyr::summarise(Ave_WBGT.C = mean(Ave_WBGT.C))%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:125307)) KFSI_Anomaly <- full_join(KFSI_Anomaly,KFSI_Anom_Avg,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KFSI_00s) KFSI_Anomaly <- mutate(KFSI_Anomaly, Ave_WBGT.C.x = case_when( is.na(Ave_WBGT.C.x) ~Ave_WBGT.C.y, TRUE~Ave_WBGT.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column ##START ACTUAL PLOT KFSI_Anomaly <- KFSI_Anomaly%>% select(Hour,Day,Month,Year,Ave_WBGT.C.x,Date,count)%>% rename(Ave_WBGT.C=Ave_WBGT.C.x) KFSI_Ref <- KFSI_Anomaly%>% filter(between(Year,1990,2020))%>% summarise(mean=mean(Ave_WBGT.C)) KFSI_Anomaly <- KFSI_Anomaly%>% group_by(Year)%>% dplyr::summarise(Avg_Year_WBGT = mean(Ave_WBGT.C))%>% mutate(Anomaly = Avg_Year_WBGT-KFSI_Ref$mean) #mutate(Date = make_date(year=Year,month=Month)) #KFSI_Anomaly <- KFSI_Anomaly %>% arrange(ymd(KFSI_Anomaly$Date)) ##puts DF in date order #KFSI_Anomaly$count <- c(1:nrow(KFSI_Anomaly)) ##assigns date ordered number to DF KFSI_Monthly_Anomaly.plot <- ggplot(KFSI_Anomaly,aes(x=Year,y=Anomaly))+ geom_line()+ geom_smooth()+ xlab("Year")+ ylab("Monthly Average WBGT Anomaly\nDegrees C")+ ggtitle("Yearly average WBGT anomaly, 1960-2022",subtitle="Ft Sill, OK")+ labs(caption = "Reference period = Average WBGT, 1990-2020")+ theme_classic()+ geom_hline(yintercept=0,linetype="dashed") 
#annotate("text",x=2010,y=-1.5,label=("Reference period = Average WBGT, 1990-2020"),size=2)+ ``` ### Compare anomaly from KFSI to anomaly from NOAA https://www.ncei.noaa.gov/access/monitoring/climate-at-a-glance/divisional/time-series/0904/tavg/ann/5/1960-2022?base_prd=true&begbaseyear=1990&endbaseyear=2020 ```{r} ##SET UP DATA ##manually load in the regional anomaly trend Avg_Temp_Anom <- read.csv ('/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/R Files/JAN 2023 Data Projects/NOAA Climate Anomaly Data.csv') SWOK_Avg_Temp_Anom <- Avg_Temp_Anom$Anomaly.SW.OK SWOK_Avg_Temp_Anom <- SWOK_Avg_Temp_Anom[1:62] KFSI_Anom_Avg.C <- KFSI_Wrangled%>% filter(between(Year,1960,2021))%>% group_by(Hour, Day, Month)%>% dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C,na.rm=TRUE)) ##create a column with the average WBGT values for that day, ignoring missing data in calculation KFSI_Anomaly.C <- KFSI_Wrangled%>% filter(between(Year,1960,2021))%>% group_by(Hour, Day, Month, Year, Decade)%>% dplyr::summarise(Ave_Temp.C = mean(Ave_Temp.C))%>% as.data.frame()%>% mutate(Date=make_date(day=Day,month=Month,year=Year))%>% mutate(count=c(1:543504)) KFSI_Anomaly.C <- full_join(KFSI_Anomaly.C,KFSI_Anom_Avg.C,by=c("Hour" = "Hour", "Day" = "Day", "Month" = "Month")) ##Adds the averaged column in #view(KFSI_00s) KFSI_Anomaly.C <- mutate(KFSI_Anomaly.C, Ave_Temp.C.x = case_when( is.na(Ave_Temp.C.x) ~Ave_Temp.C.y, TRUE~Ave_Temp.C.x )) ##Replaces NA values in 'x' column with the averaged from 'y' column ##START ACTUAL PLOT KFSI_Anomaly.C <- KFSI_Anomaly.C%>% select(Hour,Day,Month,Year,Ave_Temp.C.x,Date,count)%>% rename(Ave_Temp.C=Ave_Temp.C.x) KFSI_Ref.C <- KFSI_Anomaly.C%>% filter(between(Year,1990,2020))%>% summarise(mean=mean(Ave_Temp.C)) KFSI_Anomaly.C <- KFSI_Anomaly.C%>% group_by(Year)%>% dplyr::summarise(Avg_Year_Temp = mean(Ave_Temp.C))%>% mutate(Anomaly = Avg_Year_Temp-KFSI_Ref.C$mean) #mutate(Date = make_date(year=Year,month=Month)) #KFSI_Anomaly <- KFSI_Anomaly %>% 
#  arrange(ymd(KFSI_Anomaly$Date)) ##puts DF in date order
#KFSI_Anomaly$count <- c(1:nrow(KFSI_Anomaly)) ##assigns date ordered number to DF

## Attach the NOAA regional anomaly series; first 62 entries correspond to the
## 1960-2021 yearly rows of KFSI_Anomaly.C built above.
KFSI_Anomaly.C$Regional_Anom <- SWOK_Avg_Temp_Anom[1:62]
KFSI_Anomaly.C$Regional_Anom <- as.numeric(KFSI_Anomaly.C$Regional_Anom)

## Overlay the installation anomaly (blue) on the NOAA regional anomaly (red).
KFSI_Monthly_Anomaly.C.plot <- ggplot(KFSI_Anomaly.C)+
  geom_line(aes(x=Year,y=Anomaly,color="Blue"))+
  geom_line(aes(x=Year,y=Regional_Anom,color="red"))+
  xlab("Year")+
  scale_x_continuous(breaks = c(1960,1970,1980,1990,2000,2010,2020),expand=c(0,0))+
  ylab("Average Anomaly\nDegrees C")+
  ggtitle("Yearly average temperature anomaly\nFt Sill, OK vs South-West Oklahoma Average")+
  theme_classic()+
#  theme(axis.text.y=element_text(margin=margin(r=0)))+
  geom_hline(yintercept=0,linetype="dashed")+
  scale_color_identity(name="Location",breaks=c("Blue","red"),
                       labels=c("FSOK","Regional Average\n(NOAA)"),guide="legend")+
  labs(caption = "Reference period = Yearly average temp, 1990-2020")
#annotate("text",x=2009,y=-2,label=("Reference period = Average Temperature, 1990-2020"),size=2)+
KFSI_Monthly_Anomaly.C.plot

## Pearson correlation between the installation and regional anomaly series
## (both are yearly means, so a parametric test is used here).
FSOKxRegional.cor <- cor.test(KFSI_Anomaly.C$Regional_Anom ,KFSI_Anomaly.C$Anomaly,method = "pearson")
FSOKxRegional.cor$estimate
FSOKxRegional.cor$p.value
FSOKxRegional.cor$conf.int
```
####HISTOGRAMS FULL year data used
```{r}
## BUG FIX: this panel is titled "1970-1979" but previously plotted
## Ave_WBGT.C.1980 (copy-paste from the 1980s panel). It now uses the
## 1970s decade column created in the FULL_JOIN chunk above.
Summer_Histogram_1970s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.1970),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("1970-1979")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()

Summer_Histogram_1980s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.1980),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("1980-1989")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()

Summer_Histogram_1990s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.1990),color='red', fill="red",alpha=0.4,position="identity",bins = 50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("1990-1999")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+
  scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+
  theme_classic()

Summer_Histogram_2000s <- ggplot(Avg_Hourly_Decade)+
  geom_histogram(aes(x=Ave_WBGT.C.2000),color='red', fill="red",alpha=0.4,position="identity",bins=50)+
  ylab("Hours")+
  xlab("WBGT")+
  ggtitle("2000-2009")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+
  geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+
geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+ geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+ scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+ theme_classic() Summer_Histogram_2010s <- ggplot(Avg_Hourly_Decade)+ geom_histogram(aes(x=Ave_WBGT.C.2010),color='red', fill="red",alpha=0.4,position="identity",bins=50)+ ylab("Hours")+ xlab("WBGT")+ ggtitle("2010-2019")+ geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[4],color="black")+ geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[3],color="red",linetype=2)+ geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[2],color="yellow",linetype=5)+ geom_vline(xintercept=Heat_Categories$Category_Temp_Min.C[1],color="green",linetype=6)+ #xlim(27,36)+ scale_x_continuous(breaks=c(27.8,29.4,31.1,32.2),limits = c(27,36),labels = c("27.8"="GREEN FLAG\n 27.8","29.4"="YELLOW FLAG\n29.4","31.1"="RED FLAG \n31.1","32.2"="BLACK FLAG\n32.2"))+ theme_classic() ``` ####BAR CHARTS (Full Year data used) ```{r} ##CREATE VECTORS OF HOURS ABOVE A CATEGORY Green <- c(sum(KFSI_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1])/10,sum(KFSI_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1])/10,sum(KFSI_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1])/10,sum(KFSI_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1])/10,sum(KFSI_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1])/10,sum(KFSI_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1])/2.75) Yellow <- 
c(sum(KFSI_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2])/10,sum(KFSI_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2])/10,sum(KFSI_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2])/10,sum(KFSI_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2])/10,sum(KFSI_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2])/10,sum(KFSI_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2])/2.75) Red <- c(sum(KFSI_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])/10,sum(KFSI_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])/10,sum(KFSI_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])/10,sum(KFSI_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])/10,sum(KFSI_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])/10,sum(KFSI_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])/2.75) Black <- c(sum(KFSI_70s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])/10,sum(KFSI_80s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])/10,sum(KFSI_90s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])/10,sum(KFSI_00s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])/10,sum(KFSI_10s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])/10,sum(KFSI_20s$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])/2.75) #/10 in order to get single year avg KFSI_Hours_Flags <- as.data.frame(cbind(c("1970s","1980s","1990s","2000s","2010s","2020s"),Green,Yellow,Red,Black),stringsAsFactors = FALSE) KFSI_Hours_Flags <- KFSI_Hours_Flags%>% pivot_longer(!V1,names_to = "Heat_Category",values_to = "Hours")%>% as.data.frame() KFSI_Hours_Flags$Hours <- as.integer(KFSI_Hours_Flags$Hours) #view(KFSI_Hours_Flags) ##CREATE GRAPH KFSI_Threshold_Barplot <- ggplot(data=KFSI_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) + geom_bar(stat="identity",position=position_dodge())+ geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+ scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+ ylab("Average Hours per Year")+ xlab("Heat 
Category")+ theme_classic()+ scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+ coord_flip()+ #annotate("text",x=4,y=150,label=c("Hours above Black threshold, average per Year \n2020s : 96 (projected)\n2010s : 124 \n2000s : 74\n1990s : 50 \n1980s : 121 "),hjust=0,size=2)+ ggtitle("Average Hours at Each Heat Catergory per Year",subtitle="Ft Sill, OK") KFSI_Threshold_Barplot ``` #### COMBINED PLOTS ```{r} #bottom_row <- plot_grid(Summer_Histogram_2000s,Summer_Histogram_2010s,nrow=1) #Middle_row <- plot_grid(Summer_Histogram_1980s,Summer_Histogram_1990s,nrow=1) #top_row <- plot_grid(Summer_DensityPlot_decades, KFSI_Monthly_Anomaly.plot,nrow=2) Summer_Months_KFSI.plot <- plot_grid(KFSI_Summer_DensityPlot_decades, KFSI_Monthly_Anomaly.plot, KFSI_Threshold_Barplot,nrow=3) Summer_Months_KFSI.plot <- ggdraw(add_sub(Summer_Months_KFSI.plot,"Data gaps =< 3 hours interpolated. Longer gaps replaced with decade average for missing Hour, Day \n 2020s density plot for 2020-2022 only; Bar chart extrapolates full 2020s decade",size=8)) Summer_Months_KFSI.plot ``` #BCT Analysis ##Load Data FSOK BCT Wrangle ```{r} #write.csv(FSOK_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Sill ATRRS Pull/FSOK_BCT_23.csv") FSOK_BCT_23 <- read.csv("~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Sill ATRRS Pull/FSOK_BCT_23.csv",stringsAsFactors = FALSE) FSOK_BCT_23 <- FSOK_BCT_23%>% mutate_all(.funs = funs(str_trim))%>% select(Class,X.Report.Date,X.Start.Date,X.End.Date,X.Capacity) FSOK_BCT_23 <- na.omit(FSOK_BCT_23) FSOK_BCT_23$Class <- as.numeric(FSOK_BCT_23$Class) FSOK_BCT_23$X.Capacity <- as.numeric(FSOK_BCT_23$X.Capacity) FSOK_BCT_23$X.Report.Date <- dmy(FSOK_BCT_23$X.Report.Date) FSOK_BCT_23$X.Start.Date <- dmy(FSOK_BCT_23$X.Start.Date) FSOK_BCT_23$X.End.Date <- dmy(FSOK_BCT_23$X.End.Date) FSOK_BCT_23 <- FSOK_BCT_23%>% rename(Report_Date = X.Report.Date, 
Start_Date=X.Start.Date,End_Date=X.End.Date)%>% mutate(Julian_Start=yday(Start_Date),Julian_End=yday(End_Date)) FSOK_BCT_23 <- FSOK_BCT_23[apply(FSOK_BCT_23!=0,1,all),] #remove any row with a numerical '0' to get rid of classes with 0 capacity #write.csv(FSOK_BCT_23,file="~/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Sill ATRRS Pull/FSOK_BCT_23_Wrangled.csv") ``` ##KFSI BCT ##Assign clean history to KFSI data ```{r} FSOK_BCT_23$Start_Month <- month(ymd(FSOK_BCT_23$Start_Date)) FSOK_BCT_23$End_Month <- month(ymd(FSOK_BCT_23$End_Date)) FSOK_BCT_23$Start_Day <- day(ymd(FSOK_BCT_23$Start_Date)) FSOK_BCT_23$End_Day <- day(ymd(FSOK_BCT_23$End_Date)) FSOK_BCT_23$Days_Long <- FSOK_BCT_23$End_Date-FSOK_BCT_23$Start_Date #counts number of days in each class ``` ##Get the last 40 and last 30 year data frames ```{r} ##code below to get average hourly temp and WBGT for last fourty years for full data (not 2022 or 2023 due to lack of data) ###Averaging out the values left alone, per conversation with Luke KFSI_Hourly_Avg.last40 <- KFSI_Wrangled%>% filter(date>="1982-01-01"&date<="2021-12-31")%>% mutate(Julian=yday(date))%>% group_by(Hour,Julian) KFSI_Hourly_Avg.last30 <- KFSI_Wrangled%>% filter(date>="1992-10-01"&date<="2022-09-30")%>% mutate(Julian=yday(date))%>% group_by(Hour,Julian) # dplyr::summarize(Avg_Hourly_Low_WBGT.C=min(Ave_WBGT.C),Avg_Hourly_Avg_WBGT.C=mean(Ave_WBGT.C),Avg_Hourly_High_WBGT.C=max(Ave_WBGT.C),Avg_Hourly_Low_Temp.C=min(Ave_Temp.C),Avg_Hourly_Avg_Temp.C=mean(Ave_Temp.C),Avg_Hourly_High_Temp.C=max(Ave_Temp.C)) ``` ##Filter ```{r} ##Get ATRRS Data and re-wrangled #FSOK_BCT_23_Wrangled <- read.csv("/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/ATRRS Data/FT Sill ATRRS Pull/FSOK_BCT_23_Wrangled.csv") FSOK_BCT_23_Wrangled <- FSOK_BCT_23 FSOK_BCT_23_Wrangled$Start_Date <- ymd(FSOK_BCT_23_Wrangled$Start_Date) FSOK_BCT_23_Wrangled$End_Date <- ymd(FSOK_BCT_23_Wrangled$End_Date) 
## Keep only FY23 classes starting in the analysis window.
FSOK_BCT_23_Wrangled <- FSOK_BCT_23_Wrangled%>%
  filter(between(Start_Date,"2023-03-01","2023-12-31"))
```
##Filter with ATRRS Data for course dates
```{r}
FSOK_BCT_23_Wrangled$Count <- c(1:nrow(FSOK_BCT_23_Wrangled)) #add a counter column for following loop

##Loop to count average hours in each flag condition by cohort.
for (i in FSOK_BCT_23_Wrangled$Count){
  Flags <- KFSI_Hourly_Avg.last30%>%
    filter(between(Julian,FSOK_BCT_23_Wrangled$Julian_Start[i],FSOK_BCT_23_Wrangled$Julian_End[i]))
  ##divide the below by 30 because there are 30 years in this filtered data set,
  ##yielding average hours per single class iteration.
  ## BUG FIX: the band expressions had lost their comparison operators
  ## (`Flags$Ave_WBGT.C=threshold`) and the five per-category assignments were
  ## fused into one statement. Reconstructed as one mutually exclusive WBGT band
  ## per heat category, which is what the TotalCourseHours check below requires.
  FSOK_BCT_23_Wrangled$NoFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[1])) /(30))
  FSOK_BCT_23_Wrangled$GreenFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[1]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[2])) /(30))
  FSOK_BCT_23_Wrangled$YellowFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[2]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[3])) /(30))
  FSOK_BCT_23_Wrangled$RedFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3]&Flags$Ave_WBGT.C<Heat_Categories$Category_Temp_Min.C[4])) /(30))
  FSOK_BCT_23_Wrangled$BlackFlags_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4])) /(30))
  ## "Dangerous" = red flag or above (categories 3-4 combined).
  FSOK_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[i] <- as.numeric(length(which(Flags$Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])) /(30))
}

##Loop to count up total hours from NoFlag:BlackFlag as a check that all hours are accounted for
for (i in FSOK_BCT_23_Wrangled$Count){
  FSOK_BCT_23_Wrangled$TotalCourseHours[i] <- FSOK_BCT_23_Wrangled$NoFlags_Avg[i]+FSOK_BCT_23_Wrangled$GreenFlags_Avg[i]+FSOK_BCT_23_Wrangled$YellowFlags_Avg[i]+FSOK_BCT_23_Wrangled$RedFlags_Avg[i]+FSOK_BCT_23_Wrangled$BlackFlags_Avg[i]
}
#view(FSOK_BCT_23_Wrangled)
```
##Calculate the number of days at or preceded by a Red or Black Flag Day
```{r}
##Filtering by Red or Above will include Black days - "Hazardous Days"
KFSI_AboveRed <- KFSI_Hourly_Avg.last40%>%
  subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  group_by(date)%>%
  count(date)%>%
  rename(Day_Above_Red = n)%>%
  mutate(Day_Above_Red =ifelse(Day_Above_Red == '',1,1))

KFSI_AboveGreen <- KFSI_Hourly_Avg.last40%>%
  subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[1],Heat_Categories$Category_Temp_Max.C[2]))%>%
  group_by(date)%>%
  count(date)%>%
rename(Day_Green_or_Yellow = n)%>% mutate(Day_Green_or_Yellow =ifelse(Day_Green_or_Yellow == '',1,1)) #Days just in Red category Last 30 KFSI_Cat4 <- KFSI_Hourly_Avg.last30%>% subset(between(Ave_WBGT.C,Heat_Categories$Category_Temp_Min.C[3],Heat_Categories$Category_Temp_Min.C[4]))%>% group_by(date)%>% count(date)%>% rename(Day_Cat4 = n)%>% mutate(Day_Cat4 =ifelse(Day_Cat4 == '',1,1))%>% mutate(Julian=yday(date)) #Days just in Black category Last 30 KFSI_Cat5 <- KFSI_Hourly_Avg.last30%>% subset(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[4] )%>% group_by(date)%>% count(date)%>% rename(Day_Cat5 = n)%>% mutate(Day_Cat5 =ifelse(Day_Cat5 == '',1,1))%>% mutate(Julian=yday(date)) ``` ##Create a list of Green and Yellow days that follow Red or Black Days - last 40 years ```{r} ##Create a df that has all days at/above red OR days at/above green following a red+ day KFSI_Flag_Days.last40 <- full_join(KFSI_AboveRed.last40,KFSI_AboveGreen.last40,by="date") KFSI_Flag_Days.last40 <- KFSI_Flag_Days.last40%>% select(date,Day_Above_Red,Day_Green_or_Yellow,Julian.y,year.y)%>% mutate(Julian.y=yday(date))%>% mutate(year.y=lubridate::year(date)) KFSI_Flag_Days.last40 <- replace(KFSI_Flag_Days.last40,is.na(KFSI_Flag_Days.last40),0) KFSI_Flag_Days.last40 <- KFSI_Flag_Days.last40%>%arrange(ymd(KFSI_Flag_Days.last40$date)) ##this line gets the df in date order for the following loop #KFSI_Flag_Days.last40$count <- c(1:nrow(KFSI_Flag_Days.last40)) KFSI_Flag_Days.last40$Lagged <- lag(KFSI_Flag_Days.last40$Day_Above_Red>0,1) ##create a column identifying days after RED or BLACK ##Create the average occurrence of "caution" day KFSI_Caution_Days.last40 <- KFSI_Flag_Days.last40%>% filter(Lagged==TRUE | Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Caution Days" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Caution Days",Julian,date) 
KFSI_Caution_Days.last40$Avg_Count_Caution_Days <- KFSI_Caution_Days.last40$`Caution Days`/40 ##Create the average occurance of heat cat 4-5 "high heat" days KFSI_High_Heat_Days.last40 <- KFSI_Flag_Days.last40%>% filter(Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename(High_Heat_Days = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select(High_Heat_Days,Julian,date) KFSI_High_Heat_Days.last40$High_Heat_Days <- KFSI_High_Heat_Days.last40$High_Heat_Days/40 #"Caution Days" include both high heat days and caitiopn days KFSI_Caution_Days.last40 <- full_join(KFSI_Caution_Days.last40,KFSI_High_Heat_Days.last40,by="Julian")%>% select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>% rename(date=date.x) KFSI_Caution_Days.last40 <- replace(KFSI_Caution_Days.last40,is.na(KFSI_Caution_Days.last40),0) ``` ##Create a data frame of Red and Black Flag Days - last 30 ```{r} ##Create a df that has all days at RED and all days at BLACK KFSI_RedorBlack_Flag_Days.last30 <- full_join(KFSI_Cat4,KFSI_Cat5,by="date") KFSI_RedorBlack_Flag_Days.last30 <- KFSI_RedorBlack_Flag_Days.last30%>% select(date,Day_Cat4,Day_Cat5)%>% mutate(Julian=yday(date))%>% mutate(year.y=lubridate::year(date)) KFSI_RedorBlack_Flag_Days.last30 <- replace(KFSI_RedorBlack_Flag_Days.last30,is.na(KFSI_RedorBlack_Flag_Days.last30),0) ``` ##Create a list of Green and Yellow days that follow Red or Black Days - last 30 years ```{r} ##Create last 30 years of flag days KFSI_Flag_Days.last30 <- KFSI_Flag_Days.last40%>% filter(between(date,"1992-10-01","2022-09-30")) KFSI_Caution_Days.last30 <- KFSI_Flag_Days.last30%>% filter(Lagged==TRUE | Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Caution Days" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Caution Days",Julian,date) 
KFSI_Caution_Days.last30$Avg_Count_Caution_Days <- KFSI_Caution_Days.last30$`Caution Days`/30 ##Create the average occurance of heat cat 4-5 "high heat" days KFSI_High_Heat_Days.last30<- KFSI_Flag_Days.last30%>% filter(Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename(High_Heat_Days = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select(High_Heat_Days,Julian,date) KFSI_High_Heat_Days.last30$High_Heat_Days <- KFSI_High_Heat_Days.last30$High_Heat_Days/30 KFSI_Caution_Days.last30 <- full_join(KFSI_Caution_Days.last30,KFSI_High_Heat_Days.last30,by="Julian")%>% select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>% rename(date=date.x) KFSI_Caution_Days.last30 <- replace(KFSI_Caution_Days.last30,is.na(KFSI_Caution_Days.last30),0) ``` ##Create a list of Green and Yellow days that follow Red or Black Days - last 10 years ```{r} ##Create last 10 years of flag days KFSI_Flag_Days.last10 <- KFSI_Flag_Days.last40%>% filter(between(date,"2012-10-01","2022-09-30")) KFSI_Caution_Days.last10 <- KFSI_Flag_Days.last10%>% filter(Lagged==TRUE | Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Caution Days" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Caution Days",Julian,date) KFSI_Caution_Days.last10$Avg_Count_Caution_Days <- KFSI_Caution_Days.last10$`Caution Days`/10 ##Create the average occurance of heat cat 4-5 "high heat" days KFSI_High_Heat_Days.last10<- KFSI_Flag_Days.last10%>% filter(Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename(High_Heat_Days = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select(High_Heat_Days,Julian,date) KFSI_High_Heat_Days.last10$High_Heat_Days <- KFSI_High_Heat_Days.last10$High_Heat_Days/10 KFSI_Caution_Days.last10 <- 
full_join(KFSI_Caution_Days.last10,KFSI_High_Heat_Days.last10,by="Julian")%>% select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>% rename(date=date.x) KFSI_Caution_Days.last10 <- replace(KFSI_Caution_Days.last10,is.na(KFSI_Caution_Days.last10),0) ``` ##Plot the training cycle vs red and black flag days (Dangerous Heat per Class) ```{r} FSOK_heat_hours.plot <- ggplot (FSOK_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+ geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+ geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Hours of Hazardous\nHeat per Class")+ theme_light() + geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-2,hjust=0,size=3)+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+ geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=4,fontface='bold')+ xlab("FSOK BCT Class Number (FY23)\nLine corresponds to class start date")+ylab("'Red' or 'Black' Hours per Class")+ ggtitle("Average (mean) hours of hazardous heat by basic training class",subtitle="Ft Sill, OK")+ labs(caption = "Hours averaged from 30 year period (October 1992 - September 2021).\nClass dates from FY 2023 basic combat training schedule") FSOK_heat_hours.plot ``` ###PLOT COURSE NUMBER AND THE LIKLIHOOD THAT THE DAY IS DANGEROUS ON SAME GRAPH ```{r} ggplot()+ #geom_line(data=KFSI_Caution_Days.last40,aes(x=date,y=Avg_Count_Caution_Days*250))+ geom_smooth(data=KFSI_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200))+ scale_y_continuous(limits=c(0,200), name="Average Hazardous Heat Hours per 
Class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of Cautionary Heat Day"))+ ##second axis is divided by 200, since to plot the geom_smooth line we multiplied by 250 geom_segment(data=FSOK_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FSOK_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=5,fontface='bold')+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ xlab("FSOK BCT Class Number (FY23 example)")+ ggtitle("Hours Hazardous Heat for Basic Training and probability of experiencing a cautionary heat day\nbased on FY 2023 course schedule",subtitle="Ft Sill, OK")+ theme_light() ``` ###Rank days by high heat and by heat wave probability ```{r} KFSI_Caution_Days.last40.sorted <- KFSI_Caution_Days.last40%>% mutate(High_Heat_Ranked=rank(-High_Heat_Days))%>% mutate(Avg_Count_Caution_Days_Ranked=rank(-Avg_Count_Caution_Days)) KFSI_Caution_Days.last40.sorted <- KFSI_Caution_Days.last40.sorted%>% arrange(KFSI_Caution_Days.last40.sorted$High_Heat_Ranked,KFSI_Caution_Days.last40.sorted$Avg_Count_Caution_Days_Ranked)%>% mutate(newcol=High_Heat_Ranked[]+Avg_Count_Caution_Days_Ranked)%>% mutate(ranking=c(1:nrow(KFSI_Caution_Days.last40.sorted)))%>% mutate(ranking_rescaled=rescale(-ranking)) ##this ranking is based on the following - probability of a high heat day, probability of a cautionary heat day, and then earlier in the year. scale is used to transform the individual day to somethign that can be plotted on a % axis. 
#view(KFSI_Caution_Days.last40.sorted) ``` ##Prior 30 year hazardous and caution probability ```{r} KFSI_Flag_Days.1982.2011 <- KFSI_Flag_Days.last40%>% filter(between(date,"1982-01-01","2011-12-31")) ##Create the average occurrence of "caution" day KFSI_Caution_Days.prev30 <- KFSI_Flag_Days.1982.2011%>% filter(Lagged==TRUE | Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Caution Days" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Caution Days",Julian,date) KFSI_Caution_Days.prev30$Avg_Count_Caution_Days <- KFSI_Caution_Days.prev30$`Caution Days`/30 ##Create the average occurance of heat cat 4-5 "hazardous heat" days KFSI_High_Heat_Days.prev30<- KFSI_Flag_Days.1982.2011%>% filter(Day_Above_Red==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename(High_Heat_Days = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select(High_Heat_Days,Julian,date) KFSI_High_Heat_Days.prev30$High_Heat_Days <- KFSI_High_Heat_Days.prev30$High_Heat_Days/30 KFSI_Caution_Days.prev30 <- full_join(KFSI_Caution_Days.prev30,KFSI_High_Heat_Days.prev30,by="Julian")%>% select(High_Heat_Days,Avg_Count_Caution_Days,Julian,date.x)%>% rename(date=date.x) KFSI_Caution_Days.prev30 <- replace(KFSI_Caution_Days.prev30,is.na(KFSI_Caution_Days.prev30),0) ``` ###Plot the likelihood that a day is a 'Cautionary' or 'Hazardous' Heat Day ```{r} ##Cautionary Days KFSI_Likelihood_cautionary_days.plot <- ggplot(KFSI_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KFSI_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg","Previous 30-year 
Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Purple', size=16))+ #annotate("text",label="5% increase in likelihood of cautionary days in the last week of July",size=6,x=KFSI_Caution_Days.prev30$date[45] ,y=.9)+ # geom_point(data=KFSI_Caution_Days.last40.sorted[1:10,],aes(x=date,y=ranking_rescaled))+ #geom_text(data=KFSI_Caution_Days.last40.sorted[1:10,],aes(x=date,y=ranking_rescaled-.9,label=ranking))+ ggtitle("Cautionary heat daily probability",subtitle="Ft Sill, OK") #geom_line(data=KFSI_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red')) KFSI_Likelihood_cautionary_days.plot ##Hazardous Days KFSI_Likelihood_hazardous_days.plot <- ggplot(KFSI_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KFSI_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = 
element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Red', size=16))+ #annotate("text",label="~10% increase in likelihood of hazardous days in the last week of July",size=6,x=KFSI_Caution_Days.last10$date[40] ,y=.9)+ ggtitle("Hazardous heat (category 4 or 5) daily probability",subtitle="Ft Sill, OK") #geom_line(data=KFSI_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red')) KFSI_Likelihood_hazardous_days.plot ``` ## [NEW] Last 30-year Cat 4 or Cat 5 ```{r} ##Create the average occurrence of "cat 4" day KFSI_Cat4_Days.last30 <- KFSI_RedorBlack_Flag_Days.last30%>% filter(Day_Cat4==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Heat Cat 4" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Heat Cat 4",Julian,date) KFSI_Cat4_Days.last30$`Heat Cat 4` <- KFSI_Cat4_Days.last30$`Heat Cat 4`/30 KFSI_Cat4_Days.last30 <- KFSI_Cat4_Days.last30%>% rename(Likelihood_Cat4 = `Heat Cat 4`) ##Create the average occurrence of "cat 5" day KFSI_Cat5_Days.last30 <- KFSI_RedorBlack_Flag_Days.last30%>% filter(Day_Cat5==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Heat Cat 5" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Heat Cat 5",Julian,date) KFSI_Cat5_Days.last30$`Heat Cat 5` <- KFSI_Cat5_Days.last30$`Heat Cat 5`/30 KFSI_Cat5_Days.last30 <- KFSI_Cat5_Days.last30%>% rename(Likelihood_Cat5 = `Heat Cat 5`) ##Create the average occurrence of "hazardous heat" day KFSI_Hazardous_Days.last30 <- KFSI_RedorBlack_Flag_Days.last30%>% filter(Day_Cat5==1 || Day_Cat4==1)%>% mutate(day=day(date),month=month(date))%>% group_by(month,day)%>% count(day)%>% rename("Hazardous Heat" = n)%>% mutate(date=make_date(year=2023,month=month,day=day))%>% mutate(Julian=yday(date))%>% select("Hazardous Heat",Julian,date) 
KFSI_Hazardous_Days.last30$`Hazardous Heat` <- KFSI_Hazardous_Days.last30$`Hazardous Heat`/30 KFSI_Hazardous_Days.last30 <- KFSI_Hazardous_Days.last30%>% rename(Likelihood_Hazardous_Heat = `Hazardous Heat`) KFSI_Likelihood_Cat4xCat5.last30 <- full_join(KFSI_Cat4_Days.last30,KFSI_Cat5_Days.last30,by="Julian")%>% select(Likelihood_Cat4,Likelihood_Cat5,Julian,date.x)%>% rename(date=date.x) KFSI_Likelihood_Cat4xCat5.last30 <- full_join(KFSI_Likelihood_Cat4xCat5.last30,KFSI_Hazardous_Days.last30,by="Julian")%>% select(Likelihood_Cat4,Likelihood_Cat5,Likelihood_Hazardous_Heat,Julian,date.x)%>% rename(date=date.x) KFSI_Likelihood_Cat4xCat5.last30 <- replace(KFSI_Likelihood_Cat4xCat5.last30,is.na(KFSI_Likelihood_Cat4xCat5.last30),0) ``` ### [NEW] Plot the likelihood that a day is Cat4 or Cat5 ```{r} ##Heat Cat 4 and Heat Cat 5, last 30 KFSI_Likelihood_Cat4xCat5.last30.plot <- ggplot(KFSI_Likelihood_Cat4xCat5.last30)+ geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+ geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+ scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+ scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Black', size=16))+ ggtitle("Daily probability of experiencing hazardous heat",subtitle="Ft Sill, OK\n(Oct 1992 - Sept 2022)") 
#geom_line(data=KFSI_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red')) KFSI_Likelihood_Cat4xCat5.last30.plot ``` ##25th-75th Quartiles ```{r} KFSI_Daily_Values.C <- KFSI_Daily_Values.C%>% mutate(day=day(date))%>% mutate(Julian=yday(date)) KFSI_Daily_Values.C.previous30 <- KFSI_Daily_Values.C%>% filter(between(date,"1982-01-01","2011-12-31")) KFSI_Daily_Values.C.last10 <- KFSI_Daily_Values.C%>% filter(between(date,"2012-10-01","2022-09-30")) ``` ###- Previous 30 years - Find Quantiles and create data frame ```{r} ##fidn the quartile for each day ##WBGT ###dail avg wbgt KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT <- do.call("rbind", tapply(KFSI_Daily_Values.C.previous30$daily_avg_WBGT, KFSI_Daily_Values.C.previous30$Julian, quantile)) KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT <- KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT%>% as.data.frame(KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT) ##create a new data frame KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT$Julian <- c(1:nrow(KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT)) KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT$date <- as.Date(KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.previous30.quantile.daily_avg_WBGT) KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT <- do.call("rbind", tapply(KFSI_Daily_Values.C.previous30$daily_high_WBGT, KFSI_Daily_Values.C.previous30$Julian, quantile)) KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT <- KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT%>% as.data.frame(KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT) ##create a new data frame KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT$Julian <- c(1:nrow(KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT)) KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT$date <- as.Date(KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT$Julian, 
origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT) KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT <- do.call("rbind", tapply(KFSI_Daily_Values.C.previous30$daily_low_WBGT, KFSI_Daily_Values.C.previous30$Julian, quantile)) KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT <- KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT%>% as.data.frame(KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT) ##create a new data frame KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT$Julian <- c(1:nrow(KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT)) KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT$date <- as.Date(KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT) ###temperature KFSI_Daily_Values.C.previous30.quantile.daily_avg <- do.call("rbind", tapply(KFSI_Daily_Values.C.previous30$daily_avg, KFSI_Daily_Values.C.previous30$Julian, quantile)) KFSI_Daily_Values.C.previous30.quantile.daily_avg <- KFSI_Daily_Values.C.previous30.quantile.daily_avg%>% as.data.frame(KFSI_Daily_Values.C.previous30.quantile.daily_avg) ##create a new data frame KFSI_Daily_Values.C.previous30.quantile.daily_avg$Julian <- c(1:nrow(KFSI_Daily_Values.C.previous30.quantile.daily_avg)) KFSI_Daily_Values.C.previous30.quantile.daily_avg$date <- as.Date(KFSI_Daily_Values.C.previous30.quantile.daily_avg$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.previous30.quantile.daily_avg) KFSI_Daily_Values.C.previous30.quantile.daily_high <- do.call("rbind", tapply(KFSI_Daily_Values.C.previous30$daily_high, KFSI_Daily_Values.C.previous30$Julian, quantile)) KFSI_Daily_Values.C.previous30.quantile.daily_high <- KFSI_Daily_Values.C.previous30.quantile.daily_high%>% as.data.frame(KFSI_Daily_Values.C.previous30.quantile.daily_high) ##create a new data frame KFSI_Daily_Values.C.previous30.quantile.daily_high$Julian <- 
c(1:nrow(KFSI_Daily_Values.C.previous30.quantile.daily_high)) KFSI_Daily_Values.C.previous30.quantile.daily_high$date <- as.Date(KFSI_Daily_Values.C.previous30.quantile.daily_high$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.previous30.quantile.daily_high) KFSI_Daily_Values.C.previous30.quantile.daily_low <- do.call("rbind", tapply(KFSI_Daily_Values.C.previous30$daily_low, KFSI_Daily_Values.C.previous30$Julian, quantile)) KFSI_Daily_Values.C.previous30.quantile.daily_low <- KFSI_Daily_Values.C.previous30.quantile.daily_low%>% as.data.frame(KFSI_Daily_Values.C.previous30.quantile.daily_low) ##create a new data frame KFSI_Daily_Values.C.previous30.quantile.daily_low$Julian <- c(1:nrow(KFSI_Daily_Values.C.previous30.quantile.daily_low)) KFSI_Daily_Values.C.previous30.quantile.daily_low$date <- as.Date(KFSI_Daily_Values.C.previous30.quantile.daily_low$Julian, origin=as.Date("2022-12-31")) ``` ###- Last 10 years - Find Quantiles and create data frame ```{r} ##fidn the quartile for each day ##WBGT ###dail avg wbgt KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT <- do.call("rbind", tapply(KFSI_Daily_Values.C.last10$daily_avg_WBGT, KFSI_Daily_Values.C.last10$Julian, quantile)) KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT <- KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT%>% as.data.frame(KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT) ##create a new data frame KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT$Julian <- c(1:nrow(KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT)) KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT$date <- as.Date(KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.last10.quantile.daily_avg_WBGT) KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT <- do.call("rbind", tapply(KFSI_Daily_Values.C.last10$daily_high_WBGT, KFSI_Daily_Values.C.last10$Julian, quantile)) KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT <- 
KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT%>% as.data.frame(KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT) ##create a new data frame KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT$Julian <- c(1:nrow(KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT)) KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT$date <- as.Date(KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT) KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT <- do.call("rbind", tapply(KFSI_Daily_Values.C.last10$daily_low_WBGT, KFSI_Daily_Values.C.last10$Julian, quantile)) KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT <- KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT%>% as.data.frame(KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT) ##create a new data frame KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT$Julian <- c(1:nrow(KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT)) KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT$date <- as.Date(KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT) ###temperature KFSI_Daily_Values.C.last10.quantile.daily_avg <- do.call("rbind", tapply(KFSI_Daily_Values.C.last10$daily_avg, KFSI_Daily_Values.C.last10$Julian, quantile)) KFSI_Daily_Values.C.last10.quantile.daily_avg <- KFSI_Daily_Values.C.last10.quantile.daily_avg%>% as.data.frame(KFSI_Daily_Values.C.last10.quantile.daily_avg) ##create a new data frame KFSI_Daily_Values.C.last10.quantile.daily_avg$Julian <- c(1:nrow(KFSI_Daily_Values.C.last10.quantile.daily_avg)) KFSI_Daily_Values.C.last10.quantile.daily_avg$date <- as.Date(KFSI_Daily_Values.C.last10.quantile.daily_avg$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.last10.quantile.daily_avg) KFSI_Daily_Values.C.last10.quantile.daily_high <- do.call("rbind", tapply(KFSI_Daily_Values.C.last10$daily_high, 
KFSI_Daily_Values.C.last10$Julian, quantile)) KFSI_Daily_Values.C.last10.quantile.daily_high <- KFSI_Daily_Values.C.last10.quantile.daily_high%>% as.data.frame(KFSI_Daily_Values.C.last10.quantile.daily_high) ##create a new data frame KFSI_Daily_Values.C.last10.quantile.daily_high$Julian <- c(1:nrow(KFSI_Daily_Values.C.last10.quantile.daily_high)) KFSI_Daily_Values.C.last10.quantile.daily_high$date <- as.Date(KFSI_Daily_Values.C.last10.quantile.daily_high$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.last10.quantile.daily_high) KFSI_Daily_Values.C.last10.quantile.daily_low <- do.call("rbind", tapply(KFSI_Daily_Values.C.last10$daily_low, KFSI_Daily_Values.C.last10$Julian, quantile)) KFSI_Daily_Values.C.last10.quantile.daily_low <- KFSI_Daily_Values.C.last10.quantile.daily_low%>% as.data.frame(KFSI_Daily_Values.C.last10.quantile.daily_low) ##create a new data frame KFSI_Daily_Values.C.last10.quantile.daily_low$Julian <- c(1:nrow(KFSI_Daily_Values.C.last10.quantile.daily_low)) KFSI_Daily_Values.C.last10.quantile.daily_low$date <- as.Date(KFSI_Daily_Values.C.last10.quantile.daily_low$Julian, origin=as.Date("2022-12-31")) #view(KFSI_Daily_Values.C.last10.quantile.daily_low) ``` ###Plot the quartiles ```{r} KFSI_Quantiles_high_WBGT.plot <- ggplot()+ geom_ribbon(data=KFSI_Daily_Values.C.previous30.quantile.daily_high_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='orange'),color='grey',alpha=0.4)+ geom_ribbon(data=KFSI_Daily_Values.C.last10.quantile.daily_high_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='red'),color='red',alpha=0.6)+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+theme(axis.text.x = element_text(angle = 30),plot.title=element_text(family='', face='bold', colour='Red', size=16))+ scale_y_continuous(limits=c(8,35), name="WBGT")+ 
geom_ribbon(data=KFSI_Daily_Values.C.previous30.quantile.daily_low_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='blue'),color='grey',alpha=0.4)+ geom_ribbon(data=KFSI_Daily_Values.C.last10.quantile.daily_low_WBGT, aes(x=date,ymin=`25%`,ymax=`75%`,fill='purple'),color='purple',alpha=0.6)+ scale_fill_identity(name="25th-75th Quartiles",breaks=c("red","orange","purple","blue"),labels=c("Last 10-year\nmaximum","Previous 30-year\nmaximum","Last 10-year\nminimum","Previous 30-year\n minimum"),guide="legend")+ ggtitle("Heat Season Maximum and Minimum WBGT",subtitle="Ft Sill, OK") KFSI_Quantiles_high_WBGT.plot ``` #Plots Figures and Tables ##Anomaly Plots ```{r} Combined_Anomaly.C.plot <- plot_grid(KLSF_Monthly_Anomaly.C.plot,KMMT_Monthly_Anomaly.C.plot,KFSI_Monthly_Anomaly.C.plot,KTBN_Monthly_Anomaly.C.plot) Combined_Anomaly.C.plot ``` ```{r} Combined_Anomaly.plot <- plot_grid(KLSF_Monthly_Anomaly.plot,KMMT_Monthly_Anomaly.plot,KFSI_Monthly_Anomaly.plot,KTBN_Monthly_Anomaly.plot) Combined_Anomaly.plot ``` ```{r} Combined_Threshold.plot <- plot_grid(KLSF_Threshold_Barplot,KMMT_Threshold_Barplot,KFSI_Threshold_Barplot,KTBN_Threshold_Barplot) Combined_Threshold.plot ``` ```{r} Combined_Density.plot <- plot_grid(KLSF_Summer_DensityPlot_decades,KMMT_Summer_DensityPlot_decades,KFSI_Summer_DensityPlot_decades,KTBN_Summer_DensityPlot_decades) Combined_Density.plot ``` ##Combined Heat Hours per Class 2x2 (Lollipop Plot) ```{r} ##FBGA FBGA_heat_hours.plot.combined <- ggplot (FBGA_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+ geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+ geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Ft Moore")+ theme_light() + geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-2,hjust=0.5,size=3)+ theme( panel.grid.major.x = element_blank(), 
panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.y = element_blank(), axis.title.x = element_blank(), legend.position = "none", axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+ geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=2,fontface='bold')+ xlab("FBGA BCT Class Number")+ ggtitle("Ft Moore, GA")+ ylim(-3,200) ##FLW FLW_heat_hours.plot.combined <- ggplot (FLW_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+ geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+ geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Ft Leonard Wood")+ theme_light() + geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-4,hjust=0,size=3)+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), legend.position = "none", axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+ geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=2,fontface='bold',axis.text=element_text(size=14))+ ggtitle("Ft Leonard Wood, MO")+ ylim(-3,200)+ xlab("FLW BCT Class Number")+ ylab("Hours at heat category 4 or 5") ##FSOK FSOK_heat_hours.plot.combined <- ggplot (FSOK_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+ 
geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+ geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Ft Sill")+ theme_light() + geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-4,hjust=0,size=3)+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(),legend.position = "none", axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+ geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=2,fontface='bold')+ ggtitle("Ft Sill, OK")+ ylim(-3,200)+ xlab("FSOK BCT Class Number")+ ylab("Hours at heat category 4 or 5") ##FJSC FJSC_heat_hours.plot.combined <- ggplot (FJSC_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+ geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+ geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Hours per Class")+ theme_light() + geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-4,hjust=0,size=3)+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30),axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 
week",limits=as.Date(c("2023-03-01","2023-10-01")))+ geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=2,fontface='bold')+ xlab("FJSC BCT Class Number")+ ggtitle("Ft Jackson, SC")+ ylim(-3,200) legend <- get_legend(FJSC_heat_hours.plot.combined) ##FJSC again, removing legend FJSC_heat_hours.plot.combined <- ggplot (FJSC_BCT_23_Wrangled,aes(x=Start_Date,y=Dangerous_Heat_Hours_Avg))+ geom_segment(aes(x=Start_Date,xend=Start_Date,y=0,yend=Dangerous_Heat_Hours_Avg))+ geom_point(aes(size=Dangerous_Heat_Hours_Avg,fill=Dangerous_Heat_Hours_Avg),color="red",fill=alpha("red",0.3),alpha=0.7,shape=21,stroke=2)+scale_size(range=c(0.01,7),name="Ft Jackson")+ theme_light() + geom_text(aes(label=round(Dangerous_Heat_Hours_Avg,0)), position=position_dodge(width=0.9),vjust=-4,hjust=0,size=3)+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.y = element_blank(), axis.title.x = element_blank(), legend.position = "none", axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week",limits=as.Date(c("2023-03-01","2023-10-01")))+ geom_text(aes(label=Class,y=-3), position=position_dodge(width=0.9),hjust=0,size=2,fontface='bold')+ xlab("FJSC BCT Class Number")+ ggtitle("Ft Jackson, SC")+ ylim(-3,200) ##Combined heat hours per class top_row.heatxClass <- plot_grid(FJSC_heat_hours.plot.combined,FBGA_heat_hours.plot.combined) bottom_row.heatxClass <- plot_grid(FSOK_heat_hours.plot.combined,FLW_heat_hours.plot.combined) class_heat_combined <- plot_grid(top_row.heatxClass,bottom_row.heatxClass,ncol=1) ##Make a shared y-axis yaxis <- ggdraw() + draw_label( "Hours at heat category 4 or 5", fontface = 'bold', angle = 90, x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of 
first plot plot.margin = margin(160, 0, 0, 10) ) ##Make a shared x-axis xaxis <- ggdraw() + draw_label( "Numbers above bubbles are average hours of hazardous heat per class. Bottom numbers along horizontal axis identify basic training class number.\nVerticle lines correspond with class start date.", fontface = 'bold', angle = 0, x = 0, hjust = 0 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 0) ) title <- ggdraw() + draw_label("Average hours of hazardous heat per class", fontface = 'bold', x = 0, hjust = 0, size=18 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 310)) sub_title <- ggdraw() + draw_label( "Based on a 30-year climate average (October 1992 - September 2022)", fontface = 'bold', x = 0, hjust = 0, size=10) + theme( plot.margin = margin(0, 0, 0, 325)) class_heat_combined <- plot_grid(title,sub_title,class_heat_combined,xaxis,ncol=1,rel_heights = c(0.05,0.03,1,0.07)) class_heat_combined.2 <- plot_grid(yaxis,class_heat_combined,ncol=2,rel_widths = c(0.03,1)) class_heat_combined.2 ``` ##Combined hours and probability per Class 2x2 ```{r} ##FBGA FBGA_hours.prob <- ggplot()+ geom_smooth(data=KLSF_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.1)))+ geom_segment(data=FBGA_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FBGA_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=2,fontface='bold')+ scale_x_date(date_breaks = "1 month", date_labels = 
"%b",date_minor_breaks = "1 week")+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ ggtitle("Ft Moore, GA") ##FLW FLW_hours.prob <- ggplot()+ geom_smooth(data=KTBN_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.1)))+ geom_segment(data=FLW_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FLW_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=2,fontface='bold')+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Ft Leonard Wood, MO") ##FSOK FSOK_hours.prob <- ggplot()+ geom_smooth(data=KFSI_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.1)))+ geom_segment(data=FSOK_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ 
geom_text(data=FSOK_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=2,fontface='bold')+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Ft Sill, OK") ##FJSC FJSC_hours.prob <- ggplot()+ geom_smooth(data=KMMT_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.1)))+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=2,fontface='bold')+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.line=element_line(size=0.2), panel.grid=element_line(size=01) )+ scale_x_date(date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Ft Jackson, SC") top_row.heat.probxClass <- plot_grid(FJSC_hours.prob,FBGA_hours.prob) bottom_row.heat.probxClass <- plot_grid(FSOK_hours.prob,FLW_hours.prob) class_heat.prob_combined <- plot_grid(top_row.heat.probxClass,bottom_row.heat.probxClass,ncol=1) ##Make a shared y-axis yaxis <- ggdraw() + draw_label( "Hours 
at heat category 4 or 5", fontface = 'bold', angle = 90, x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(170, 0, 0, 10) ) yaxis2 <- ggdraw() + draw_label( "Probability of a cautionary heat day", fontface = 'bold', angle = 270, x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 170, 10) ) ##Make a shared x-axis xaxis <- ggdraw() + draw_label( "-Numbers with horizontal bars correspond to BCT classes.\n-The length of a horizontal bar is the class length from start to finish (x-axis).", fontface = 'bold', angle = 0, x = 0, hjust = 0, size = 10 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 10) ) xaxis2 <- ggdraw() + draw_label( "-Horizontal bar elevation is the average number of hazardous heat hours experienced by class (left axis).\n-The blue line is the daily probability of a cautionary day (right axis) with 95% confidence interval shaded.", fontface = 'bold', angle = 0, x = 0, hjust = 0, size = 10 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 10) ) xaxis <- plot_grid(xaxis,xaxis2,ncol=2) title <- ggdraw() + draw_label("Hazardous heat and likelihood of cautionary heat day, all BCT classes", fontface = 'bold', x = 0, hjust = 0, size=18 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 175)) class_heat.prob_combined <- plot_grid(title,class_heat.prob_combined,xaxis,ncol=1,rel_heights = c(0.05,1,0.08)) class_heat.prob_combined <- plot_grid(yaxis,class_heat.prob_combined,yaxis2,ncol=3,rel_widths = c(0.03,1,0.03)) class_heat.prob_combined ``` ##Combined hours and prob per Class 1x5 ```{r} 
#install.packages("ggpubr") library(ggpubr) ##Example Plot example.prob <- ggplot()+ geom_smooth(data=KMMT_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,150), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.1)))+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=Start_Date[5],xend=End_Date[5],y=Dangerous_Heat_Hours_Avg[5],yend=Dangerous_Heat_Hours_Avg[5]))+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=Start_Date[11],xend=End_Date[11],y=Dangerous_Heat_Hours_Avg[11],yend=Dangerous_Heat_Hours_Avg[11]))+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=Start_Date[15],xend=End_Date[15],y=Dangerous_Heat_Hours_Avg[15],yend=Dangerous_Heat_Hours_Avg[15]))+ geom_text(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[5]),y=Dangerous_Heat_Hours_Avg[5],label=round(Class[5],0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3)+ geom_text(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[11]),y=Dangerous_Heat_Hours_Avg[11],label=round(Class[11],0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3)+ geom_text(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[15]),y=Dangerous_Heat_Hours_Avg[15],label=round(Class[15],0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3)+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ scale_x_date(limit=c(as.Date("2023-03-01"),as.Date("2023-12-01")),expand=c(0,0),date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Example Installation, State")+ #start and end date segments 
geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[5]),xend=(Start_Date[5]),y=(Dangerous_Heat_Hours_Avg[5]+10),yend=0),color='red',arrow = arrow(length = unit(0.3, "cm")))+ annotate("text",x=FJSC_BCT_23_Wrangled$Start_Date[5],y=FJSC_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[5]+16,label="Class Start Date",fontface='bold')+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(End_Date[5]),xend=(End_Date[5]),y=(Dangerous_Heat_Hours_Avg[5]+10),yend=0),color='red',arrow = arrow(length = unit(0.3, "cm")))+ annotate("text",x=FJSC_BCT_23_Wrangled$End_Date[5],y=FJSC_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[5]+16,label="Class End Date",fontface='bold')+ geom_bracket(xmin = FJSC_BCT_23_Wrangled$Start_Date[5],xmax=FJSC_BCT_23_Wrangled$End_Date[5],y=FJSC_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[5]+2,label="Class Length",linetype='dashed',tip.length = 0.01,fontface='bold')+ #class number geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[11]-5),xend=(Start_Date[11]-5),y=(Dangerous_Heat_Hours_Avg[11]+22),yend=Dangerous_Heat_Hours_Avg[11]+6),color='red',arrow = arrow(length = unit(0.3, "cm")))+ annotate("text",x=FJSC_BCT_23_Wrangled$Start_Date[11],y=FJSC_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[11]+26,label="Class Number",fontface='bold')+ #hours hazardous heat geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[11]-10),xend=Start_Date[1]-5,y=(Dangerous_Heat_Hours_Avg[11]),yend=Dangerous_Heat_Hours_Avg[11]),color='red',arrow = arrow(length = unit(0.5, "cm")))+ annotate("text",x=FJSC_BCT_23_Wrangled$Start_Date[6],y=FJSC_BCT_23_Wrangled$Dangerous_Heat_Hours_Avg[11],label="Class Total Hours Hazardous Heat\n(left axis)",fontface='bold')+ #likelihood of cautionary heat day geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[15]),xend=Start_Date[15],y=(120),yend=0),color='red',linetype='dotted')+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date[15]),xend=End_Date[22],y=120,yend=120),color='red',arrow = arrow(length = unit(0.5, "cm")))+ 
annotate("text",x=FJSC_BCT_23_Wrangled$Start_Date[22],y=120,label="Likelihood of Cautionary Heat on July 14th\n(right axis)",fontface='bold')+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(End_Date[15]),xend=End_Date[15],y=(79),yend=0),color='red',linetype='dotted')+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=(End_Date[15]),xend=End_Date[22],y=25,yend=25),color='red',arrow = arrow(length = unit(0.5, "cm")))+ annotate("text",x=FJSC_BCT_23_Wrangled$End_Date[17]+5,y=27,label="Likelihood of Cautionary Heat\non Sept 22nd",fontface='bold') ##FBGA FBGA_hours.prob <- ggplot()+ geom_smooth(data=KLSF_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.2)))+ geom_segment(data=FBGA_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FBGA_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3,fontface='bold')+ scale_x_date(limit=c(as.Date("2023-03-01"),as.Date("2023-12-01")),expand=c(0,0),date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.text=element_text(size=14), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ ggtitle("Ft Moore, GA") ##FLW FLW_hours.prob <- ggplot()+ geom_smooth(data=KTBN_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = 
sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.2)))+ geom_segment(data=FLW_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FLW_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3,fontface='bold')+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ scale_x_date(limit=c(as.Date("2023-03-01"),as.Date("2023-12-01")),expand=c(0,0),date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Ft Leonard Wood, MO") ##FSOK FSOK_hours.prob <- ggplot()+ geom_smooth(data=KFSI_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.2)))+ geom_segment(data=FSOK_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FSOK_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3,fontface='bold')+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ 
scale_x_date(limit=c(as.Date("2023-03-01"),as.Date("2023-12-01")),expand=c(0,0),date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Ft Sill, OK") ##FJSC FJSC_hours.prob <- ggplot()+ geom_smooth(data=KMMT_Caution_Days.last30,aes(x=date,y=Avg_Count_Caution_Days*200),se=TRUE,fill='blue',alpha=0.3)+ scale_y_continuous(limits=c(0,200), name="Hours of hazardous heat per class",expand=c(0,0),sec.axis = sec_axis( trans=~./200, name="Probability of experiencing a cautionary heat day",labels=scales::percent,breaks=seq(0,1,.2)))+ geom_segment(data=FJSC_BCT_23_Wrangled,aes(x=Start_Date,xend=End_Date,y=Dangerous_Heat_Hours_Avg,yend=Dangerous_Heat_Hours_Avg))+ geom_text(data=FJSC_BCT_23_Wrangled,aes(x=(Start_Date),y=Dangerous_Heat_Hours_Avg,label=round(Class,0)), position=position_dodge(width=0.9),vjust=-0.1,hjust=1.5,size=3,fontface='bold')+ theme_light()+ theme( panel.grid.major.x = element_blank(), panel.border = element_blank(), axis.ticks.x = element_blank(), axis.text.x = element_text(angle=30), axis.title.x = element_blank(), axis.title.y = element_blank(), axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+ scale_x_date(limit=c(as.Date("2023-03-01"),as.Date("2023-12-01")),expand=c(0,0),date_breaks = "1 month", date_labels = "%b",date_minor_breaks = "1 week")+ ggtitle("Ft Jackson, SC") heat.probxClass <- plot_grid(FJSC_hours.prob,FBGA_hours.prob,FSOK_hours.prob,FLW_hours.prob,ncol=1) ##Make a shared y-axis yaxis <- ggdraw() + draw_label( "Hours at heat category 4 or 5", fontface = 'bold', angle = 90, x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(170, 0, 0, 10) ) yaxis2 <- ggdraw() + draw_label( "Probability of a cautionary heat day", fontface = 'bold', angle = 270, x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot 
plot.margin = margin(0, 0, 170, 10)
)

##Make a shared x-axis (caption row explaining how to read the combined figure)
xaxis <- ggdraw() +
  draw_label(
    "-Numbers with horizontal bars correspond to BCT classes.\n-The length of a horizontal bar is the class length from start to finish (x-axis).\n-Horizontal bar elevation is the average number of hazardous heat hours experienced by class (left axis).\n-The blue line is the daily probability of a cautionary day (right axis) with 95% confidence interval shaded.",
    fontface = 'bold',
    angle = 0,
    x = 0,
    hjust = 0,
    size = 14
  ) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 10)
  )

title <- ggdraw() +
  draw_label("Hazardous heat and likelihood of cautionary heat day, all BCT classes",
             fontface = 'bold',
             x = 0,
             hjust = 0,
             size = 18) +
  theme(
    # add margin on the left of the drawing canvas,
    # so title is aligned with left edge of first plot
    plot.margin = margin(0, 0, 0, 0))

##Stack: title, annotated example plot, the 4-installation column, caption row;
##then flank with the shared left/right axis labels.
class_heat.prob_combined <- plot_grid(title, example.prob, heat.probxClass, xaxis,
                                      ncol = 1, rel_heights = c(0.05, 0.3, 1, 0.08))
class_heat.prob_combined <- plot_grid(yaxis, class_heat.prob_combined, yaxis2,
                                      ncol = 3, rel_widths = c(0.03, 1, 0.03))
class_heat.prob_combined
```
##BCT Heat Cat 30 year vs 10 year
###FBGA
```{r}
top_row <- plot_grid(KLSF_Likelihood_hazardous_days.plot, KLSF_Likelihood_cautionary_days.plot)
plot_grid(top_row, KLSF_Quantiles_high_WBGT.plot, ncol = 1)
```
###FSOK
```{r}
top_row <- plot_grid(KFSI_Likelihood_hazardous_days.plot, KFSI_Likelihood_cautionary_days.plot)
plot_grid(top_row, KFSI_Quantiles_high_WBGT.plot, ncol = 1)
```
###FLW
```{r}
top_row <- plot_grid(KTBN_Likelihood_hazardous_days.plot, KTBN_Likelihood_cautionary_days.plot)
plot_grid(top_row, KTBN_Quantiles_high_WBGT.plot, ncol = 1)
```
###FJSC
```{r}
top_row <- plot_grid(KMMT_Likelihood_hazardous_days.plot, KMMT_Likelihood_cautionary_days.plot)
plot_grid(top_row, KMMT_Quantiles_high_WBGT.plot, ncol = 1)
```
#####Create Table for Seasonal Outputs
```{r}
# FIX: the original data.frame() call was a syntax error -- the tau vector was not
# followed by a comma, and a second, unnamed vector of the same expressions quoted
# as strings was left dangling with unbalanced parentheses. The quoted expressions
# are retained as a 'source' column so no content is lost and the call parses.
# Mann-Kendall tau (trend strength) for each seasonal series at Ft Moore (KLSF).
KLSF_Trends.table <- data.frame(
  name = c('WBGT_High', 'WBGT_Avg', 'WBGT_Low', 'Temp_High', 'Temp_Avg', 'Temp_Low'),
  tau = c(KLSF_Daily_High_WBGT.C_MannKendall$estimate[1],
          KLSF_Daily_Avg_WBGT.C_MannKendall$estimate[1],
          KLSF_Daily_Low_WBGT.C_MannKendall$estimate[1],
          KLSF_Daily_High.C_MannKendall$estimate[1],
          KLSF_Daily_Avg.C_MannKendall$estimate[1],
          KLSF_Daily_Low.C_MannKendall$estimate[1]),
  source = c('KLSF_Daily_High_WBGT.C_MannKendall$estimate[1]',
             'KLSF_Daily_Avg_WBGT.C_MannKendall$estimate[1]',
             'KLSF_Daily_Low_WBGT.C_MannKendall$estimate[1]',
             'KLSF_Daily_High.C_MannKendall$estimate[1]',
             'KLSF_Daily_Avg.C_MannKendall$estimate[1]',
             'KLSF_Daily_Low.C_MannKendall$estimate[1]'))
view(KLSF_Trends.table)
```
```{r}
#install.packages('leaflet')
library(leaflet)
#install.packages('sf')
library(sf)
library(sp)
# FIX: unconditional install.packages() calls hit the network on every knit;
# commented out to match the #install.packages(...) convention used elsewhere
# in this file. NOTE(review): rgdal and rgeos were retired from CRAN in 2023 --
# prefer sf/terra going forward.
#install.packages("raster",dependencies = TRUE)
#install.packages("rgdal",dependencies = TRUE)
#install.packages("rgeos",dependencies = TRUE)
#install.packages("sp",dependencies = TRUE)
```
```{r}
## ----spatial-points------------------------------------------------------
# Make a set of coordinates that represent vertices
# with longitude and latitude in the familiar
# degrees
x_coords <- c(8, 8, 28, 28)
y_coords <- c(5, -10, 5, -10)
## ------------------------------------------------------------------------
poly1 <- sp::Polygon(cbind(x_coords, y_coords))
## ------------------------------------------------------------------------
firstPoly <- sp::Polygons(list(poly1), ID = "A")
str(firstPoly, 1)
## ------------------------------------------------------------------------
firstSpatialPoly <- sp::SpatialPolygons(list(firstPoly))
firstSpatialPoly
```
##Combined Thresholds Barplot 2x2
```{r}
# Average hours per year in each heat category, by decade (fill = V1), Ft Moore.
KLSF_Threshold_Barplot.combined <- ggplot(data = KLSF_Hours_Flags,
                                          aes(x = Heat_Category, y = Hours, fill = V1)) +
  geom_bar(stat = "identity", position = position_dodge())+
  geom_text(aes(label = Hours),
            position = position_dodge(width = 0.9), hjust = -0.25, size = 3)+
  scale_x_discrete(limits = c("Green", "Yellow", "Red", "Black"))+
  ylab("Average Hours per Year")+
  xlab("Heat
Category")+ theme_classic()+ theme(axis.title.y = element_blank(),axis.title.x = element_blank(), legend.position = "none", axis.line=element_line(size=0.2), panel.grid=element_line(size=01), axis.text = element_text(size=12))+ scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+ coord_flip()+ ggtitle("Ft Moore, GA") KTBN_Threshold_Barplot.combined <- ggplot(data=KTBN_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) + geom_bar(stat="identity",position=position_dodge())+ geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+ scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+ ylab("Average Hours per Year")+ xlab("Heat Category")+ theme_classic()+ theme(axis.title.y = element_blank(), axis.line=element_line(size=0.2), panel.grid=element_line(size=01), axis.text = element_text(size=12))+ scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+ theme(legend.position="none")+ coord_flip()+ ggtitle("Ft Leonard Wood, MO") KFSI_Threshold_Barplot.combined <- ggplot(data=KFSI_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) + geom_bar(stat="identity",position=position_dodge())+ geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+ scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+ ylab("Average Hours per Year")+ xlab("Heat Category")+ theme_classic()+ theme(axis.title.y = element_blank(),, axis.line=element_line(size=0.2), panel.grid=element_line(size=01), axis.text = element_text(size=12))+ scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+ theme(legend.position="none")+ coord_flip()+ ggtitle("Ft Sill, OK") KMMT_Threshold_Barplot.combined <- ggplot(data=KMMT_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) + geom_bar(stat="identity",position=position_dodge())+ geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+ 
scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+ ylab("Average Hours per Year")+ theme_classic()+ theme(axis.title.y = element_blank(), axis.title.x = element_blank(), axis.text = element_text(size=12))+ scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+ coord_flip()+ ggtitle("Ft Jackson, SC") bar_legend <- get_legend(KMMT_Threshold_Barplot.combined) KMMT_Threshold_Barplot.combined <- ggplot(data=KMMT_Hours_Flags, aes(x=Heat_Category,y=Hours,fill=V1)) + geom_bar(stat="identity",position=position_dodge())+ geom_text(aes(label=Hours), position=position_dodge(width=0.9),hjust=-0.25,size=3)+ scale_x_discrete(limits=c("Green","Yellow","Red","Black"))+ ylab("Average Hours per Year")+ theme_classic()+ theme(axis.title.y = element_blank(), axis.title.x = element_blank(), legend.position="none", axis.line=element_line(size=0.2), panel.grid=element_line(size=01), axis.text = element_text(size=12))+ scale_fill_brewer(name="Decade",palette = "Spectral",direction=-1,guide = guide_legend(reverse = TRUE))+ coord_flip()+ ggtitle("Ft Jackson, SC") top_row.heatxClass <- plot_grid(KMMT_Threshold_Barplot.combined,KLSF_Threshold_Barplot.combined) bottom_row.heatxClass <- plot_grid(KFSI_Threshold_Barplot.combined,KTBN_Threshold_Barplot.combined) ##Make a shared y-axis yaxis <- ggdraw() + draw_label( "Hours per Year", fontface = 'bold', angle = 90, x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(120, 0, 0, 30) ) ##Make a shared x-axis xaxis <- ggdraw() + draw_label( "Horizontal axis do not align across installations", fontface = 'bold', angle = 0, x = 0, hjust = 0 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 380) ) title <- ggdraw() + draw_label("Inter-decadal trends in heat category hours per year", fontface = 'bold', x = 0, hjust = 
0, size=18 ) + theme( # add margin on the left of the drawing canvas, # so title is aligned with left edge of first plot plot.margin = margin(0, 0, 0, 275)) Barplot_combined <- plot_grid(title,top_row.heatxClass,bottom_row.heatxClass,ncol=1,rel_heights = c(0.08,1,1)) Barplot_combined.2 <- plot_grid(yaxis,Barplot_combined,ncol=2,rel_widths = c(0.05,1)) Barplot_combined.3 <- plot_grid(Barplot_combined.2,xaxis,ncol=1,rel_heights = c(1,0.05)) Barplot_combined.4 <- plot_grid(Barplot_combined.3,bar_legend,ncol=2,rel_widths = c(1,0.08)) Barplot_combined.4 ``` ##Combined Cautionary Heat ```{r} KLSF_cautionary.combined <- ggplot(KLSF_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KLSF_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Purple', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),legend.position="none")+ ggtitle("Ft Moore, GA") KTBN_cautionary_combined <- ggplot(KTBN_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KTBN_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ 
scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Purple', size=16),legend.position="none",axis.title.y = element_blank())+ ggtitle("Ft Leonard Wood, MO") KFSI_cautionary_combined <- ggplot(KFSI_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KFSI_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='Purple', size=16),legend.position="none",axis.title.y = element_blank(),axis.title.x = element_blank())+ ggtitle("Ft Sill, OK") KMMT_cautionary_combined<- ggplot(KMMT_Caution_Days.prev30)+ 
geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KMMT_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg\n(Oct 2012 - Sept 2022)","Previous 30-year Avg\n(Jan 1982 - Dec 2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme( legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"),legend.direction="horizontal", plot.title=element_text(family='', face='bold', colour='Purple', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30))+ ggtitle("Ft Jackson, SC") ##grab the legend from the KMMT plot cautionary_legend <- get_legend(KMMT_cautionary_combined) ##Re-run the KMMT plot to make the legend go away KMMT_cautionary_combined<- ggplot(KMMT_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=Avg_Count_Caution_Days,color='blue'),se=TRUE,fill='blue',alpha=0.3)+ geom_smooth(data=KMMT_Caution_Days.last10,aes(x=date,y=Avg_Count_Caution_Days,color='purple'),se=TRUE,fill='purple',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("purple","blue"),labels=c("Last 10-year Avg\n(Oct 2011 - Sept 2022","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing Cautionary Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 
week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme( legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"),legend.direction="horizontal", plot.title=element_text(family='', face='bold', colour='Purple', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.position="none")+ ggtitle("Ft Jackson, SC") #top_row.cautionary <- plot_grid(KTBN_cautionary_combined,KMMT_cautionary_combined,rel_widths=c(.8,1),align="h") #bottom_row.cautionary <- plot_grid(KFSI_cautionary_combined,KLSF_cautionary.combined,rel_widths=c(.8,1)) #Cautionary_combined <- plot_grid(top_row.cautionary,bottom_row.cautionary,ncol=1) #Cautionary_combined ``` ##Combined Hazardous Heat ```{r} KLSF_hazardous_combined <- ggplot(KLSF_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KLSF_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='red', size=16),axis.title.y = element_blank(),legend.position = "none",axis.title.x = element_blank())+ ggtitle("Ft Moore, GA") KTBN_hazardous_combined <- ggplot(KTBN_Caution_Days.prev30)+ 
geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KTBN_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(Jan 1982 - Dec 2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(plot.title=element_text(family='', face='bold', colour='red', size=16),legend.position="none",axis.title.y = element_blank(),axis.text.x = element_text(angle = 30))+ ggtitle("Ft Leonard Wood, MO") KFSI_hazardous_combined <- ggplot(KFSI_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KFSI_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='red', size=16),legend.position="none",axis.title.x = element_blank(),axis.title.y = 
element_blank())+ ggtitle("Ft Sill, OK") KMMT_hazardous_combined <- ggplot(KMMT_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KMMT_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg\n(Oct 2012 - Sept 2022)","Previous 30-year Avg\n(Jan 1982 - Dec 2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+ theme_light()+ theme( legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='red', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.direction = "horizontal")+ ggtitle("Ft Jackson, SC") ##grab the legend from the KMMT plot hazardous_legend <- get_legend(KMMT_hazardous_combined) ##Replot KMMT without legend KMMT_hazardous_combined <- ggplot(KMMT_Caution_Days.prev30)+ geom_smooth(aes(x=date,y=High_Heat_Days,color='orange'),se=TRUE,fill='orange',alpha=0.3)+ geom_smooth(data=KMMT_Caution_Days.last10,aes(x=date,y=High_Heat_Days,color='red'),se=TRUE,fill='red',alpha=0.3)+ scale_color_identity(name="Years w/ 95%\nconfidence interval",breaks=c("red","orange"),labels=c("Last 10-year Avg","Previous 30-year Avg\n(1982-2011)"),guide="legend")+ scale_y_continuous(limits=c(0.1,.9), labels=scales::percent,breaks=seq(0,1,.1) ,name="Probability of experiencing a Hazardous Heat day",expand=c(0,0))+ scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor 
breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  ## Styling for the final KMMT (Ft Jackson) hazardous panel; the legend is
  ## suppressed here because a shared legend is used for the combined figure.
  theme( legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"),legend.direction="horizontal",
 plot.title=element_text(family='', face='bold', colour='red', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.position = "none")+
  ggtitle("Ft Jackson, SC")

## Assemble the four installation panels into a 2x2 grid (hazardous heat).
top_row.hazardous <- plot_grid(KTBN_hazardous_combined,KMMT_hazardous_combined,rel_widths=c(.8,1),align="h")
bottom_row.hazardous <- plot_grid(KFSI_hazardous_combined,KLSF_hazardous_combined,rel_widths=c(.8,1))
Hazardous_combined <- plot_grid(top_row.hazardous,bottom_row.hazardous,ncol=1)
Hazardous_combined
```
##Combined Heat Cat 4 and Heat Cat 5
```{r}
## Shared figure furniture (cowplot::ggdraw canvases): overall title, subtitle,
## and shared axis labels used when the four panels are combined below.
lead_title <- ggdraw() + draw_label("Daily likelihood of experiencing hazardous heat", fontface = 'bold', x = 0, hjust = 0, size=18 ) + theme( # add margin on the left of the drawing canvas,
  plot.margin = margin(0, 0, 0, 330))
title <- ggdraw() + draw_label( "Based on a 30-year climate average (October 1992 - September 2022)", fontface = 'bold', x = 0, hjust = 0, size=10) + theme( plot.margin = margin(0, 0, 0, 360))
##Make a shared y-axis
yaxis <- ggdraw() + draw_label( "Daily Likelihood", fontface = 'bold', angle = 90, x = 0, hjust = 0 ) + theme( plot.margin = margin(150, 0, 0, 10) )
##Make a shared x-axis
xaxis <- ggdraw() + draw_label( "Minor vertical breaks span one week", fontface = 'bold', angle = 0, x = 0, hjust = 0, size=10 ) + theme( plot.margin = margin(0, 500, 0, 500) )
## Ft Moore panel: smoothed daily likelihood of heat category 4 (orange) and
## heat category 5 (red) with 95% confidence ribbons, May-Oct window.
## NOTE(review): the second legend label below contains a literal line break
## ("Black \nflag") carried over from the original file -- confirm intended.
KLSF_heat_cats_combined <- ggplot(KLSF_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+
  geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black 
flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='black', size=16), axis.title.y = element_blank(), legend.position = "none",axis.title.x = element_blank(), axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+
  ggtitle("Ft Moore, GA")
## Ft Leonard Wood panel (same construction; legend hidden).
KTBN_heat_cats_combined <- ggplot(KTBN_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+
  geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(plot.title=element_text(family='', face='bold', colour='black', size=16),legend.position="none",axis.title.y = element_blank(),axis.title.x=element_blank(), axis.text.x = element_text(angle = 30),axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+
  ggtitle("Ft Leonard Wood, MO")
## Ft Sill panel (same construction; legend hidden).
KFSI_heat_cats_combined <- ggplot(KFSI_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+
  geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='black', size=16),legend.position="none",axis.title.x = element_blank(),axis.title.y = element_blank(),axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+
  ggtitle("Ft Sill, OK")
## Ft Jackson panel, first plotted WITH a visible horizontal legend so the
## legend can be harvested for the combined figure.
KMMT_heat_cats_combined <- ggplot(KMMT_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+
  geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme( legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"),
 plot.title=element_text(family='', face='bold', colour='black', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.direction = "horizontal",axis.text=element_text(size=14),legend.key.size = unit(1.25, 'cm'),legend.title = element_text(size=18),legend.text = element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+
  ggtitle("Ft Jackson, SC")
##grab the legend from the KMMT plot
heat_cats_legend <- get_legend(KMMT_heat_cats_combined)
##Replot KMMT without legend
KMMT_heat_cats_combined <- ggplot(KMMT_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Cat4,color='orange'),se=TRUE,fill='orange',alpha=0.4)+
  geom_smooth(aes(x=date,y=Likelihood_Cat5,color='red'),se=TRUE,fill='red',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme( legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"),legend.direction="horizontal",
 plot.title=element_text(family='', face='bold', colour='black', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.position = "none",axis.text=element_text(size=14), axis.line=element_line(size=0.2), panel.grid=element_line(size=01))+
  ggtitle("Ft Jackson, SC")
## Arrange the four panels: KMMT/KLSF on top, KFSI/KTBN on the bottom.
top_row.heat_cats <- plot_grid(KMMT_heat_cats_combined,KLSF_heat_cats_combined,rel_widths=c(1,1),align="h")
bottom_row.heat_cats <- plot_grid(KFSI_heat_cats_combined,KTBN_heat_cats_combined,rel_widths=c(1,1))
## Final assembly of the heat-category 4/5 figure: 2x2 panel grid, then stack
## lead title + subtitle, append the harvested shared legend, and prepend the
## shared y-axis label column.
Heat_cats_combined <- plot_grid(top_row.heat_cats,bottom_row.heat_cats,ncol=1)
Heat_cats_combined <- plot_grid(lead_title,title,Heat_cats_combined,ncol=1,rel_heights = c(0.05,0.02,1))
Heat_cats_combined <- plot_grid(Heat_cats_combined,heat_cats_legend,ncol=1,rel_heights=c(1,0.07))
Heat_cats_combined <- plot_grid(yaxis,Heat_cats_combined,ncol=2,rel_widths = c(0.02,1))
Heat_cats_combined
```
##Combined hazardous heat last 30 years
```{r}
#Combined Heat Cat 4 and Heat Cat 5
## Shared figure furniture for the hazardous-heat (cat 4 + 5 combined) figure.
lead_title <- ggdraw() + draw_label("Daily likelihood of experiencing hazardous heat", fontface = 'bold', x = 0, hjust = 0, size=18 ) + theme( # add margin on the left of the drawing canvas,
  plot.margin = margin(0, 0, 0, 370))
title <- ggdraw() + draw_label( "Based on a 30-year climate average (October 1992 - September 2022)", fontface = 'bold', x = 0, hjust = 0, size=10) + theme( plot.margin = margin(0, 0, 0, 390))
##Make a shared y-axis
yaxis <- ggdraw() + draw_label( "Daily Likelihood", fontface = 'bold', angle = 90, x = 0, hjust = 0 ) + theme( plot.margin = margin(150, 0, 0, 10) )
##Make a shared x-axis
xaxis <- ggdraw() + draw_label( "Minor vertical breaks span one week", fontface = 'bold', angle = 0, x = 0, hjust = 0, size=10 ) + theme( plot.margin = margin(0, 500, 0, 500) )
## Single-series panels: smoothed daily likelihood of hazardous heat
## (Likelihood_Hazardous_Heat = cat 4 + cat 5) per installation.
## NOTE(review): the scale_color_identity breaks/labels below still list
## colors from the two-series (cat 4 / cat 5) version even though only the
## black series is plotted; legends are hidden on most panels so the output
## is unaffected, but confirm the harvested KMMT legend renders as intended.
KLSF_heat_cats_combined <- ggplot(KLSF_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Hazardous_Heat,color='black'),se=TRUE,fill='dark grey',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","black"),labels=c("Heat Category 4 (Red flag)","Hazardous Heat (cat 4 + 5)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='black', size=16),axis.title.y = element_blank(),legend.position = "none",axis.title.x = element_blank())+
  ggtitle("Ft Moore, GA")
KTBN_heat_cats_combined <- ggplot(KTBN_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Hazardous_Heat,color='black'),se=TRUE,fill='dark grey',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(plot.title=element_text(family='', face='bold', colour='black', size=16),legend.position="none",axis.title.y = element_blank(),axis.title.x=element_blank(), axis.text.x = element_text(angle = 30))+
  ggtitle("Ft Leonard Wood, MO")
KFSI_heat_cats_combined <- ggplot(KFSI_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Hazardous_Heat,color='black'),se=TRUE,fill='dark grey',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme(axis.text.x = element_text(angle = 30), legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"), plot.title=element_text(family='', face='bold', colour='black', size=16),legend.position="none",axis.title.x = element_blank(),axis.title.y = element_blank())+
  ggtitle("Ft Sill, OK")
## Ft Jackson panel plotted WITH a visible horizontal legend so it can be
## harvested for the combined figure.
KMMT_heat_cats_combined <- ggplot(KMMT_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Hazardous_Heat,color='black'),se=TRUE,fill='dark grey',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","black"),labels=c("Heat Category 4 (Red flag)","Hazardous Heat (cat 4 + 5)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme( legend.background = element_rect(fill="light grey", size=0.5, linetype="solid"),
 plot.title=element_text(family='', face='bold', colour='black', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.direction = "horizontal")+
  ggtitle("Ft Jackson, SC")
##grab the legend from the KMMT plot
heat_cats_legend <- get_legend(KMMT_heat_cats_combined)
##Replot KMMT without legend
KMMT_heat_cats_combined <- ggplot(KMMT_Likelihood_Cat4xCat5.last30)+
  geom_smooth(aes(x=date,y=Likelihood_Hazardous_Heat,color='black'),se=TRUE,fill='dark grey',alpha=0.4)+
  scale_color_identity(name="Likelihood w/ 95%\nconfidence interval",breaks=c("orange","red"),labels=c("Heat Category 4 (Red flag)","Heat Category 5 (Black flag)"),guide="legend")+
  scale_y_continuous(limits=c(0,.8), labels=scales::percent,breaks=seq(0,1,.1) ,name="Chances a day experiences heat category 4 or 5",expand=c(0,0))+
  ## FIX: the axis-title string here previously contained a stray embedded line
  ## break ("minor breaks \n= 1 week"); normalized to match the other panels.
  ## (The x-axis title is blanked by the theme, so the display is unchanged.)
  scale_x_date(date_breaks = "1 month", date_labels = "%B",date_minor_breaks = "1 week", name="Date \n minor breaks = 1 week",limits=as.Date(c("2023-05-01","2023-10-01")),expand=c(0,0))+
  theme_light()+
  theme( legend.background = element_rect(fill="lightblue", size=0.5, linetype="solid"),legend.direction="horizontal",
 plot.title=element_text(family='', face='bold', colour='black', size=16),axis.title.y = element_blank(),axis.title.x = element_blank(),axis.text.x = element_text(angle = 30),legend.position = "none")+
  ggtitle("Ft Jackson, SC")
## Assemble the 2x2 grid, titles, shared legend, and shared y-axis label.
top_row.heat_cats <- plot_grid(KMMT_heat_cats_combined,KLSF_heat_cats_combined,rel_widths=c(1,1),align="h")
bottom_row.heat_cats <- plot_grid(KFSI_heat_cats_combined,KTBN_heat_cats_combined,rel_widths=c(1,1))
Heat_cats_combined <- plot_grid(top_row.heat_cats,bottom_row.heat_cats,ncol=1)
Heat_cats_combined <- plot_grid(lead_title,title,Heat_cats_combined,ncol=1,rel_heights = c(0.05,0.02,1))
Heat_cats_combined <- plot_grid(Heat_cats_combined,heat_cats_legend,ncol=1,rel_heights=c(1,0.05))
Heat_cats_combined <- plot_grid(yaxis,Heat_cats_combined,ncol=2,rel_widths = c(0.02,1))
Heat_cats_combined
```
##Quantiles Combined
```{r}
## 2x2 grid of the per-installation high-WBGT quantile plots built earlier.
top_row.quantiles <- plot_grid(KTBN_Quantiles_high_WBGT.plot,KMMT_Quantiles_high_WBGT.plot,rel_widths=c(1,1),align="h")
bottom_row.quantiles <- plot_grid(KFSI_Quantiles_high_WBGT.plot,KLSF_Quantiles_high_WBGT.plot,rel_widths=c(1,1))
quantiles_combined <- plot_grid(top_row.quantiles,bottom_row.quantiles,ncol=1)
quantiles_combined
```
##Combined Cautionary and Hazardous 4x2
```{r}
##Make a title for cautionary column
lead_title <- ggdraw() + draw_label("Last decade compared to the previous 30-year climate average", fontface = 'bold', x = 0, hjust = 0, size=18 ) + theme( # add margin on the left of the drawing canvas,
  # so title is aligned with left edge of first plot
  plot.margin = margin(0, 0, 0, 325))
title <- ggdraw() + draw_label( "Cautionary Heat Day", fontface = 'bold', x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas,
  # so title is aligned with left edge of first plot
  plot.margin = margin(0, 0, 0, 100))
##make a title for the hazardous column
h_title <- ggdraw() + draw_label( "Hazardous Heat Day", fontface = 'bold', x = 0, hjust = 0) + theme( # add margin on the left of the drawing canvas,
  # so title is aligned with left edge of first plot
  plot.margin = margin(0, 0, 0, 350) )
##Make a shared y-axis
## FIX: corrected typo "Harzardous" -> "Hazardous" in the displayed label.
yaxis <- ggdraw() + draw_label( "Daily Likelihood of Experiencing Cautionary or Hazardous Heat", fontface = 'bold', angle = 90, x = 0, hjust = 0 ) + theme( # add margin on the left of the drawing canvas,
  # so title is aligned with left edge of first plot
  plot.margin = margin(370, 0, 0, 10) )
##Make a shared x-axis
xaxis <- ggdraw() + draw_label( "Major breaks are months\nminor breaks are weeks", fontface = 'bold', angle = 90, x = 0, hjust = 0 ) + theme( # add margin on the left of the drawing canvas,
  # so title is aligned with left edge of first plot
  plot.margin = margin(0, 0, 0, 10) )
##Make a cautionary plot column
## NOTE(review): KLSF_cautionary.combined (dot) breaks the *_cautionary_combined
## naming pattern of the other three objects -- confirm it exists under that name.
Cautionary_Column1 <- plot_grid(KMMT_cautionary_combined,KLSF_cautionary.combined,KFSI_cautionary_combined,KTBN_cautionary_combined,ncol=1,rel_heights = c(0.8,0.8,0.8,1))
#####stopped here
Hazardous_Column1 <- plot_grid(KMMT_hazardous_combined,KLSF_hazardous_combined,KFSI_hazardous_combined,KTBN_hazardous_combined,ncol=1,rel_heights = c(0.8,0.8,0.8,1))
## Stack each column with its title and legend, then join the two columns
## side by side under the lead title.
Cautionary_SingleC <- plot_grid(title,Cautionary_Column1,cautionary_legend,ncol=1,rel_heights = c(0.03,1,0.1))
Hazardous_SingleC <- plot_grid(h_title,Hazardous_Column1,hazardous_legend,ncol=1,rel_heights = c(0.03,1,0.1))
Cautionary_SingleC.2 <- plot_grid(yaxis,Cautionary_SingleC,ncol=2,rel_widths = c(0.03,1))
Cautionary_SingleC.2
Combined_haz_caut.plot <- plot_grid(Cautionary_SingleC.2,Hazardous_SingleC,ncol=2,rel_widths = c(1.1,1))
Combined_haz_caut.plot2 <- plot_grid(lead_title,Combined_haz_caut.plot,ncol=1,rel_heights = c(0.05,1))
Combined_haz_caut.plot2
```
##Quantile and Percentile Ranking for heat cat hours
```{r}
##Find percentile and quantile info
## Upper-tail quantiles of KLSF (Ft Moore) average WBGT.
print(quantile(KLSF_Wrangled$Ave_WBGT.C,probs=c(.999))) # 0.1%
print(quantile(KLSF_Wrangled$Ave_WBGT.C,probs=c(.9999))) # 0.01%
print(quantile(KLSF_Wrangled$Ave_WBGT.C,probs=seq(0,1,1/100))) #by 1% increments
boxplot(KLSF_Wrangled$Ave_WBGT.C)
##find the percentile of the lowest heat cat 4 value
## BUG FIX: the original denominator was length((x)+1) -- the "+1" sat inside
## length(), adding 1 to every element and never changing the length. The
## percentile-rank convention intended here is rank / (n + 1).
## NOTE(review): match() returns NA unless the cat-4 threshold value occurs
## exactly in the data -- confirm an exact match is expected here.
match(Heat_Categories$Category_Temp_Min.C[3],sort(KLSF_Wrangled$Ave_WBGT.C))/(length(KLSF_Wrangled$Ave_WBGT.C)+1)
##find out if there are hours in heat cat 4-5 at night
## Night is taken as before 0900 or after 2000 local.
temp <- KMMT_Wrangled%>%
  filter(Ave_WBGT.C>=Heat_Categories$Category_Temp_Min.C[3])%>%
  filter(Hour<9|Hour>20)%>%
  select(Hour,date,Ave_Temp.C,Ave_WBGT.C)
view(temp)
```
##Create a table with trends data
```{r}
## FIX: install reactable only when missing; the original unconditional
## install.packages() call re-installed the package on every knit.
if (!requireNamespace("reactable", quietly = TRUE)) install.packages("reactable")
library(reactable)
library(tidyverse)
## Stack the per-installation historical trend summaries into one table.
Historical_Trends <- rbind(FSOK_Historical_Trends,FJSC_Historical_Trends,FLW_Historical_Trends,FMGA_Historical_Trends)
view(Historical_Trends)
## Interactive, filterable table of the trends.
## FIX: removed the stray double commas in the original
## colFormat(suffix = " °C",,digits=5) calls -- an empty argument in an R
## call errors at evaluation time.
reactable(Historical_Trends,filterable = TRUE, defaultPageSize=20, columns=list(
  Temp.C = colDef(format=colFormat(suffix = " °C",digits=5)),
  WBGT.C = colDef(format=colFormat(suffix = " °C",digits=5)),
  Temp.C_Warm.season = colDef(format=colFormat(suffix = " °C",digits=5)),
  WBGT.C_Warm.season = colDef(format=colFormat(suffix = " °C",digits=5))))
## NOTE(review): absolute, user-specific output path -- consider a path
## relative to the project so the document knits on other machines.
write_csv(Historical_Trends,"/Users/erikpatton/Library/Mobile Documents/com~apple~CloudDocs/Duke/Data_Dissertation/Historical_Trends_Table.csv")
```
##Supplemental Figure S3 Regional Anomaly Plots
```{r}
## 2x2 grid of monthly temperature-anomaly plots, one per installation.
Figure_S3 <- plot_grid(KMMT_Monthly_Anomaly.C.plot,KLSF_Monthly_Anomaly.C.plot,KFSI_Monthly_Anomaly.C.plot,KTBN_Monthly_Anomaly.C.plot,ncol=2)
Figure_S3
```
##Supplemental Figure S4 25-75 percentile mean WBGT
```{r}
## 2x2 grid of the high-WBGT quantile plots, one per installation.
Figure_S4 <- plot_grid(KMMT_Quantiles_high_WBGT.plot,KLSF_Quantiles_high_WBGT.plot,KFSI_Quantiles_high_WBGT.plot,KTBN_Quantiles_high_WBGT.plot,ncol=2)
Figure_S4
```