---
title: "TESTsmART Nigeria Analysis"
author: "David Arthur, John Gallis, Yunji Zhou, Liz Turner"
subtitle: "Contents produced in accordance with Statistical Analysis Plan 'TESTsmART Aim 2 SAP Nigeria 2023-06-13 Final.docx'"
output:
  bookdown::word_document2:
    number_sections: false
  word_document: default
  html_document:
    df_print: paged
---

```{r setup, include = FALSE}
knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, 
                      ft.align = "left", ft.keepnext = TRUE, cache = FALSE)
```


```{r libraries, include = FALSE, cache = F}

# Packages
packages <- c("MASS","gt","gtsummary","tidyverse","haven","flextable","ftExtra","expss",
              "labelled","lme4","lmerTest","sjmisc", "bookdown","foreign", "psych", "LearnPCA",
              "ICCbin", "gee", "geepack", "geesmv", "ggmosaic")

# Install packages not yet installed
installed_packages <- packages %in% rownames(installed.packages())
if (any(installed_packages == FALSE)) {
  install.packages(packages[!installed_packages])
}

# Packages loading
invisible(lapply(packages, library, character.only = TRUE))


```



```{r set paths, include = FALSE}

## set data_path to location of 'TS_Kenya_analytic_dataset.rdata'

data_path <- "________"

```


```{r load-data}

load(paste0(data_path, "/TS_Nigeria_analytic_dataset.rdata"))

data_included <- data_raw_ni %>%
  # For some reason in REDCap, discount was coded as 0="yes" and 1="no", so recode to avoid confusion
  mutate(discount_recoded = case_when(
    discount == 0 ~ 1,
    discount == 1 ~ 0,
    discount == 999 ~ 999
  )) %>%
  mutate(test_pay_cat = as.factor(
    case_when(
      test_pay == 9999 ~ "Don't know",
      near(test_pay, 0) ~ "0",
      near(test_pay, 250) ~ "250",
      test_pay > 250 ~ ">250"
    )
  )) %>%
  mutate(test_pay_cat = fct_relevel(
    test_pay_cat, "0", "40", ">40", "Don't know")) %>%
  # Discount among all included
  mutate(discount_bin = case_when(
    discount_recoded == 1 ~ 1,
    discount_recoded == 0 ~ 0,
    discount_recoded == 999 ~ 0
  )) %>%
  # Discount among ACT purchasers
  mutate(discountACT_bin = case_when(
    discount_recoded == 1 & ACT == 1 ~ 1,
    discount_recoded == 0 & ACT == 1 ~ 0,
    discount_recoded == 999 & ACT == 1 ~ 0
  )) %>%
  # Discount among mRDT positive ACT purchasers
  mutate(discountPos_bin = case_when(
    discount_recoded == 1 & ACT == 1 & test_results == 2 ~ 1,
    discount_recoded == 0 & ACT == 1 & test_results == 2 ~ 0,
    discount_recoded == 999 & ACT == 1 & test_results == 2 ~ 0
  )) %>%
  mutate(act_price_num = ifelse(act_price < 9999, act_price, NA)) %>%
  mutate(act_price_pos_num = ifelse(act_price_pos < 9999, act_price_pos, NA)) %>%
  mutate(act_price_neg_num = ifelse(act_price_neg < 9999, act_price_neg, NA)) %>%
  mutate(act_price_cat = as.factor(
    case_when(
      act_price == 9999 ~ "Don't know",
      near(act_price, 0) ~ "0",
      act_price > 0 & act_price < 650 ~ "1-649",
      act_price >= 650 & act_price <= 850 ~ "650-850",
      act_price > 850 ~ ">850"
    )
  )) %>%
  mutate(act_price_pos_cat = as.factor(
    case_when(
      act_price_pos == 9999 ~ "Don't know",
      near(act_price_pos, 0) ~ "0",
      act_price_pos > 0 & act_price_pos < 650 ~ "1-649",
      act_price_pos >= 650 & act_price_pos <= 850 ~ "650-850",
      act_price_pos > 850 ~ ">850"
    )
  )) %>%
  mutate(act_price_neg_cat = as.factor(
    case_when(
      act_price_neg == 9999 ~ "Don't know",
      near(act_price_neg, 0) ~ "0",
      act_price_neg > 0 & act_price_neg < 650 ~ "1-649",
      act_price_neg >= 650 & act_price_neg <= 850 ~ "650-850",
      act_price_neg > 850 ~ ">850"
    )
  )) %>%
  mutate(act_price_cat = fct_relevel(
    act_price_cat, "0", "1-649", "650-850", ">850", "Don't know")) %>%
  mutate(act_price_pos_cat = fct_relevel(
    act_price_pos_cat, "0", "1-649", "650-850", ">850", "Don't know")) %>%
  mutate(act_price_neg_cat = fct_relevel(
    act_price_neg_cat, "0", "1-649", "650-850", ">850", "Don't know")) %>%
  mutate(act_price_pos_0 = case_when(
    near(act_price_pos, 0) ~ 1,
    act_price_pos > 0 & act_price < 9999 ~ 0
  )) %>%
  mutate(total_spent_na = ifelse(!(total_spent %in% c(999, 9999, 99999)),
                                   total_spent,
                                   NA)) %>%
  set_variable_labels(
    test_pay_cat = "mRDT cost (Naira, categorized)",
    act_price_cat = "ACT cost (Naira, categorized)",
    act_price_pos_cat = "ACT cost among test positive (Naira, categorized)",
    act_price_neg_cat = "ACT cost among test negative/untested (Naira, categorized)",
    act_price_pos_0 = "Free ACT among test positive",
    total_spent_na = "How much did you spend at the pharmacy today? ('Don't know' as missing)"
  ) %>%
  mutate(arm = fct_relevel(arm, "Control")) %>%
  mutate(arm = fct_drop(arm)) 

```



```{r asset-variables, cache = FALSE}

reset_gtsummary_theme()
theme_gtsummary_compact()

data_asset <- data_included %>%
  select(record_id, water:occupation_private_sp) %>%
  mutate(toilet_bin = case_when(
    is.element(toilet, c(5, 7, 8, 555)) ~ 1,
    is.element(toilet, c(2, 3, 4, 6)) ~ 2
  )) %>%
  mutate(floor_bin = case_when(
    is.element(floor, c(3)) ~ 1,
    is.element(floor, c(4, 5, 6, 7, 555)) ~ 2
  )) %>%
  mutate(walls_bin = case_when(
    is.element(walls, c(3, 5, 6)) ~ 1,
    is.element(walls, c(1, 2, 4, 7)) ~ 2
  )) %>%
  mutate(own_house = case_when(
    is.element(house, c(1)) ~ 1,
    is.element(house, c(2)) ~ 0
  )) %>%
  mutate(cows_bin = ifelse(livestock == 0, 0, cows)) %>%
  mutate(sheep_bin = ifelse(livestock == 0, 0, sheep)) %>%
  mutate(goats_bin = ifelse(livestock == 0, 0, goats)) %>%
  set_value_labels(toilet_bin = c("Non-improved latrine" = 1, "Improved latrine" = 2),
                   floor_bin = c("Earthen" = 1, "Hard surface" = 2),
                   walls_bin = c("Porous" = 1, "Non-porous" = 2),
                   cows_bin = c("No" = 0, "Yes" = 1),
                   sheep_bin = c("No" = 0, "Yes" = 1),
                   goats_bin = c("No" = 0, "Yes" = 1))

asset_table <- data_asset %>%
  haven::as_factor() %>%
  select(-c(contains("_sp"), contains("sqft"), school, roof)) %>%
  # replace when confirm what to do with values = 9999 or 999
  mutate(land_acres = ifelse(land_acres >= 999, NA, land_acres)) %>%
  mutate(cows_amt = ifelse(cows_amt >= 999, NA, cows_amt)) %>%
  mutate(sheep_amt = ifelse(sheep_amt >= 999, NA, sheep_amt)) %>%
  mutate(goats_amt = ifelse(goats_amt >= 999, NA, goats_amt)) %>%
  set_variable_labels(land_acres = "Number of acres",
                      cows_amt = "How many cows does your household have?",
                      sheep_amt = "How many sheep does your household have?",
                      goats_amt = "How many goats does your household have?") %>%
  tbl_summary(include = -record_id,
              type = list(all_continuous() ~ "continuous2",
                          all_dichotomous() ~ "categorical"),
              statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                     "{median} [{p25}, {p75}]",
                                                     "{min}, {max}",
                                                     "{N_nonmiss}")),
              missing_text = "Missing") %>%
  add_stat_label()



```


```{r polychoricPCA, cache = FALSE, results = 'hide'}

# Start by excluding variables from above table where <10% or >90% of observations are "Yes".

data_pca <- data_asset %>%
  select(record_id, refrigerator, radio, motorcycle, car, bank, own_house) %>%
  data.frame() %>%
  mutate(across(everything(), ~ na_if(., 999))) %>%
  na.omit()


# Calculate polychoric PCA scores
pc3 <- principal(data_pca %>% select(-record_id), cor = "mixed", nfactors = 1)

# Join PCA scores to other data
data2 <- bind_cols(data_pca, pc3$scores) %>% arrange(PC1, record_id) %>%
  set_variable_labels(PC1 = "Wealth index (continuous)")

```


```{r combining-data}

# Multiple tables
data3 <- data_included %>%
  group_by(shop_ni) %>%
  mutate(cluster_id = cur_group_id()) %>%
  mutate(Interviewees = 1) %>%
  ungroup() %>%
  left_join(data2 %>% select(record_id, contains("_bin"), PC1), by = "record_id") %>%
  mutate(wealth_quintile = as.factor(cut_number(PC1, n = 5, labels = FALSE))) %>%
  mutate(wealth_binary = as.factor(
    case_when(
      is.element(wealth_quintile, c(1, 2)) ~ "0 to 40th",
      is.element(wealth_quintile, c(3, 4, 5)) ~ ">40.0"
    ))) %>%
  mutate(wealth_quintile = fct_recode(wealth_quintile,
                                      "0 to 20th" = "1",
                                      ">20.0 to 40th" = "2",
                                      ">40.0 to 60th" = "3",
                                      ">60.0 to 80th" = "4",
                                      ">80.0" = "5")) %>%
  mutate(school_binary = as.factor(
    case_when(
      is.element(school_2, c(1, 2)) ~ "None/Primary",
      is.element(school_2, c(3, 4, 5, 6)) ~ "Secondary/College/University/Polytechnic"
    ))) %>%
  set_variable_labels(
    # timespan = "Time Period",
    wealth_quintile = "Wealth index (quintile)",
    wealth_binary = "Wealth index (binary)",
    outcome_prim = "ACTs sold to malaria test-positive clients",
    # outcome_prim_sa_al = "ALs sold to malaria test-positive clients",
    # outcome_prim_sa_pre_exp = "ACTs sold to malaria test-positive clients before expansion",
    # outcome_prim_sa_post_exp = "ACTs sold to malaria test-positive clients after expansion",
    # outcome_prim_sa_pre_leaflets = "ACTs sold to malaria test-positive clients before CHW outreach",
    # outcome_prim_sa_post_leaflets = "ACTs sold to malaria test-positive clients after CHW outreach",
    outcome_sec_tested = "Suspected malaria cases that receive a mRDT",
    outcome_sec_tested_no_previous = "Suspected malaria cases that receive a mRDT with no prior test",
    outcome_sec_tested_expanded = "Met inclusion criteria that receive a mRDT",
    outcome_sec_adherence = "Malaria tested clients whose treatment adhered to test results",
    outcome_sec_appropriate = "Suspected malaria cases that are managed appropriately",
    outcome_sec_appropriate_expanded = "Met inclusion criteria that are managed appropriately",
    outcome_sec_ACT_untested = "Untested clients taking ACT",
    # al_expansion = "Post-expansion of approved ALs",
    # leaflets = "Post-deployment of CHWs",
    school_2 = "Education level",
    school_binary = "Education level (binary)") %>%
  haven::as_factor() %>%
  mutate(timespan = as.factor(timespan)) %>%
  set_variable_labels(timespan = "Time Period")

# data3 %>% select(timespan)
# str(data3$timespan)

data3$timespan <- fct_relevel(data3$timespan, c("One", "Two", "Three", "Four")) %>%
  fct_recode("Feb-May 2022" = "One",
             "Jun-Aug 2022" = "Two",
             "Sep-Nov 2022" = "Three",
             "Dec 2022-Feb 2023" = "Four")

# Add suffix to med# variables for purpose of reshaping
rename_function <- function(name) {
  paste0(name, "_type")
}

prices_long <- data3 %>%
  rename_with(rename_function, matches("^med\\d+$")) %>%
  select(record_id, matches("^med\\d+_type"), matches("med\\d+_price"),
         ) %>%
  pivot_longer(cols = c(matches("^med\\d+_type"), matches("med\\d+_price")),
               names_to = c("num", ".value"),
               names_pattern = "med(\\d+)_(\\w*)") %>%
  filter(type %in% c("AL (Lonart/CoArtem/Artefan/Lumartem)",
                     "Other ACT (DHAP, DP, Duocotexin, P-alaxin)")) %>%
  rename(price_line = price) %>%
  set_variable_labels(price_line = "Price (from line listing)")

prices_paid <- prices_long %>%
  distinct(record_id, .keep_all = TRUE) %>%
  mutate(across(price_line, ~ na_if(., 9999))) %>%
  mutate(across(price_line, ~ na_if(., 999))) %>%
  mutate(across(price_line, ~ na_if(., 99999))) %>%
  mutate(free_act = case_when(
    price_line == 0 ~ 1,
    price_line > 0 ~ 0,
  )) %>%
  select(-num) %>%
  set_variable_labels(free_act = "Free ACT (from line listing)")

data4 <- data3 %>%
  mutate(across(c(ACT, case, test), as.factor)) %>%
  mutate(ACT = recode_factor(ACT, "1" = "ACT", "0" = "No ACT")) %>%
  mutate(case = recode_factor(case, "1" = "Suspected case", "0" = "Not suspected case")) %>%
  mutate(test = recode_factor(test, "Yes" = "mRDT", "No" = "No mRDT")) %>%
  left_join(prices_paid, by = "record_id") %>%
  mutate(price_line_pos = ifelse(test_results == "Positive", price_line, NA)) %>%
  mutate(price_line_neg = ifelse(test_results == "Negative", price_line, NA)) %>%
  mutate(price_line_untested = ifelse(test == "No mRDT", price_line, NA)) %>%
  mutate(free_act_pos = ifelse(test_results == "Positive", free_act, NA)) %>%
  set_variable_labels(
    price_line_pos = "Price with Positive mRDT (from line listing)",
    price_line_neg = "Price with Negative mRDT (from line listing)",
    price_line_untested = "Price with no mRDT (from line listing)",
    free_act_pos = "Free ACT with Positive mRDT (from line listing)"
  )

```


## Table 1 (Results paper). Characteristics of participants enrolled in TESTsmART by arm

```{r, ft.keepnext = FALSE}

data_tab1 <- data3 %>%
  mutate(arm = haven::as_factor(arm)) %>%
  mutate(arm = fct_relevel(arm, "Control")) %>%
  set_variable_labels(
    school_2 = "Respondent education level",
    wealth_quintile = "Respondent wealth index (quintile)"
  )

tbl_interviewees <- data_tab1 %>% 
  tbl_summary(include = c(Interviewees, respondent
                          # , respondent_age, client_gender, respondent_gender
                          ),
              by = arm,
              type = list(Interviewees ~ "dichotomous"),
              value = list(Interviewees ~ 1),
              statistic = list(Interviewees ~ "{N}"),
              missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

tbl1 <- data_tab1 %>% 
  tbl_summary(include = c(respondent_age, respondent_gender),
              by = arm,
              # type = list(Interviewees ~ "dichotomous"),
              # statistic = list(Interviewees ~ "{N}"),
              missing = "always",
              missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

tbl_child <- data_tab1 %>% 
  filter(respondent == "Guardian of child client") %>%
  tbl_summary(include = c(child_age, child_gender),
              by = arm,
              type = list(child_age ~ "continuous2"),
              statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                     "{median} [{p25}, {p75}]",
                                                     "{min}, {max}")),
              missing = "always",
              missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

tbl_covariates <- data_tab1 %>% 
  tbl_summary(include = c(school_2, 
                          wealth_quintile
                          ),
              by = arm,
              statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                     "{median} [{p25}, {p75}]",
                                                     "{min}, {max}")),
              missing = "always",
              missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

tbl_period <- data_tab1 %>%
  tbl_summary(include = c(timespan),
              by = arm,
              statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                     "{median} [{p25}, {p75}]",
                                                     "{min}, {max}")),
              missing = "always",
              missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

tbl1_combined <- tbl_stack(list(tbl_interviewees, tbl1, tbl_covariates, tbl_child, tbl_period))

tbl1_combined %>%
  as_flex_table() %>%
  footnote(i = 1, j = 1,
           value = as_paragraph("Includes only interviewees who consented and met all inclusion criteria."),
           ref_symbols = c("1"),
           part = "body") %>%
  flextable::set_caption('Characteristics of participants enrolled in TESTsmART by arm',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "Table1")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>%
  theme_zebra() %>% 
  align(j = -1, align = "center", part = "all")

```

# Summary of Numerators and Denominators for Primary and Secondary Outcomes
 
| Outcome | Numerator | Denominator |
|:--------|:----------|:------------|
| Primary - ACT purchased by test positive | tested positive and purchased ACT | purchased ACT |
| Secondary 1 - Use of mRDT | mRDT tested | suspected malaria |
| Secondary 2 - Adherence to mRDT result | (mRDT+ and ACT) OR (mRDT- and no AM) | tested |
| Secondary 3 - Appropriate case management | (mRDT+ and ACT) OR (mRDT- and no AM) | suspected malaria |
| Secondary 4 - ACT use among untested | purchased ACT and untested | untested |



```{r Calculate-proportions-and-sizes}

get_size <- function(outcome){
  
  data3 %>%
    group_by(arm, shop_ni) %>%
    # filter(shop_ni == shop) %>%
    filter(!is.na(.data[[outcome]])) %>%
    summarize(cluster_size = n(), 
              numerator = sum(.data[[outcome]]),
              prop = mean(.data[[outcome]])) %>%
    ungroup()
  
}

outcomes <- data3 %>% 
  select(all_of(contains("outcome_"))) %>%
  select(-outcome_sec_adherence_else) %>%
  names()

outcome_sizes <- data.frame()

for(outcome in outcomes){
  new_df <- data.frame(outcome = as.factor(outcome)) %>%
    bind_cols(get_size(outcome))
  outcome_sizes <- bind_rows(outcome_sizes, new_df)
}

outcome_sizes <- outcome_sizes %>%
  mutate(outcome_full = dplyr::recode(outcome,
      "outcome_prim" = "Primary",
      "outcome_prim_sa_al" = "Primary - restricted to AL",
      "outcome_prim_sa_pre_leaflets" = "Primary - before leaflet distribution",
      "outcome_prim_sa_post_leaflets" = "Primary - after leaflet distribution",
      "outcome_sec_tested" = "Tested (denom = suspected)",
      "outcome_sec_tested_no_previous" = "Tested - no prior test",
      "outcome_sec_tested_expanded" = "Tested - expanded denom",
      "outcome_sec_adherence" = "Adherence (denom = tested)",
      "outcome_sec_appropriate" = "Approp manage (denom = suspected)",
      "outcome_sec_appropriate_expanded" = "Approp manage - expanded denom",
      "outcome_sec_ACT_untested" = "ACT use untested (denom = untested)"
  ))


```

\newpage

# GEE Analysis - Full table for each outcome

For each outcome, we estimated risk ratios using the modified Poisson approach with log link. To estimate risk differences, we first tried the modified Poisson approach with identity link. In cases where that model failed to converge, we used the same model, but with the binary versions of education level and wealth index rather than quintiles. In cases where that model also failed to converge, we used a binomial distribution with identity link, with education level and wealth index divided into quintiles. In cases where that model also failed to converge, we used a binomial distribution with identity link, with the binary versions of education level and wealth index. In cases where that model also failed to converge, we used a normal distribution with identity link, with education level and wealth index divided into quintiles.

The tables below indicate the model used for each outcome, determined by convergence results and the criteria outlined in the previous paragraph.


```{r GEE-Prim, results = 'hide', cache = F}


reset_gtsummary_theme()
theme_gtsummary_journal(journal = "jama")
theme_gtsummary_compact()

# Get n/N (%) for each arm
prop_prim <- data3 %>% 
  select(outcome_prim, arm) %>%
  na.omit() %>%
  group_by(arm) %>%
  summarize(n = sum(outcome_prim),
            N = n()) %>%
  mutate(prop_num = n / N) %>%
  mutate(prop = paste0(n,
                       "/",
                       N,
                       " (",
                       signif(100 * prop_num, digits = 2), "%)"))
  
# Data must be sorted by cluster id for gee, geeglm, and geesmv functions
data_gee <- data3 %>%
  arrange(shop_ni) %>%
  mutate(period_quadratic = period^2) %>%
  mutate(period_cubic = period^3) %>%
  mutate(arm = as.factor(arm)) %>%
  mutate(arm = fct_relevel(arm, "Control"))

# levels(data_gee$arm)


# Independent variables for minimally adjusted model
min_adjusted_vector <- c("arm", "period", "period_quadratic", "period_cubic", "quadrant")
# Independent variables for fully adjusted model
fully_adjusted_vector <-  c("arm", "period", "period_quadratic", "period_cubic",
                            "client_gender", "client_age", "school_2", "wealth_quintile",
                            "quadrant")
# Independent variables for fully adjusted model (wealth as binary when quintiles won't converge)
fully_adjusted_2_vector <-  c("arm", "period", "period_quadratic", "period_cubic",
                            "client_gender", "client_age", "school_binary", "wealth_binary",
                            "quadrant")

# Look at number of observations with missing values for covariates in fully adjusted model
data_gee %>% select(all_of(fully_adjusted_vector)) %>% map(~sum(is.na(.)))

# Create strings to use in model formulas
min_adjusted <- paste0(min_adjusted_vector, collapse = " + ")
fully_adjusted <- paste0(fully_adjusted_vector, collapse = " + ")
fully_adjusted_2 <- paste0(fully_adjusted_2_vector, collapse = " + ")

# Create datasets with no missing values in outcome variable or covariates
data_gee_prim <- data_gee %>%
  dplyr::select(outcome_prim, cluster_id, all_of(min_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_prim[prop_prim$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_prim[prop_prim$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_prim[prop_prim$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()

data_gee_prim_fully <- data_gee %>%
  dplyr::select(outcome_prim, cluster_id, all_of(fully_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_prim[prop_prim$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_prim[prop_prim$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_prim[prop_prim$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()


# Log link
# Minimally adjusted
fit_geepack_prim <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_prim,
                           family = poisson,
                           corstr = "independence")

# Fully adjusted
fit_geepack_prim_fully <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_prim_fully,
                           family = poisson,
                           corstr = "independence")


# Identity link

# Minimally adjusted (doesn't converge using Poisson distribution)
# fit_geepack_prim_id <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)),
#                            id = cluster_id, 
#                            data = data_gee_prim,
#                            family = poisson(link = "identity"),
#                            corstr = "independence")

# fit_geepack_prim_id <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)),
#                            id = cluster_id, 
#                            data = data_gee_prim,
#                            family = binomial(link = "identity"),
#                            corstr = "independence")

fit_geepack_prim_id <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_prim,
                           family = gaussian(link = "identity"),
                           corstr = "independence")

# Fully adjusted (doesn't converge using Poisson distribution)
# fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted)),
#                            id = cluster_id, 
#                            data = data_gee_prim_fully,
#                            family = poisson(link = "identity"),
#                            corstr = "independence")

# fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted_2)),
#                            id = cluster_id,
#                            data = data_gee_prim_fully,
#                            family = poisson(link = "identity"),
#                            corstr = "independence")

# fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted_2)),
#                            id = cluster_id,
#                            data = data_gee_prim_fully,
#                            family = binomial(link = "identity"),
#                            corstr = "independence")

fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_prim_fully,
                           family = gaussian(link = "identity"),
                           corstr = "independence")


# str(summary_prim)
# prop_prim[prop_prim$arm == "CDPD", ]$prop
```




```{r Models-used}

models_used <- outcome_sizes %>% 
  distinct(outcome_full) %>%
  filter(!str_detect(outcome_full, "after")) %>%
  mutate(outcome_full = str_replace(outcome_full, "before", "before/after")) %>%
  mutate(outcome_full = str_remove(outcome_full, "\\(.*")) %>%
  mutate(RR_min = "Poisson",
         RR_fully = c("Poisson",
                      "Poisson",
                      "Poisson",
                      
                      "Poisson (wealth and school binary)",
                      "Poisson",
                      "Poisson (wealth and school binary)",
                      
                      "Poisson",
                      
                      "Poisson",
                      "Poisson",
                      
                      "Poisson"
                      ),
         RD_min = c("Normal",
                    "Normal",
                    "Normal",
                    
                    "Poisson",
                    "Poisson",
                    "Poisson",
                    
                    "Poisson",
                    
                    "Binomial",
                    "Binomial",
                    
                    "Poisson"
                    ),
         RD_fully = c("Normal",
                      "Normal",
                      "Normal",
                      
                      "Binomial (wealth and school binary)",
                      "Normal",
                      "Binomial (wealth and school binary)",
                      
                      "Normal",
                      
                      "Normal",
                      "Normal",
                      
                      "Poisson"
                      ))

models_used %>%
  # select(-number) %>%
  flextable() %>%
  set_header_labels(values = list(
    outcome_full = "Outcome",
    RR_min = "Risk Ratio Minimally Adjusted",
    RR_fully = "Risk Ratio Fully Adjusted",
    RD_min = "Risk Difference Minimally Adjusted",
    RD_fully = "Risk Difference Fully Adjusted")) %>%
  flextable::set_caption('Distributions used in GEE models for each outcome.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "ModelsUsed")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")



```


&nbsp;<br>


```{r Model-vars}

vars_1 <- data.frame(min = min_adjusted_vector) %>%
  mutate(number = row_number())

vars_2 <- data.frame(full = fully_adjusted_vector) %>%
  mutate(number = row_number())

vars_3 <- data.frame(full_bin = fully_adjusted_2_vector) %>%
  mutate(number = row_number())
         
vars_per_model <- full_join(vars_1, vars_2, by = "number") %>%
  full_join(vars_3, by = "number") %>%
  set_variable_labels(min = "Minimally Adjusted",
                      full = "Fully Adjusted",
                      full_bin = "Fully Adjusted (binary)") %>%
  mutate(across(everything(), str_replace, "arm", "Arm"),
         across(everything(), str_replace, "period$", "Time period (month)"),
         across(everything(), str_replace, "period_quadratic", "Time period squared"),
         across(everything(), str_replace, "period_cubic", "Time period cubed"),
         across(everything(), str_replace, "quadrant", "Quadrant"),
         across(everything(), str_replace, "client_gender", "Client gender"),
         across(everything(), str_replace, "client_age", "Client age"),
         across(everything(), str_replace, "school_2$", "Level of schooling (quintile)"),
         across(everything(), str_replace, "wealth_quintile", "Wealth (quintile)"),
         across(everything(), str_replace, "school_binary", "Level of schooling (binary)"),
         across(everything(), str_replace, "wealth_binary", "Wealth (binary)"))

# expss::use_labels(
#   data = vars_per_model 
#   # %>%
#   #   select(-number) %>%
#   #   data.frame()
#   ,
#   expr = {
#     flextable(..data) %>%
#       flextable::set_caption('Variables included in each model.',
#                          autonum = officer::run_autonum(seq_id = "tab", 
#                                                         bkm = "ModelVars"))
#   }
# )

vars_per_model %>%
  select(-number) %>%
  flextable() %>%
  set_header_labels(values = list(
    min = "Minimally Adjusted",
    full = "Fully Adjusted",
    full_bin = "Fully Adjusted (binary)"
  )) %>%
  flextable::set_caption('Variables included in each model.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "ModelVars")) %>%
  footnote(i = 1,
           j = 3,
           value = as_paragraph(
             c("used in cases where fully adjusted model did not converge otherwise")),
           part = "header",
           ref_symbols = c("*")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")

```



```{r Define-functions-CI-t-dist}

# Functions to calculate t-based confidence interval and p-value

# for calculating degrees of freedom penalty, only include cluster-level covariates and intercept (p = 6 for most models: "intercept", "armCD", "armControl", "period", "period_quadratic", "period_cubic", "quadrant"; set p in function call for models where p != 7)

my_tidy_rd_p6 <- function(x, exponentiate = FALSE, conf.level = 0.95, p = 6, ...) {
  covariates <- broom::tidy(x, 
                            exponentiate = exponentiate, 
                            conf.int = FALSE)
  summary_1 <- broom::glance(x,
                             exponentiate = exponentiate,
                             conf.int = FALSE)
  I <- summary_1$n.clusters

  covariates %>%
    # calculate the confidence intervals, and save them in a tibble
    mutate(conf.low = estimate - qt(0.975, df = I - p) * std.error,
           conf.high = estimate + qt(0.975, df = I - p) * std.error) %>%
    # calculate t-based p-value
    mutate(p.value = 2 * pt(abs(estimate) / std.error, 
                            df = I - p, lower.tail = FALSE))

}

my_tidy_rr_p6 <- function(x, exponentiate = TRUE, conf.level = 0.95, p = 6, ...) {
  covariates <- broom::tidy(x, 
                            exponentiate = exponentiate, 
                            conf.int = FALSE)
  summary_1 <- broom::glance(x,
                             exponentiate = exponentiate,
                             conf.int = FALSE)
  I <- summary_1$n.clusters

  covariates %>%
    # calculate t-based confidence intervals
    mutate(conf.low = estimate * exp(-qt(0.975, df = I - p) * std.error),
           conf.high = estimate * exp(qt(0.975, df = I - p) * std.error)) %>%
    # calculate t-based p-value
    mutate(p.value = 2 * pt(abs(log(estimate)) / std.error, 
                            df = I - p, lower.tail = FALSE))
}

# Revise for Nigeria SAs

# for calculating degrees of freedom penalty, only include cluster-level covariates and intercept (p = 8 for AL expansion and leaflet distribution models: "intercept", "armCDPD", "period", "period_quadratic", "period_cubic", "quadrant", "leaflets", "leaflet:arm" interaction term)

my_tidy_rd_p8 <- function(x, exponentiate = FALSE, conf.level = 0.95, p = 8, ...) {
  covariates <- broom::tidy(x, 
                            exponentiate = exponentiate, 
                            conf.int = FALSE)
  summary_1 <- broom::glance(x,
                             exponentiate = exponentiate,
                             conf.int = FALSE)
  I <- summary_1$n.clusters

  covariates %>%
    # calculate the confidence intervals, and save them in a tibble
    mutate(conf.low = estimate - qt(0.975, df = I - p) * std.error,
           conf.high = estimate + qt(0.975, df = I - p) * std.error) %>%
    # calculate t-based p-value
    mutate(p.value = 2 * pt(abs(estimate) / std.error, 
                            df = I - p, lower.tail = FALSE))

}

my_tidy_rr_p8 <- function(x, exponentiate = TRUE, conf.level = 0.95, p = 8, ...) {
  covariates <- broom::tidy(x, 
                            exponentiate = exponentiate, 
                            conf.int = FALSE)
  summary_1 <- broom::glance(x,
                             exponentiate = exponentiate,
                             conf.int = FALSE)
  I <- summary_1$n.clusters

  covariates %>%
    # calculate t-based confidence intervals
    mutate(conf.low = estimate * exp(-qt(0.975, df = I - p) * std.error),
           conf.high = estimate * exp(qt(0.975, df = I - p) * std.error)) %>%
    # calculate t-based p-value
    mutate(p.value = 2 * pt(abs(log(estimate)) / std.error, 
                            df = I - p, lower.tail = FALSE))
}

```


```{r Define-function-PoisBCV}
#############################################
# poisson GEE
# Bias-corrected Variance
#############################################

#############################################
# Input
# y: N by 1 vector of outcomes
# X: N by p design matrix (including intercept)
# beta: p by 1 mean model parameter estimates
# alpha: 1 by 1 correlation parameter estimate
# phi: 1 by 1 dispersion parameter estimate
# id: N by 1 cluster indicator
# link: Link function - 'logit', 'log', 'identity'
#############################################

PoisBCV=function(y,X,beta,alpha,phi,id,link){
  
  # Creates two vectors that have the start and end points for each cluster
  BEGINEND=function(n){
    last=cumsum(n)
    first=last-n+1
    return(cbind(first,last))
  }
  
  # Calculate the inverse for symmetric and positive finite matrix
  FINDINV=function(A){
    require(MASS)
    AHALF=chol(A)
    GINV=ginv(AHALF)
    AINV=tcrossprod(GINV)
    return(AINV)
  }
  
  # Score function
  SCORE=function(beta,alpha,phi,y,X,n,p){
    U=rep(0,p)
    UUtran=Ustar=matrix(0,p,p)
    locx=BEGINEND(n)
    
    for(i in 1:length(n)){
      X_c=X[locx[i,1]:locx[i,2],,drop=FALSE]
      y_c=y[locx[i,1]:locx[i,2]]
      
      U_c=rep(0,p)
      Ustar_c=matrix(0,p,p)
      
      if (link=='log') {
        mu_c=exp(c(X_c%*%beta))
        C=X_c*mu_c
      } else if (link=='identity') {
        mu_c=c(X_c%*%beta)
        C=X_c
      }
      
      A=y_c-mu_c
      INVR=diag(1/(1-alpha),n[i])-matrix(alpha/((1-alpha)*(1-alpha+n[i]*alpha)),n[i],n[i])
      INVB=diag(1/sqrt(mu_c),n[i]) %*% INVR %*% diag(1/sqrt(mu_c),n[i])/phi
      
      U_c=t(C)%*%INVB%*%A
      UUtran_c=tcrossprod(U_c)
      Ustar_c=t(C)%*%INVB%*%C
      U=U+U_c
      UUtran=UUtran+UUtran_c
      Ustar=Ustar+Ustar_c
    }
    return(list(U=U,UUtran=UUtran,Ustar=Ustar))
  }
  
  # Compute (A - mm`)^{-1}c without performing the inverse directly
  INVBIG=function(ainvc,ainvm,m,c,start,end){
    for(i in start:end){
      b=ainvm[,i]
      bt=t(b)
      btm=bt%*%m
      btmi=btm[,i]
      gam=1-btmi
      bg=b/gam
      ainvc=ainvc+bg%*%(bt%*%c)
      if(i<end){
        ainvm=ainvm+bg%*%btm
      }
    }
    return(ainvc)
  }
  
  # Creates bias-corrected covariance matrix of beta
  p=ncol(X)
  n=as.numeric(table(id))
  SCORE_RES=SCORE(beta,alpha,phi,y,X,n,p)
  U=SCORE_RES$U
  UUtran=SCORE_RES$UUtran
  Ustar=SCORE_RES$Ustar
  
  # Naive or Model-based estimator
  naive=FINDINV(Ustar)
  
  # BC0 or usual Sandwich estimator     
  robust=naive%*%UUtran%*%t(naive)
  
  # new commands to compute INV(I - H1)
  eigenRES1=eigen(naive)
  evals1=eigenRES1$values
  evecs1=eigenRES1$vectors
  sqrevals1=sqrt(evals1)
  sqe1=evecs1%*%diag(sqrevals1)
  
  # Bias-corrected variance
  Ustar_c_array=UUtran_c_array=array(0,c(p,p,length(n)))
  UUtran=UUbc=UUbc2=UUbc3=Ustar=matrix(0,p,p)
  
  locx=BEGINEND(n)
  
  for(i in 1:length(n)){
    X_c=X[locx[i,1]:locx[i,2],,drop=FALSE]
    y_c=y[locx[i,1]:locx[i,2]]
    if (link=='log') {
      mu_c=exp(c(X_c%*%beta))
    } else if (link=='identity') {
      mu_c=c(X_c%*%beta)
    }
    
    U_i=U_c=rep(0,p)
    Ustar_c=matrix(0,p,p)
    
    # commands for beta
    if (link=='log') {
      C=X_c*mu_c
    } else if (link=='identity') {
      C=X_c
    }
    A=y_c-mu_c
    INVR=diag(1/(1-alpha),n[i])-matrix(alpha/((1-alpha)*(1-alpha+n[i]*alpha)),n[i],n[i])
    INVB=diag(1/sqrt(mu_c),n[i]) %*% INVR %*% diag(1/sqrt(mu_c),n[i])/phi
    U_i=t(C)%*%INVB%*%A
    
    # commands for generalized inverse - beta
    ai1=INVB
    mm1=C%*%sqe1
    ai1A=ai1%*%A
    ai1m1=ai1%*%mm1
    ai1A=INVBIG(ai1A,ai1m1,mm1,A,1,p)
    U_c=t(C)%*%ai1A
    
    Ustar_c=t(C)%*%INVB%*%C
    Ustar=Ustar+Ustar_c
    UUtran_c=tcrossprod(U_i)
    UUtran=UUtran+UUtran_c
    UUbc_c=tcrossprod(U_c)
    UUbc=UUbc+UUbc_c
    UUbc_ic=tcrossprod(U_c,U_i)
    UUbc2=UUbc2+UUbc_ic
    
    Ustar_c_array[,,i]=Ustar_c
    UUtran_c_array[,,i]=UUtran_c
  }
  
  # calculating adjustment factor for BC3
  for(i in 1:length(n)){      
    Hi=diag(1/sqrt(1-pmin(0.75,c(diag(Ustar_c_array[,,i]%*%naive)))))
    UUbc3=UUbc3+Hi%*%UUtran_c_array[,,i]%*%Hi
  }
  
  # BC1 or Variance estimator due to Kauermann and Carroll (2001);
  varKC=naive%*%(UUbc2+t(UUbc2))%*%t(naive)/2
  
  # BC2 or Variance estimator due to Mancl and DeRouen (2001);
  varMD=naive%*%UUbc%*%t(naive)
  
  # BC3 or Variance estimator due to Fay and Graubard (2001);
  varFG=naive%*%UUbc3%*%t(naive)
  
  # BC4 or Variance estimator due to Morel, Bokossa, and Neerchal (2003);
  varMBN=(sum(n)-1)*length(n)/((sum(n)-p)*(length(n)-1))*robust+
    (min(0.5, p/(length(n)-p))*max(1,sum(diag(naive%*%((sum(n)-1)*length(n)/((sum(n)-p)*(length(n)-1))*UUtran)))/p))*t(naive)
  
  #############################################
  # Output
  # naive: naive or model-based var
  # robust: robust sandwich var
  # varMD: bias-corrected sandwich var due to Mancl and DeRouen (2001)
  # varKC: bias-corrected sandwich var due to Kauermann and Carroll (2001)
  # varFG: bias-corrected sandwich var due to Fay and Graubard (2001)
  # varMBN: bias-corrected sandwich var due to Morel, Bokossa, and Neerchal (2003)
  #############################################
  bSE=sqrt(diag(naive))
  bSEBC0=sqrt(diag(robust))
  bSEBC1=sqrt(diag(varKC))
  bSEBC2=sqrt(diag(varMD))
  bSEBC3=sqrt(diag(varFG))
  bSEBC4=sqrt(diag(varMBN))
  
  outbeta=cbind(beta,bSE,bSEBC0,bSEBC1,bSEBC2,bSEBC3,bSEBC4)
  colnames(outbeta)<-c("Estimate","MB-stderr","BC0-stderr","BC1-stderr","BC2-stderr","BC3-stderr","BC4-stderr")
  
  #return(list(naive=naive,robust=robust,varMD=varMD,varKC=varKC,varFG=varFG,varMBN=varMBN))
  return(list(outbeta=outbeta))
}

```

```{r Define-function-BinBCV}
#############################################
# binomial GEE
# Bias-corrected Variance
#############################################

#############################################
# Input
# y: N by 1 vector of outcomes
# X: N by p design matrix (including intercept)
# beta: p by 1 mean model parameter estimates
# alpha: 1 by 1 correlation parameter estimate
# phi: 1 by 1 dispersion parameter estimate
# id: N by 1 cluster indicator
# link: Link function - 'logit', 'log', 'identity'
#############################################

BinBCV=function(y,X,beta,alpha,phi,id,link){
  
  # Creates two vectors that have the start and end points for each cluster
  BEGINEND=function(n){
    last=cumsum(n)
    first=last-n+1
    return(cbind(first,last))
  }
  
  # Calculate the inverse for symmetric and positive finite matrix
  FINDINV=function(A){
    require(MASS)
    AHALF=chol(A)
    GINV=ginv(AHALF)
    AINV=tcrossprod(GINV)
    return(AINV)
  }
  
  # Score function
  SCORE=function(beta,alpha,phi,y,X,n,p){
    U=rep(0,p)
    UUtran=Ustar=matrix(0,p,p)
    locx=BEGINEND(n)
    
    for(i in 1:length(n)){
      X_c=X[locx[i,1]:locx[i,2],,drop=FALSE]
      y_c=y[locx[i,1]:locx[i,2]]
      
      U_c=rep(0,p)
      Ustar_c=matrix(0,p,p)
      
      if (link=='logit') {
        mu_c=1/(1+exp(c(-X_c%*%beta)))
        C=X_c*(mu_c*(1-mu_c))
      } else if (link=='log') {
        mu_c=exp(c(X_c%*%beta))
        C=X_c*mu_c
      } else if (link=='identity') {
        mu_c=c(X_c%*%beta)
        C=X_c
      }
      
      A=y_c-mu_c
      INVR=diag(1/(1-alpha),n[i])-matrix(alpha/((1-alpha)*(1-alpha+n[i]*alpha)),n[i],n[i])
      INVB=diag(1/sqrt(mu_c*(1-mu_c)),n[i]) %*% INVR %*% diag(1/sqrt(mu_c*(1-mu_c)),n[i])/phi
      
      U_c=t(C)%*%INVB%*%A
      UUtran_c=tcrossprod(U_c)
      Ustar_c=t(C)%*%INVB%*%C
      U=U+U_c
      UUtran=UUtran+UUtran_c
      Ustar=Ustar+Ustar_c
    }
    return(list(U=U,UUtran=UUtran,Ustar=Ustar))
  }
  
  # Compute (A - mm`)^{-1}c without performing the inverse directly
  INVBIG=function(ainvc,ainvm,m,c,start,end){
    for(i in start:end){
      b=ainvm[,i]
      bt=t(b)
      btm=bt%*%m
      btmi=btm[,i]
      gam=1-btmi
      bg=b/gam
      ainvc=ainvc+bg%*%(bt%*%c)
      if(i<end){
        ainvm=ainvm+bg%*%btm
      }
    }
    return(ainvc)
  }
  
  # Creates bias-corrected covariance matrix of beta
  p=ncol(X)
  n=as.numeric(table(id))
  SCORE_RES=SCORE(beta,alpha,phi,y,X,n,p)
  U=SCORE_RES$U
  UUtran=SCORE_RES$UUtran
  Ustar=SCORE_RES$Ustar
  
  # Naive or Model-based estimator
  naive=FINDINV(Ustar)
  
  # BC0 or usual Sandwich estimator     
  robust=naive%*%UUtran%*%t(naive)
  
  # new commands to compute INV(I - H1)
  eigenRES1=eigen(naive)
  evals1=eigenRES1$values
  evecs1=eigenRES1$vectors
  sqrevals1=sqrt(evals1)
  sqe1=evecs1%*%diag(sqrevals1)
  
  # Bias-corrected variance
  Ustar_c_array=UUtran_c_array=array(0,c(p,p,length(n)))
  UUtran=UUbc=UUbc2=UUbc3=Ustar=matrix(0,p,p)
  
  locx=BEGINEND(n)
  
  for(i in 1:length(n)){
    X_c=X[locx[i,1]:locx[i,2],,drop=FALSE]
    y_c=y[locx[i,1]:locx[i,2]]
    if (link=='logit') {
      mu_c=1/(1+exp(c(-X_c%*%beta)))
    } else if (link=='log') {
      mu_c=exp(c(X_c%*%beta))
    } else if (link=='identity') {
      mu_c=c(X_c%*%beta)
    }
    
    U_i=U_c=rep(0,p)
    Ustar_c=matrix(0,p,p)
    
    # commands for beta
    if (link=='logit') {
      C=X_c*(mu_c*(1-mu_c))
    } else if (link=='log') {
      C=X_c*mu_c
    } else if (link=='identity') {
      C=X_c
    }
    A=y_c-mu_c
    INVR=diag(1/(1-alpha),n[i])-matrix(alpha/((1-alpha)*(1-alpha+n[i]*alpha)),n[i],n[i])
    INVB=diag(1/sqrt(mu_c*(1-mu_c)),n[i]) %*% INVR %*% diag(1/sqrt(mu_c*(1-mu_c)),n[i])/phi
    U_i=t(C)%*%INVB%*%A
    
    # commands for generalized inverse - beta
    ai1=INVB
    mm1=C%*%sqe1
    ai1A=ai1%*%A
    ai1m1=ai1%*%mm1
    ai1A=INVBIG(ai1A,ai1m1,mm1,A,1,p)
    U_c=t(C)%*%ai1A
    
    Ustar_c=t(C)%*%INVB%*%C
    Ustar=Ustar+Ustar_c
    UUtran_c=tcrossprod(U_i)
    UUtran=UUtran+UUtran_c
    UUbc_c=tcrossprod(U_c)
    UUbc=UUbc+UUbc_c
    UUbc_ic=tcrossprod(U_c,U_i)
    UUbc2=UUbc2+UUbc_ic
    
    Ustar_c_array[,,i]=Ustar_c
    UUtran_c_array[,,i]=UUtran_c
  }
  
  # calculating adjustment factor for BC3
  for(i in 1:length(n)){      
    Hi=diag(1/sqrt(1-pmin(0.75,c(diag(Ustar_c_array[,,i]%*%naive)))))
    UUbc3=UUbc3+Hi%*%UUtran_c_array[,,i]%*%Hi
  }
  
  # BC1 or Variance estimator due to Kauermann and Carroll (2001);
  varKC=naive%*%(UUbc2+t(UUbc2))%*%t(naive)/2
  
  # BC2 or Variance estimator due to Mancl and DeRouen (2001);
  varMD=naive%*%UUbc%*%t(naive)
  
  # BC3 or Variance estimator due to Fay and Graubard (2001);
  varFG=naive%*%UUbc3%*%t(naive)
  
  # BC4 or Variance estimator due to Morel, Bokossa, and Neerchal (2003);
  varMBN=(sum(n)-1)*length(n)/((sum(n)-p)*(length(n)-1))*robust+
    (min(0.5, p/(length(n)-p))*max(1,sum(diag(naive%*%((sum(n)-1)*length(n)/((sum(n)-p)*(length(n)-1))*UUtran)))/p))*t(naive)
  
  #############################################
  # Output
  # naive: naive or model-based var
  # robust: robust sandwich var
  # varMD: bias-corrected sandwich var due to Mancl and DeRouen (2001)
  # varKC: bias-corrected sandwich var due to Kauermann and Carroll (2001)
  # varFG: bias-corrected sandwich var due to Fay and Graubard (2001)
  # varMBN: bias-corrected sandwich var due to Morel, Bokossa, and Neerchal (2003)
  #############################################
  bSE=sqrt(diag(naive))
  bSEBC0=sqrt(diag(robust))
  bSEBC1=sqrt(diag(varKC))
  bSEBC2=sqrt(diag(varMD))
  bSEBC3=sqrt(diag(varFG))
  bSEBC4=sqrt(diag(varMBN))
  
  outbeta=cbind(beta,bSE,bSEBC0,bSEBC1,bSEBC2,bSEBC3,bSEBC4)
  colnames(outbeta)<-c("Estimate","MB-stderr","BC0-stderr","BC1-stderr","BC2-stderr","BC3-stderr","BC4-stderr")
  
  #return(list(naive=naive,robust=robust,varMD=varMD,varKC=varKC,varFG=varFG,varMBN=varMBN))
  return(list(outbeta=outbeta))
}

```


```{r GEE-Prim-kc, cache = FALSE, results = 'hide'}

# Small sample corrections

# Log link

# Minimally adjusted
fit_PoisBCV_prim <- PoisBCV(
  y = data_gee_prim$outcome_prim,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_prim),
  beta <- fit_geepack_prim$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_prim$cluster_id,
  link <- "log"
  )

fit_geepack_prim_smv <- fit_geepack_prim

# Replace robust variances with small sample corrected variances (KC)
# diag(fit_geepack_prim_smv$geese$vbeta) <- fit_kc_prim$cov.beta

diag(fit_geepack_prim_smv$geese$vbeta) <- (fit_PoisBCV_prim$outbeta[, "BC1-stderr"])^2


# Fully adjusted
fit_PoisBCV_prim_fully <- PoisBCV(
  y = data_gee_prim_fully$outcome_prim,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_prim_fully),
  beta <- fit_geepack_prim_fully$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_prim_fully$cluster_id,
  link <- "log"
  )


fit_geepack_prim_fully_smv <- fit_geepack_prim_fully

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_prim_fully_smv$geese$vbeta) <- (fit_PoisBCV_prim_fully$outbeta[, "BC1-stderr"])^2

# Identity link 

# Minimally adjusted (doesn't converge)
# fit_PoisBCV_prim_id <- PoisBCV(
#   y = data_gee_prim$outcome_prim,
#   X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_prim),
#   beta <- fit_geepack_prim_id$coefficients,
#   alpha <- 0, # Independent working correlation
#   phi <- 1,
#   id <- data_gee_prim$cluster_id,
#   link <- "identity"
#   )

# No custom formula for gaussian distribution, use geesmv package as advised by Xueqi
fit_PoisBCV_prim_id <- GEE.var.kc(as.formula(paste0("outcome_prim ~ ", min_adjusted)),
                                  id = "cluster_id", 
                                  data = data_gee_prim,
                                  family = gaussian, corstr = "independence")

fit_geepack_prim_id_smv <- fit_geepack_prim_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_prim_id_smv$geese$vbeta) <- fit_PoisBCV_prim_id$cov.beta

# Fully adjusted (doesn't converge)
# fit_PoisBCV_prim_fully_id <- PoisBCV(
#   y = data_gee_prim_fully$outcome_prim,
#   X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_prim_fully),
#   beta <- fit_geepack_prim_fully_id$coefficients,
#   alpha <- 0, # Independent working correlation
#   phi <- 1,
#   id <- data_gee_prim_fully$cluster_id,
#   link <- "identity"
#   )

# No custom formula for gaussian distribution, use geesmv package as advised by Xueqi
fit_PoisBCV_prim_fully_id <- GEE.var.kc(as.formula(paste0("outcome_prim ~ ", fully_adjusted)),
                                        id = "cluster_id", 
                                        data = data_gee_prim_fully,
                                        family = gaussian, corstr = "independence")

fit_geepack_prim_fully_id_smv <- fit_geepack_prim_fully_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_prim_fully_id_smv$geese$vbeta) <- fit_PoisBCV_prim_fully_id$cov.beta


```


\newpage

## Primary Outcome - ACT purchased by test positive

```{r GEE-Prim-kc-out, cache = FALSE, ft.keepnext = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_prim_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE)

risk_diff_min <- fit_geepack_prim_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_prim_fully_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_prim_fully_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>%
  modify_header(
    estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**",
    estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**",
    estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**",
    estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**"
    ) %>%
  modify_footnote(
    update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ 
      "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval",
    abbreviation = TRUE
  ) %>% 
  as_flex_table() %>%
  flextable::set_caption(
    as_paragraph(
      as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ',
                      nrow(data_gee_prim),
                      ', N (fully adjusted) = ',
                      nrow(data_gee_prim_fully)), 
               props = fp_text_default(
                 font.family = "Cambria",
                 font.size = 12,
                 italic = TRUE))
    ),
    autonum = officer::run_autonum(seq_id = "tab",
                                   bkm = "PrimGEEkc")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")

# ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5],
#                                              function(x) {mutate(x, txt = str_replace(
#                                                txt, "\\(", "\n("))})

ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all")


```


```{r GEE-Prim-subset, cache = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_prim_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients",
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE)

risk_diff_min <- fit_geepack_prim_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients",
                 tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_prim_fully_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients",
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_prim_fully_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients",
                 tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

gt_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**"))
```

\newpage

## Secondary Outcome 1 - Use of mRDT

```{r GEE-Sec-1, results = 'hide', cache = FALSE}

# Get n/N (%) for each arm
prop_sec_tested <- data3 %>% 
  select(outcome_sec_tested, arm) %>%
  na.omit() %>%
  group_by(arm) %>%
  summarize(n = sum(outcome_sec_tested),
            N = n()) %>%
  mutate(prop_num = n / N) %>%
  mutate(prop = paste0(n,
                       "/",
                       N,
                       " (",
                       signif(100 * prop_num, digits = 2), "%)"))

# Create datasets with no missing values in outcome variable or covariates
data_gee_sec_tested <- data_gee %>%
  dplyr::select(outcome_sec_tested, cluster_id, all_of(min_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_tested[prop_sec_tested$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_tested[prop_sec_tested$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_tested[prop_sec_tested$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()

data_gee_sec_tested_fully <- data_gee %>%
  dplyr::select(outcome_sec_tested, cluster_id, all_of(fully_adjusted_2_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_tested[prop_sec_tested$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_tested[prop_sec_tested$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_tested[prop_sec_tested$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()


# Log link
# Minimally adjusted
fit_geepack_sec_tested <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_tested,
                           family = poisson,
                           corstr = "independence")

# summary(fit_geepack_sec_tested)

# Fully adjusted
fit_geepack_sec_tested_fully <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted_2)),
                           id = cluster_id, 
                           data = data_gee_sec_tested_fully,
                           family = poisson,
                           corstr = "independence")

# Identity link

# Minimally adjusted
fit_geepack_sec_tested_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_tested,
                           family = poisson(link = "identity"),
                           corstr = "independence")

# Fully adjusted (doesn't converge with wealth and school_2 having >2 categories)
# fit_geepack_sec_tested_fully_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted)),
#                            id = cluster_id,
#                            data = data_gee_sec_tested_fully,
#                            family = poisson(link = "identity"),
#                            corstr = "independence")

# fit_geepack_sec_tested_fully_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted_2)),
#                            id = cluster_id,
#                            data = data_gee_sec_tested_fully,
#                            family = poisson(link = "identity"),
#                            corstr = "independence")

fit_geepack_sec_tested_fully_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted_2)),
                           id = cluster_id,
                           data = data_gee_sec_tested_fully,
                           family = binomial(link = "identity"),
                           corstr = "independence")


```



```{r GEE-Sec-1-kc, cache = FALSE, results = 'hide'}

# Small sample corrections

# Log link

# Minimally adjusted
fit_PoisBCV_sec_tested <- PoisBCV(
  y = data_gee_sec_tested$outcome_sec_tested,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_tested),
  beta <- fit_geepack_sec_tested_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_tested$cluster_id,
  link <- "log"
  )

fit_geepack_sec_tested_smv <- fit_geepack_sec_tested

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_tested_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested$outbeta[, "BC1-stderr"])^2

# Fully adjusted
# fit_kc_sec_tested_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted)),
#                                 id = "cluster_id", 
#                                 data = data_gee_sec_tested_fully,
#                                 family = poisson, corstr = "independence")

fit_PoisBCV_sec_tested_fully <- PoisBCV(
  y = data_gee_sec_tested_fully$outcome_sec_tested,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted_2)), data_gee_sec_tested_fully),
  beta <- fit_geepack_sec_tested_fully_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_tested_fully$cluster_id,
  link <- "log"
  )

fit_geepack_sec_tested_fully_smv <- fit_geepack_sec_tested_fully

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_tested_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested_fully$outbeta[, "BC1-stderr"])^2

# Identity link 
# Minimally adjusted
fit_PoisBCV_sec_tested_id <- PoisBCV(
  y = data_gee_sec_tested$outcome_sec_tested,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_tested),
  beta <- fit_geepack_sec_tested_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_tested$cluster_id,
  link <- "identity"
  )

fit_geepack_sec_tested_id_smv <- fit_geepack_sec_tested_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_tested_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested_id$outbeta[, "BC1-stderr"])^2

# # Fully adjusted
# fit_PoisBCV_sec_tested_fully_id <- PoisBCV(
#   y = data_gee_sec_tested_fully$outcome_sec_tested,
#   X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_tested_fully),
#   beta <- fit_geepack_sec_tested_fully_id$coefficients,
#   alpha <- 0, # Independent working correlation
#   phi <- 1,
#   id <- data_gee_sec_tested_fully$cluster_id,
#   link <- "identity"
#   )
# 
# fit_geepack_sec_tested_fully_id_smv <- fit_geepack_sec_tested_fully_id
# 
# # Replace robust variances with small sample corrected variances (KC)
# diag(fit_geepack_sec_tested_fully_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested_fully_id$outbeta[, "BC1-stderr"])^2

# Fully adjusted (use binomial family with identity link because Poisson with identity did not converge)
fit_BinBCV_sec_tested_fully_id <- BinBCV(
  y = data_gee_sec_tested_fully$outcome_sec_tested,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted_2)), data_gee_sec_tested_fully),
  beta <- fit_geepack_sec_tested_fully_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_tested_fully$cluster_id,
  link <- "identity"
  )

fit_geepack_sec_tested_fully_id_smv <- fit_geepack_sec_tested_fully_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_tested_fully_id_smv$geese$vbeta) <- (fit_BinBCV_sec_tested_fully_id$outbeta[, "BC1-stderr"])^2


```


```{r GEE-Sec-1-kc-out, cache = FALSE, ft.keepnext = FALSE}

risk_ratio_min <- fit_geepack_sec_tested_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_tested_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_tested_fully_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_tested_fully_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>%
  modify_header(
    estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**",
    estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**",
    estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**",
    estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**"
    ) %>%
  modify_footnote(
    update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ 
      "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval",
    abbreviation = TRUE
  ) %>%
  as_flex_table() %>%
  flextable::set_caption(
    as_paragraph(
      as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ',
                      nrow(data_gee_sec_tested),
                      ', N (fully adjusted) = ',
                      nrow(data_gee_sec_tested_fully)), 
               props = fp_text_default(
                 font.family = "Cambria",
                 font.size = 12,
                 italic = TRUE))
    ),
    autonum = officer::run_autonum(seq_id = "tab",
                                   bkm = "PrimGEEsec1")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")

# ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5],
#                                              function(x) {mutate(x, txt = str_replace(
#                                                txt, "\\(", "\n("))})

ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all")


```


```{r GEE-Sec-1-subset, cache = FALSE}

risk_ratio_min <- fit_geepack_sec_tested_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that receive a mRDT", 
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_tested_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that receive a mRDT",
                 tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_tested_fully_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that receive a mRDT",
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_tested_fully_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that receive a mRDT",
                 tidy_fun = my_tidy_rd_p6)

tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

gt_sec1 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**"))


```

\newpage

## Secondary Outcome 2 - Adherence to mRDT result

```{r GEE-Sec-2, results = 'hide'}

# Get n/N (%) for each arm
prop_sec_adherence <- data3 %>% 
  select(outcome_sec_adherence, arm) %>%
  na.omit() %>%
  group_by(arm) %>%
  summarize(n = sum(outcome_sec_adherence),
            N = n()) %>%
  mutate(prop_num = n / N) %>%
  mutate(prop = paste0(n,
                       "/",
                       N,
                       " (",
                       signif(100 * prop_num, digits = 2), "%)"))

# Create datasets with no missing values in outcome variable or covariates
data_gee_sec_adherence <- data_gee %>%
  dplyr::select(outcome_sec_adherence, cluster_id, all_of(min_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_adherence[prop_sec_adherence$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_adherence[prop_sec_adherence$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_adherence[prop_sec_adherence$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()

data_gee_sec_adherence_fully <- data_gee %>%
  dplyr::select(outcome_sec_adherence, cluster_id, all_of(fully_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_adherence[prop_sec_adherence$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_adherence[prop_sec_adherence$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_adherence[prop_sec_adherence$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()


# Log link

# Minimally adjusted
fit_geepack_sec_adherence <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_adherence,
                           family = poisson,
                           corstr = "independence")

# Fully adjusted
fit_geepack_sec_adherence_fully <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_adherence_fully,
                           family = poisson,
                           corstr = "independence")


# Identity link

# Minimally adjusted
fit_geepack_sec_adherence_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_adherence,
                           family = poisson(link = "identity"),
                           corstr = "independence")

# Fully adjusted (Does not converge)
# fit_geepack_sec_adherence_fully_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)),
#                            id = cluster_id, 
#                            data = data_gee_sec_adherence_fully,
#                            family = poisson(link = "identity"),
#                            corstr = "independence")

# fit_geepack_sec_adherence_fully_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)),
#                            id = cluster_id, 
#                            data = data_gee_sec_adherence_fully,
#                            family = binomial(link = "identity"),
#                            corstr = "independence")

fit_geepack_sec_adherence_fully_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_adherence_fully,
                           family = gaussian(link = "identity"),
                           corstr = "independence")


```



```{r GEE-Sec-2-kc, cache = FALSE, results = 'hide'}

# Small sample corrections

# Log link

# Minimally adjusted
# fit_kc_sec_adherence <- GEE.var.kc(as.formula(paste0("outcome_sec_adherence ~ ", min_adjusted)),
#                           id = "cluster_id", 
#                           data = data_gee_sec_adherence,
#                           family = poisson, corstr = "independence")

fit_PoisBCV_sec_adherence <- PoisBCV(
  y = data_gee_sec_adherence$outcome_sec_adherence,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_adherence),
  beta <- fit_geepack_sec_adherence_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_adherence$cluster_id,
  link <- "log"
  )

fit_geepack_sec_adherence_smv <- fit_geepack_sec_adherence

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_adherence_smv$geese$vbeta) <- (fit_PoisBCV_sec_adherence$outbeta[, "BC1-stderr"])^2

# Fully adjusted
# fit_kc_sec_adherence_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)),
#                                 id = "cluster_id", 
#                                 data = data_gee_sec_adherence_fully,
#                                 family = poisson, corstr = "independence")

fit_PoisBCV_sec_adherence_fully <- PoisBCV(
  y = data_gee_sec_adherence_fully$outcome_sec_adherence,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_adherence_fully),
  beta <- fit_geepack_sec_adherence_fully_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_adherence_fully$cluster_id,
  link <- "log"
  )

fit_geepack_sec_adherence_fully_smv <- fit_geepack_sec_adherence_fully

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_adherence_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_adherence_fully$outbeta[, "BC1-stderr"])^2

# Identity link 

# Minimally adjusted
fit_PoisBCV_sec_adherence_id <- PoisBCV(
  y = data_gee_sec_adherence$outcome_sec_adherence,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_adherence),
  beta <- fit_geepack_sec_adherence_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_adherence$cluster_id,
  link <- "identity"
  )

fit_geepack_sec_adherence_id_smv <- fit_geepack_sec_adherence_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_adherence_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_adherence_id$outbeta[, "BC1-stderr"])^2

# Fully adjusted (doesn't converge)
# fit_PoisBCV_sec_adherence_fully_id <- PoisBCV(
#   y = data_gee_sec_adherence_fully$outcome_sec_adherence,
#   X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_adherence_fully),
#   beta <- fit_geepack_sec_adherence_fully_id$coefficients,
#   alpha <- 0, # Independent working correlation
#   phi <- 1,
#   id <- data_gee_sec_adherence_fully$cluster_id,
#   link <- "identity"
#   )

# No custom formula for gaussian distribution, use geesmv package as advised by Xueqi
fit_PoisBCV_sec_adherence_fully_id <- GEE.var.kc(as.formula(paste0("outcome_sec_adherence ~ ",
                                                                   fully_adjusted)),
                                                 id = "cluster_id", 
                                                 data = data_gee_sec_adherence_fully,
                                                 family = gaussian, corstr = "independence")

fit_geepack_sec_adherence_fully_id_smv <- fit_geepack_sec_adherence_fully_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_adherence_fully_id_smv$geese$vbeta) <- fit_PoisBCV_sec_adherence_fully_id$cov.beta


```


```{r GEE-Sec-2-kc-out, cache = FALSE, ft.keepnext = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_sec_adherence_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_adherence_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_adherence_fully_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_adherence_fully_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>%
  modify_header(
    estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**",
    estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**",
    estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**",
    estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**"
    ) %>%
  modify_footnote(
    update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ 
      "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval",
    abbreviation = TRUE
  ) %>%
  as_flex_table() %>%
  flextable::set_caption(
    as_paragraph(
      as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ',
                      nrow(data_gee_sec_adherence),
                      ', N (fully adjusted) = ',
                      nrow(data_gee_sec_adherence_fully)), 
               props = fp_text_default(
                 font.family = "Cambria",
                 font.size = 12,
                 italic = TRUE))
    ),
    autonum = officer::run_autonum(seq_id = "tab",
                                   bkm = "PrimGEEsec2")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")

# ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5],
#                                              function(x) {mutate(x, txt = str_replace(
#                                                txt, "\\(", "\n("))})

ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all")


```


```{r GEE-Sec-2-subset, cache = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_sec_adherence_smv %>% 
  tbl_regression(include = arm,
                 label = arm ~ "Malaria tested clients whose treatment adhered to test results",
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_adherence_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Malaria tested clients whose treatment adhered to test results",
                 tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_adherence_fully_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Malaria tested clients whose treatment adhered to test results",
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_adherence_fully_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Malaria tested clients whose treatment adhered to test results",
                 tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

gt_sec2 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) 

```


\newpage


## Secondary Outcome 3 - Appropriate case management

```{r GEE-Sec-3, results = 'hide'}

# Get n/N (%) for each arm
prop_sec_appropriate <- data3 %>% 
  select(outcome_sec_appropriate, arm) %>%
  na.omit() %>%
  group_by(arm) %>%
  summarize(n = sum(outcome_sec_appropriate),
            N = n()) %>%
  mutate(prop_num = n / N) %>%
  mutate(prop = paste0(n,
                       "/",
                       N,
                       " (",
                       signif(100 * prop_num, digits = 2), "%)"))

# Create datasets with no missing values in outcome variable or covariates
data_gee_sec_appropriate <- data_gee %>%
  dplyr::select(outcome_sec_appropriate, cluster_id, all_of(min_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_appropriate[prop_sec_appropriate$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()

data_gee_sec_appropriate_fully <- data_gee %>%
  dplyr::select(outcome_sec_appropriate, cluster_id, all_of(fully_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_appropriate[prop_sec_appropriate$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()


# Log link

# Minimally adjusted
fit_geepack_sec_appropriate <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_appropriate,
                           family = poisson,
                           corstr = "independence")

# Fully adjusted
fit_geepack_sec_appropriate_fully <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_appropriate_fully,
                           family = poisson,
                           corstr = "independence")


# Identity link

# Minimally adjusted (doesn't converge with Poisson family)
fit_geepack_sec_appropriate_id <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_appropriate,
                           family = binomial(link = "identity"),
                           corstr = "independence")

# Fully adjusted (doesn't converge with Poisson or binomial family)
fit_geepack_sec_appropriate_fully_id <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)),
                           id = cluster_id,
                           data = data_gee_sec_appropriate_fully,
                           family = gaussian(link = "identity"),
                           corstr = "independence")




```



```{r GEE-Sec-3-kc, cache = FALSE, results = 'hide'}

# Small sample corrections

# Log link

# Minimally adjusted
fit_PoisBCV_sec_appropriate <- PoisBCV(
  y = data_gee_sec_appropriate$outcome_sec_appropriate,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_appropriate),
  beta <- fit_geepack_sec_appropriate_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_appropriate$cluster_id,
  link <- "log"
  )

fit_geepack_sec_appropriate_smv <- fit_geepack_sec_appropriate

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_appropriate_smv$geese$vbeta) <- (fit_PoisBCV_sec_appropriate$outbeta[, "BC1-stderr"])^2

# Fully adjusted
# fit_kc_sec_appropriate_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)),
#                                 id = "cluster_id", 
#                                 data = data_gee_sec_appropriate_fully,
#                                 family = poisson, corstr = "independence")

fit_PoisBCV_sec_appropriate_fully <- PoisBCV(
  y = data_gee_sec_appropriate_fully$outcome_sec_appropriate,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_appropriate_fully),
  beta <- fit_geepack_sec_appropriate_fully_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_appropriate_fully$cluster_id,
  link <- "log"
  )

fit_geepack_sec_appropriate_fully_smv <- fit_geepack_sec_appropriate_fully

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_appropriate_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_appropriate_fully$outbeta[, "BC1-stderr"])^2

# Identity link 

# Minimally adjusted (doesn't converge)
# fit_PoisBCV_sec_appropriate_id <- PoisBCV(
#   y = data_gee_sec_appropriate$outcome_sec_appropriate,
#   X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_appropriate),
#   beta <- fit_geepack_sec_appropriate_id$coefficients,
#   alpha <- 0, # Independent working correlation
#   phi <- 1,
#   id <- data_gee_sec_appropriate$cluster_id,
#   link <- "identity"
#   )

fit_BinBCV_sec_appropriate_id <- BinBCV(
  y = data_gee_sec_appropriate$outcome_sec_appropriate,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_appropriate),
  beta <- fit_geepack_sec_appropriate_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_appropriate$cluster_id,
  link <- "identity"
  )

fit_geepack_sec_appropriate_id_smv <- fit_geepack_sec_appropriate_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_appropriate_id_smv$geese$vbeta) <- (fit_BinBCV_sec_appropriate_id$outbeta[, "BC1-stderr"])^2

# Fully adjusted (doesn't converge)
# fit_BinBCV_sec_appropriate_fully_id <- BinBCV(
#   y = data_gee_sec_appropriate_fully$outcome_sec_appropriate,
#   X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_appropriate_fully),
#   beta <- fit_geepack_sec_appropriate_fully_id$coefficients,
#   alpha <- 0, # Independent working correlation
#   phi <- 1,
#   id <- data_gee_sec_appropriate_fully$cluster_id,
#   link <- "identity"
#   )

# No custom formula for gaussian distribution, use geesmv package as advised by Xueqi
fit_NormalBCV_sec_appropriate_fully_id <- GEE.var.kc(as.formula(paste0("outcome_sec_appropriate ~ ",
                                                                   fully_adjusted)),
                                                  id = "cluster_id", 
                                                  data = data_gee_sec_appropriate_fully,
                                                  family = gaussian, corstr = "independence")

fit_geepack_sec_appropriate_fully_id_smv <- fit_geepack_sec_appropriate_fully_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_appropriate_fully_id_smv$geese$vbeta) <- fit_NormalBCV_sec_appropriate_fully_id$cov.beta


```


```{r GEE-Sec-3-kc-out, cache = FALSE, ft.keepnext = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_sec_appropriate_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_appropriate_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_appropriate_fully_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_appropriate_fully_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>%
  modify_header(
    estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**",
    estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**",
    estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**",
    estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**"
    ) %>%
  modify_footnote(
    update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ 
      "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval",
    abbreviation = TRUE
  ) %>%
  as_flex_table() %>%
  flextable::set_caption(
    as_paragraph(
      as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ',
                      nrow(data_gee_sec_appropriate),
                      ', N (fully adjusted) = ',
                      nrow(data_gee_sec_appropriate_fully)), 
               props = fp_text_default(
                 font.family = "Cambria",
                 font.size = 12,
                 italic = TRUE))
    ),
    autonum = officer::run_autonum(seq_id = "tab", 
                                   bkm = "PrimGEEsec3")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")

# ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5],
#                                              function(x) {mutate(x, txt = str_replace(
#                                                txt, "\\(", "\n("))})

ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all")


```


```{r GEE-Sec-3-subset, cache = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_sec_appropriate_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that are managed appropriately", 
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_appropriate_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that are managed appropriately",
                 tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_appropriate_fully_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that are managed appropriately",
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_appropriate_fully_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Suspected malaria cases that are managed appropriately",
                 tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

gt_sec3 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) 

```

\newpage

## Secondary Outcome 4 - ACT use among untested

```{r GEE-Sec-4, results = 'hide'}

# Get n/N (%) for each arm
prop_sec_ACT_untested <- data3 %>% 
  select(outcome_sec_ACT_untested, arm) %>%
  na.omit() %>%
  group_by(arm) %>%
  summarize(n = sum(outcome_sec_ACT_untested),
            N = n()) %>%
  mutate(prop_num = n / N) %>%
  mutate(prop = paste0(n,
                       "/",
                       N,
                       " (",
                       signif(100 * prop_num, digits = 2), "%)"))

# Create datasets with no missing values in outcome variable or covariates
data_gee_sec_ACT_untested <- data_gee %>%
  dplyr::select(outcome_sec_ACT_untested, cluster_id, all_of(min_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()

data_gee_sec_ACT_untested_fully <- data_gee %>%
  dplyr::select(outcome_sec_ACT_untested, cluster_id, all_of(fully_adjusted_vector)) %>%
  drop_na() %>%
  mutate(arm = dplyr::recode(arm,
                             "CDPD" = paste("CDPD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CDPD", ]$prop), 
                             "Control" = paste("Control, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "Control", ]$prop), 
                             "CD" = paste("CD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CD", ]$prop))) %>%
  set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>%
  as.data.frame()


# Log link

# Minimally adjusted
fit_geepack_sec_ACT_untested <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_ACT_untested,
                           family = poisson,
                           corstr = "independence")

# Fully adjusted
fit_geepack_sec_ACT_untested_fully <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_ACT_untested_fully,
                           family = poisson,
                           corstr = "independence")


# Identity link

# Minimally adjusted
fit_geepack_sec_ACT_untested_id <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", min_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_ACT_untested,
                           family = poisson(link = "identity"),
                           corstr = "independence")

# Fully adjusted 
fit_geepack_sec_ACT_untested_fully_id <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", fully_adjusted)),
                           id = cluster_id, 
                           data = data_gee_sec_ACT_untested_fully,
                           family = poisson(link = "identity"),
                           corstr = "independence")


```



```{r GEE-Sec-4-kc, cache = FALSE, results = 'hide'}

# Small sample corrections

# Log link

# Minimally adjusted
fit_PoisBCV_sec_ACT_untested <- PoisBCV(
  y = data_gee_sec_ACT_untested$outcome_sec_ACT_untested,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_ACT_untested),
  beta <- fit_geepack_sec_ACT_untested_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_ACT_untested$cluster_id,
  link <- "log"
  )

fit_geepack_sec_ACT_untested_smv <- fit_geepack_sec_ACT_untested

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_ACT_untested_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested$outbeta[, "BC1-stderr"])^2

# Fully adjusted
# fit_kc_sec_ACT_untested_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_ACT_untested ~ ", fully_adjusted)),
#                                 id = "cluster_id", 
#                                 data = data_gee_sec_ACT_untested_fully,
#                                 family = poisson, corstr = "independence")

fit_PoisBCV_sec_ACT_untested_fully <- PoisBCV(
  y = data_gee_sec_ACT_untested_fully$outcome_sec_ACT_untested,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_ACT_untested_fully),
  beta <- fit_geepack_sec_ACT_untested_fully_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_ACT_untested_fully$cluster_id,
  link <- "log"
  )

fit_geepack_sec_ACT_untested_fully_smv <- fit_geepack_sec_ACT_untested_fully

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_ACT_untested_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested_fully$outbeta[, "BC1-stderr"])^2

# Identity link 

# Minimally adjusted
fit_PoisBCV_sec_ACT_untested_id <- PoisBCV(
  y = data_gee_sec_ACT_untested$outcome_sec_ACT_untested,
  X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_ACT_untested),
  beta <- fit_geepack_sec_ACT_untested_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_ACT_untested$cluster_id,
  link <- "identity"
  )

fit_geepack_sec_ACT_untested_id_smv <- fit_geepack_sec_ACT_untested_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_ACT_untested_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested_id$outbeta[, "BC1-stderr"])^2

# Fully adjusted
fit_PoisBCV_sec_ACT_untested_fully_id <- PoisBCV(
  y = data_gee_sec_ACT_untested_fully$outcome_sec_ACT_untested,
  X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_ACT_untested_fully),
  beta <- fit_geepack_sec_ACT_untested_fully_id$coefficients,
  alpha <- 0, # Independent working correlation
  phi <- 1,
  id <- data_gee_sec_ACT_untested_fully$cluster_id,
  link <- "identity"
  )

fit_geepack_sec_ACT_untested_fully_id_smv <- fit_geepack_sec_ACT_untested_fully_id

# Replace robust variances with small sample corrected variances (KC)
diag(fit_geepack_sec_ACT_untested_fully_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested_fully_id$outbeta[, "BC1-stderr"])^2


```


```{r GEE-Sec-4-kc-out, cache = FALSE, ft.keepnext = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_sec_ACT_untested_smv %>%
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_ACT_untested_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_ACT_untested_fully_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_ACT_untested_fully_id_smv %>% 
  tbl_regression(tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>%
  modify_header(
    estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**",
    estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**",
    estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**",
    estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**"
    ) %>%
  modify_footnote(
    update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ 
      "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval",
    abbreviation = TRUE
  ) %>%
  as_flex_table() %>%
  flextable::set_caption(
    as_paragraph(
      as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ',
                      nrow(data_gee_sec_ACT_untested),
                      ', N (fully adjusted) = ',
                      nrow(data_gee_sec_ACT_untested_fully)), 
               props = fp_text_default(
                 font.family = "Cambria",
                 font.size = 12,
                 italic = TRUE))
    ),
    autonum = officer::run_autonum(seq_id = "tab",
                                   bkm = "PrimGEEsec4")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")

# ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5],
#                                              function(x) {mutate(x, txt = str_replace(
#                                                txt, "\\(", "\n("))})

ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all")


```


```{r GEE-Sec-4-subset, cache = FALSE}

# Create table for each model
risk_ratio_min <- fit_geepack_sec_ACT_untested_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Untested clients taking ACT", 
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE)

risk_diff_min <- fit_geepack_sec_ACT_untested_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Untested clients taking ACT",
                 tidy_fun = my_tidy_rd_p6)

risk_ratio_fully <- fit_geepack_sec_ACT_untested_fully_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Untested clients taking ACT", 
                 tidy_fun = my_tidy_rr_p6,
                 exponentiate = TRUE) 

risk_diff_fully <- fit_geepack_sec_ACT_untested_fully_id_smv %>% 
  tbl_regression(include = arm, 
                 label = arm ~ "Untested clients taking ACT",
                 tidy_fun = my_tidy_rd_p6)

# Combine tables
tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully),
                       tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>%
  modify_column_hide(columns = contains("p.value"))

gt_sec4 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff),
                     tab_spanner = c("**Risk Ratio**", "**Risk Difference**")
                     ) 
```



\newpage

# Other tables included in Results paper

## Table 2 (Results paper). Minimally adjusted and fully adjusted estimates of intervention effects for pre-specified study outcomes

```{r Combine-results-table, cache = FALSE, ft.keepnext = FALSE}

gt_results <- tbl_stack(tbls = list(gt_prim, gt_sec1, gt_sec2, gt_sec3, gt_sec4))

ft_results <- gt_results %>%
  modify_header(
    estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**",
    estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**",
    estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**",
    estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**"
    ) %>%
  modify_footnote(
    update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ 
      "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval",
    abbreviation = TRUE
  ) %>%
  as_flex_table() %>%
  flextable::set_caption('GEE analysis with KC Small Sample Variance Correction.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "PrimGEEsec4")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all")


ft_results %>% theme_zebra() %>% align(j = -1, align = "center", part = "all")

```



```{r, eval = T}

shop_test_order <- 
  data4 %>%
  mutate(arm = fct_relevel(as.factor(arm), "Control", "CDPD")) %>%
  group_by(shop_ni) %>%

  mutate(cluster_primary_prop = mean(outcome_prim, na.rm = T)) %>%
  mutate(cluster_primary_denom = sum(!is.na(outcome_prim))) %>%
  
  mutate(cluster_sec1_prop = mean(outcome_sec_tested_expanded, na.rm = T)) %>%
  mutate(cluster_sec1_denom = sum(!is.na(outcome_sec_tested_expanded))) %>%
  
  mutate(cluster_sec2_prop = mean(outcome_sec_adherence, na.rm = T)) %>%
  mutate(cluster_sec2_denom = sum(!is.na(outcome_sec_adherence))) %>%
  
  mutate(cluster_sec3_prop = mean(outcome_sec_appropriate_expanded, na.rm = T)) %>%
  mutate(cluster_sec3_denom = sum(!is.na(outcome_sec_appropriate_expanded))) %>%
  
  mutate(cluster_sec4_prop = mean(outcome_sec_ACT_untested, na.rm = T)) %>%
  mutate(cluster_sec4_denom = sum(!is.na(outcome_sec_ACT_untested))) %>%
  

  mutate(cluster_discount_prop_2 = mean(discount_bin, na.rm = T)) %>%
  mutate(cluster_discountPos_prop = mean(discountPos_bin, na.rm = T)) %>%
  mutate(cluster_discountPos_denom = sum(!is.na(discountPos_bin))) %>%
  mutate(cluster_actPrice0Pos_prop = mean(act_price_pos_0, na.rm = T)) %>%
  mutate(cluster_actPrice0Pos_denom = sum(!is.na(act_price_pos_0))) %>%
  mutate(cluster_actPrice_prop = mean(act_price_num, na.rm = T)) %>%
  mutate(cluster_actPricePos_prop = mean(act_price_pos_num, na.rm = T)) %>%
  
  mutate(cluster_actLinePrice_prop = mean(price_line_pos, na.rm = T)) %>%
  mutate(cluster_actLinePrice_denom = sum(!is.na(price_line_pos))) %>%
  
  mutate(cluster_actLinePricePos_prop = mean(price_line, na.rm = T)) %>%
  mutate(cluster_actLinePricePos_denom = sum(!is.na(price_line))) %>%
  
  mutate(cluster_freeACT_prop = mean(free_act, na.rm = T)) %>%
  mutate(cluster_freeACT_denom = sum(!is.na(free_act))) %>%
  
  mutate(cluster_freeACTPos_prop = mean(free_act_pos, na.rm = T)) %>%
  mutate(cluster_freeACTPos_denom = sum(!is.na(free_act_pos))) %>%
  
  mutate(cluster_ses_prop = mean(PC1, na.rm = T)) %>%
  
  ### Added 12/11/23 for ordering Figure in manuscript
  filter(!(test_results %in% c("Invalid", "Dont know"))) %>%
  mutate(cluster_sec1_mosaic_prop = mean(outcome_sec_tested_expanded, na.rm = T)) %>%
  mutate(cluster_sec1_mosaic_denom = sum(!is.na(outcome_sec_tested_expanded), na.rm = T)) %>%
  ###
  
  select(shop_ni, arm, contains("_prop"), contains("_denom")) %>%

  distinct() %>%
  ungroup() %>%
  mutate(shop_ni = fct_reorder(shop_ni, cluster_sec1_prop)) %>%
  mutate(shop_ni = fct_reorder(shop_ni, as.numeric(arm))) %>%
  group_by(shop_ni) %>%
  mutate(shop_test_rank = cur_group_id()) %>%
  arrange(shop_test_rank) %>%
  group_by(arm) %>%
  mutate(shop_test_rank_in_arm = row_number()) %>%
  ### Added 12/11/23 for ordering Figure in manuscript
  ungroup() %>%
  mutate(shop_ni = fct_reorder(shop_ni, cluster_sec1_mosaic_prop)) %>%
  mutate(shop_ni = fct_reorder(shop_ni, as.numeric(arm))) %>%
  group_by(shop_ni) %>%
  mutate(shop_test_rank_mosaic = cur_group_id()) %>%
  arrange(shop_test_rank_mosaic) %>%
  group_by(arm) %>%
  mutate(shop_test_rank_in_arm_mosaic = row_number()) %>%
  ###
  ungroup() %>%
  mutate(shop_ni = fct_reorder(shop_ni, cluster_freeACTPos_prop)) %>%
  mutate(shop_ni = fct_reorder(shop_ni, as.numeric(arm)))


```


```{r, eval = T}

data_mosaic <-
  data4 %>% 
  # select(test, ACT, test_results, test_else, test_else_res) %>%
  mutate(test_mosaic = as.factor(
    case_when(
      test_results == "Positive" ~ "Positive",
      test_results == "Negative" ~ "Negative",
      test == "No mRDT" ~ "No mRDT"
    ))) %>%
  mutate(test_mosaic = fct_relevel(test_mosaic, 
                                   "Positive", "Negative", "No mRDT")) %>%
  mutate(ACT_mosaic = as.factor(
    case_when(
      ACT == "ACT" ~ "ACT",
      ACT == "No ACT" & other_AM == 1 ~ "Other (non-ACT) AM",
      ACT == "No ACT" & other_AM == 0 ~ "No AM"
    )
  )) %>%
  mutate(arm = fct_relevel(as.factor(arm), "Control", "CD", "CDPD")) %>%
  left_join(shop_test_order, by = c("shop_ni", "arm")) %>%
  set_variable_labels(
    ACT_mosaic = "ACT purchasing",
    test_mosaic = "mRDT results"
  )


```


\newpage

## Table 3 (Results paper). Testing and treatment decisions by arm

```{r Table3}

data_table3 <- data_mosaic %>%
  mutate(AL_2 = factor(AL, 
                       levels = c(0, 1)),
         other_ACT_2 = factor(ifelse(AL == 1, 0, other_ACT), 
                              levels = c(0, 1)),
         other_AM_2 = factor(ifelse(AL == 1 | other_ACT == 1, 0, other_AM), 
                             levels = c(0, 1))) %>%
  set_variable_labels(
    AL_2 = "Took AL",
    other_ACT_2 = "Took ACT (not AL)",
    other_AM_2 = "Took other antimalarial",
    total_spent_na = "Overall expenditure"
  )

table3_all <- data_table3 %>%
  set_variable_labels(Interviewees = "Interviewees") %>%
  tbl_summary(
    include = c(Interviewees),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_tested <- data_table3 %>%
  set_variable_labels(Interviewees = "Tested at outlet") %>%
  # filter(test_mosaic == "Positive" | test_mosaic == "Negative") %>%
  filter(test == "mRDT") %>%
  tbl_summary(
    include = c(Interviewees),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_pos <- data_table3 %>%
  set_variable_labels(Interviewees = "Positive test at outlet") %>%
  filter(test_mosaic == "Positive") %>%
  tbl_summary(
    include = c(Interviewees,
                AL_2,
                other_ACT_2,
                other_AM_2),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_neg <- data_table3 %>%
  set_variable_labels(Interviewees = "Negative test at outlet") %>%
  filter(test_mosaic == "Negative") %>%
  tbl_summary(
    include = c(Interviewees,
                AL_2,
                other_ACT_2,
                other_AM_2),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_invalid <- data_table3 %>%
  set_variable_labels(Interviewees = "Invalid or unknown test result at outlet") %>%
  filter(test == "mRDT" & 
           (test_results %in% c("Invalid", "Dont know") | is.na(test_results))) %>%
  tbl_summary(
    include = c(Interviewees),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_untested <- data_table3 %>%
  set_variable_labels(Interviewees = "Not tested at outlet") %>%
  filter(test_mosaic == "No mRDT") %>%
  tbl_summary(
    include = c(Interviewees),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_no_test <- data_table3 %>%
  set_variable_labels(Interviewees = "No test") %>%
  filter(test_mosaic == "No mRDT" &
            ((test_else == "No" ) |
               is.na(test_else))) %>%
  tbl_summary(
    include = c(Interviewees,
                AL_2,
                other_ACT_2,
                other_AM_2),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_else_pos <- data_table3 %>%
  set_variable_labels(Interviewees = "Had positive test from elsewhere") %>%
  filter(test_mosaic == "No mRDT" &
           test_else != "No" &
           (str_detect(tolower(malaria_test_res), "positive"))) %>%
  tbl_summary(
    include = c(Interviewees,
                AL_2,
                other_ACT_2,
                other_AM_2),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_else_neg <- data_table3 %>%
  set_variable_labels(Interviewees = "Had negative test from elsewhere") %>%
  filter(test_mosaic == "No mRDT" & 
           (str_detect(tolower(malaria_test_res), "negative"))) %>%
  tbl_summary(
    include = c(Interviewees,
                AL_2,
                other_ACT_2,
                other_AM_2),
    by = arm,
    type = list(everything() ~ "dichotomous"),
    value = list(everything() ~ 1),
    statistic = list(everything() ~ "{n} ({p}%)",
                     Interviewees ~ "{n}"),
    missing_text = "Missing") %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label()

table3_gt <- tbl_stack(list(table3_all, table3_tested, table3_pos, table3_neg,
                            table3_invalid, table3_untested, table3_no_test,
                            table3_else_pos, table3_else_neg)) 
```

```{r}
table3_gt %>%
  as_flex_table() %>%
  footnote(i = 1, j = 1,
           value = as_paragraph("Includes only interviewees who consented and met all inclusion criteria."),
           ref_symbols = c("1"),
           part = "body") %>%
  flextable::set_caption('Testing and treatment decisions by arm.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "Table3Paper")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>%
  theme_zebra() %>% 
  align(j = -1, align = "center", part = "all")

```


\newpage

## Table 4 (Results paper). Summary of price variables among ACT purchasers.

```{r, ft.keepnext = FALSE}

data_mosaic %>%
  filter(ACT == "ACT") %>% 
  tbl_summary(include = c(arm, contains( "_cat"),
                          altpay, discount),
              by = arm,
              missing_text = "Missing") %>%
  add_overall(last = TRUE) %>%
  add_stat_label() %>%
  as_flex_table() %>%
  flextable::set_caption('Summary of price variables, among ACT purchasers.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "discountPaySummariesPurchasers")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>% 
  align(j = -1, align = "center", part = "all")

```


## Table 5 (Results paper). Total spent by test behavior and result.

```{r totalSpent}

data_mosaic %>%
  tbl_summary(by = test_mosaic,
              include = total_spent_na,
              type = list(all_continuous() ~ "continuous2"),
              statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                     "{median} [{p25}, {p75}]")),
              missing_text = "Missing") %>%
  add_overall(last = TRUE, col_label = "**Overall**") %>%
  add_stat_label() %>%
  as_flex_table() %>%
  flextable::set_caption('Total spent by test behavior and result.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "totalSpent")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>% 
  align(j = -1, align = "center", part = "all")

```




\newpage

# Tables included in paper supplement, other miscellaneous tables

```{r Stratification-function}
# Set border style
border_style <- officer::fp_border(color = "black", width = 1)

data5 <- data_tab1 %>% 
  mutate(across(c(ACT, case, test), as.factor)) %>%
  # mutate(across(c(ACT, case, test), recode_factor, "0" = "No", "1" = "Yes"))
  mutate(ACT = recode_factor(ACT, "1" = "ACT", "0" = "No ACT")) %>%
  mutate(case = recode_factor(case, "1" = "Suspected case", "0" = "Not suspected case")) %>%
  mutate(test = recode_factor(test, "Yes" = "mRDT", "No" = "No mRDT"))

get_stratification <- function(strat_var){
  
  reset_gtsummary_theme()
  # theme_gtsummary_journal(journal = "jama")
  theme_gtsummary_compact()

  tbl1 <- 
    data5 %>%
    filter(!is.na({{strat_var}})) %>%
    tbl_strata(
      strata = {{strat_var}},
      .header = "{strata}, N = {n}",
      .tbl_fun = 
        ~ .x %>%
        tbl_summary(include = c(Interviewees, respondent, respondent_age, 
                                client_gender, respondent_gender),
                    by = arm,
                    type = list(Interviewees ~ "dichotomous"),
                    value = list(Interviewees ~ 1),
                    statistic = list(Interviewees ~ "{N}"),
                    missing = "always",
                    missing_text = "Missing") %>%
        modify_header(all_stat_cols() ~ "**{level}**") %>%
        modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
        add_overall(last = TRUE, col_label = "**Overall**") %>%
        add_stat_label()
    )
  
  tbl_adult <- 
    data5 %>%
    filter(!is.na({{strat_var}})) %>%
    filter(respondent == "Adult client") %>%
    tbl_strata(
      strata = {{strat_var}},
      .header = "{strata}, N = {n}",
      .tbl_fun = 
        ~ .x %>%
        tbl_summary(include = c(adult_client_age, adult_client_gender),
                    by = arm,
                    missing = "always",
                    missing_text = "Missing") %>%
        modify_header(all_stat_cols() ~ "**{level}**") %>%
        modify_spanning_header(all_stat_cols() ~ "**Study Arm**")  %>%
        add_overall(last = TRUE, col_label = "**Overall**") %>%
        add_stat_label()
      # ,
      # .header = "**{strata}**, N = {n}"
    )
  
  tbl_child <- 
    data5 %>% 
    filter(!is.na({{strat_var}})) %>%
    filter(respondent == "Guardian of child client") %>%
    tbl_strata(
      strata = {{strat_var}},
      .header = "{strata}, N = {n}",
      .tbl_fun = 
        ~ .x %>%
        tbl_summary(include = c(child_age, child_gender),
                    by = arm,
                    type = list(child_age ~ "continuous2"),
                    statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                           "{median} [{p25}, {p75}]",
                                                           "{min}, {max}")),
                    missing = "always",
                    missing_text = "Missing") %>%
        modify_header(all_stat_cols() ~ "**{level}**") %>%
        modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
        add_overall(last = TRUE, col_label = "**Overall**") %>%
        add_stat_label()
    )
  
  tbl_covariates <- data5 %>% 
    filter(!is.na({{strat_var}})) %>%
    tbl_strata(
      strata = {{strat_var}},
      .header = "{strata}, N = {n}",
      .tbl_fun = 
        ~ .x %>%
        tbl_summary(include = c(school_2, 
                                wealth_quintile
                                ),
                    by = arm,
                    statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                           "{median} [{p25}, {p75}]",
                                                           "{min}, {max}")),
                    missing = "always",
                    missing_text = "Missing") %>%
        modify_header(all_stat_cols() ~ "**{level}**") %>%
        modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
        add_overall(last = TRUE, col_label = "**Overall**") %>%
        add_stat_label()
    )
  
  tbl_period <- data5 %>%
    filter(!is.na({{strat_var}})) %>%
    tbl_strata(
      strata = {{strat_var}},
      .header = "{strata}, N = {n}",
      .tbl_fun = 
        ~ .x %>%
        tbl_summary(include = c(timespan),
                    by = arm,
                    statistic = list(all_continuous2() ~ c("{mean} ({sd})", 
                                                           "{median} [{p25}, {p75}]",
                                                           "{min}, {max}")),
                    missing = "always",
                    missing_text = "Missing") %>%
        modify_header(all_stat_cols() ~ "**{level}**") %>%
        modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>%
        add_overall(last = TRUE, col_label = "**Overall**") %>%
        add_stat_label()
    )
  
  tbl1_combined <- tbl_stack(list(tbl1, tbl_adult, tbl_child, tbl_covariates, tbl_period))
  
  return(tbl1_combined)
  
}


```


## Table S1 (Results paper supplement). Secondary Outcomes- Sample characteristics by intervention arm and mRDT* use.

```{r Table-B1-4, cache = FALSE, ft.keepnext = FALSE}

gt_strat_tested <- get_stratification(test)

ft_strat_tested <- gt_strat_tested %>%
  as_flex_table() %>%
  flextable::set_caption('Secondary Outcomes 2 & 4 -- Sample characteristics by intervention arm and mRDT* use.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "Table1test")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>% 
  theme_zebra() %>% 
  align(j = -1, align = "center", part = "all") %>%
  vline(j = 4, part = "all", border = border_style) %>%
  add_footer_row(values = as_paragraph("*Only mRDTs conducted at the shop are included"), colwidths = 7)

ft_strat_tested

```


\newpage

## Table S2 (Results paper supplement). Adherence by client gender and test result, among mRDT purchasers.

```{r t9gender}

adherence_all_gender <- data_mosaic %>%
  set_variable_labels(
    outcome_sec_adherence = "mRDT tested clients whose treatment adhered to test results"
  ) %>%
  tbl_summary(
    include = c(outcome_sec_adherence),
    by = client_gender,
    type = list(outcome_sec_adherence ~ "dichotomous"),
    # value = list(outcome_sec_adherence = "ACT"),
    statistic = list(all_dichotomous() ~ "{n} / {N} ({p}%)"),
    missing = "no",
    missing_text = "Missing") %>%
  add_stat_label() %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**")  %>%
  add_overall(last = TRUE, col_label = "**Overall**")  

adherence_pos_gender <- data_mosaic %>%
  filter(test_mosaic == "Positive") %>%
  set_variable_labels(
    outcome_sec_adherence = "Positive mRDT clients whose treatment adhered to test results"
  ) %>%
  tbl_summary(
    include = c(outcome_sec_adherence),
    by = client_gender,
    type = list(outcome_sec_adherence ~ "dichotomous"),
    # value = list(outcome_sec_adherence = "ACT"),
    statistic = list(all_dichotomous() ~ "{n} / {N} ({p}%)"),
    missing = "no",
    missing_text = "Missing") %>%
  add_stat_label() %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**")  %>%
  add_overall(last = TRUE, col_label = "**Overall**") 

adherence_neg_gender <- data_mosaic %>%
  filter(test_mosaic == "Negative") %>%
  set_variable_labels(
    outcome_sec_adherence = "Negative mRDT clients whose treatment adhered to test results"
  ) %>%
  tbl_summary(
    include = c(outcome_sec_adherence),
    by = client_gender,
    type = list(outcome_sec_adherence ~ "dichotomous"),
    # value = list(outcome_sec_adherence = "ACT"),
    statistic = list(all_dichotomous() ~ "{n} / {N} ({p}%)"),
    missing = "no",
    missing_text = "Missing") %>%
  add_stat_label() %>%
  modify_header(all_stat_cols() ~ "**{level}**") %>%
  modify_spanning_header(all_stat_cols() ~ "**Study Arm**")  %>%
  add_overall(last = TRUE, col_label = "**Overall**") 

tbl_stack(list(adherence_all_gender, adherence_pos_gender, adherence_neg_gender)) %>%
  as_flex_table() %>%
  flextable::set_caption('Adherence by client gender and test result, among mRDT purchasers.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "t9gender")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>%
  theme_zebra() %>% 
  align(j = -1, align = "center", part = "all")

```

## Table S3 (Results paper supplement). Proportion who purchased antibiotics, overall and by subgroups.

```{r table-antibiotics-CPHIA, ft.keepnext = FALSE}

ab_interviewees <- data_mosaic %>%
  mutate(Interviewees = fct_recode(as.factor(Interviewees), "All interviewees" = "1")) %>%
  set_variable_labels(Interviewees = "Overall") %>%
  tbl_cross(row = Interviewees,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL)

ab_client_gender <- data_mosaic %>%
  tbl_cross(row = client_gender,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL) 

ab_client_age <- data_mosaic %>%
  tbl_cross(row = client_age,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL) 

ab_respondent_gender <- data_mosaic %>%
  tbl_cross(row = respondent_gender,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL) 

ab_respondent_age <- data_mosaic %>%
  tbl_cross(row = respondent_age,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL)

ab_education <- data_mosaic %>%
  tbl_cross(row = school_2,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL)

ab_wealth <- data_mosaic %>%
  tbl_cross(row = wealth_quintile,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL)

ab_test <- data_mosaic %>%
  tbl_cross(row = test_mosaic,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL)

ab_act <- data_mosaic %>%
  tbl_cross(row = ACT_mosaic,
            col = antibiotic,
            statistic = "{n}/{N} ({p}%)",
            percent = "row",
            missing_text = "Missing",
            margin = NULL)

ab_stacked <- tbl_stack(list(ab_interviewees, ab_client_gender, ab_client_age,
                             ab_respondent_gender, ab_respondent_age, ab_education,
                             ab_wealth, ab_test, ab_act)) %>%
  modify_column_hide(stat_1) %>%
  modify_header(stat_2 ~ "**Purchased antibiotic**") %>%
  modify_spanning_header(stat_2 ~ "")

ab_stacked %>%
  as_flex_table() %>%
  flextable::set_caption('Proportion who purchased antibiotic, overall and by subgroups.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "antibiotic-summary")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>%
  theme_zebra() %>%
  align(j = -1, align = "center", part = "all")

```


## Table S4 (Results paper supplement). Injections summary.

```{r injections-table}



data_injections <- data4 %>%
  set_variable_labels(injection_combined = "Any injection reported") %>%
  mutate(test_status = as.factor(case_when(
    test_results == "Positive" ~ "Positive test (any source)",
    test_results == "Negative" ~ "Negative test (shop)",
    test_else_res == "Positive" ~ "Positive test (any source)",
    (is.na(test_results) | test_results %in% c("Dont know", "Invalid")) & 
       (test_else_res != "Positive" | is.na(test_else_res)) ~ "No test"
  ))) %>% 
  mutate(test_status = fct_relevel(test_status, 
                                   "No test", 
                                   "Positive test (any source)", 
                                   "Negative test (shop)")) %>%
  mutate(injection_art_oral = as.factor(case_when(
    injection_type_combined == "Identified as artemisinin injection" &
      ACT == "ACT" ~ "Yes",
    injection_combined == "Yes" ~ "No",
  ))) %>%
  mutate(injection_malaria_oral = as.factor(case_when(
    injection_type_combined == "Identified as 'malaria injection' or 'antimalarial injection' or quinine injection" &
      ACT == "ACT" ~ "Yes",
    injection_combined == "Yes" ~ "No",
  ))) %>%
  mutate(injection_unspecified_oral = as.factor(case_when(
    injection_type_combined == "Unspecified injection" &
      ACT == "ACT" ~ "Yes",
    injection_combined == "Yes" ~ "No",
  ))) %>%
  mutate(injection_combined_artemisinin = ifelse(injection_type_combined == "Identified as artemisinin injection", "Yes", "No"),
         injection_combined_malaria = ifelse(injection_type_combined == "Identified as 'malaria injection' or 'antimalarial injection' or quinine injection", "Yes", "No"),
         injection_combined_painkiller = ifelse(injection_type_combined == "Painkiller or antibiotic injection", "Yes", "No"),
         injection_combined_unspecified = ifelse(injection_type_combined == "Unspecified injection", "Yes", "No")) %>%
  mutate(across(starts_with("injection_combined_"), fct_na_value_to_level, level = "No")) %>%
  set_variable_labels(
    injection_art_oral = "Artemisinin injection plus oral ACT (AL or DP)",
    injection_malaria_oral = "'Malaria' injection plus oral ACT (AL or DP)",
    injection_unspecified_oral = "Unspecified injection plus oral ACT (AL or DP)",
    injection_combined_artemisinin = "Injection identified as artemisinin injection",
    injection_combined_malaria = "Injection identified as 'malaria injection' or 'antimalarial injection' or quinine injection",
    injection_combined_painkiller = "Painkiller or antibiotic injection",
    injection_combined_unspecified = "Unspecified injection"
  )


gt_injections_test <- data_injections %>% 
  tbl_summary(include = c(injection_combined, injection_type_combined),
              by = test_status,
              type = list(injection_combined ~ "dichotomous"),
              # statistic = list(injection_combined ~ "{N}")
              missing = "no"
              ) 


gt_injections_arm <- data_injections %>% 
  tbl_summary(include = c(injection_combined, injection_type_combined),
              by = arm,
              type = list(injection_combined ~ "dichotomous"),
              # statistic = list(injection_combined ~ "{N}")
              missing = "no"
              ) %>%
  add_overall(last = TRUE, col_label = "**Overall**") 

gt_injections_merged <- tbl_merge(list(gt_injections_test, gt_injections_arm),
                                  tab_spanner = c("**Test status**", "**Study Arm**"))

gt_injections_merged %>%
  as_flex_table() %>%
  flextable::set_caption('Injections summary.',
                         autonum = officer::run_autonum(seq_id = "tab", 
                                                        bkm = "InjectionsTable")) %>%
  bold(bold = TRUE, part = "header") %>%
  autofit() %>%
  set_table_properties(layout = "autofit") %>%
  font(fontname = "Calibri", part = "all") %>%
  fontsize(size = 10, part = "all") %>%
  theme_zebra() %>% 
  align(j = -1, align = "center", part = "all")
  
```


```{r data-dictionary, eval = FALSE}

# Create

write_excel_csv(codebook::codebook_table(data_gee) %>%
                  select(-c(ordered, hist, format.stata)),
                file = paste0(repository_path, "/data_gee_codebook_ni.csv")
  )

```