--- title: "TESTsmART Nigeria Analysis" author: "David Arthur, John Gallis, Yunji Zhou, Liz Turner" subtitle: "Contents produced in accordance with Statistical Analysis Plan 'TESTsmART Aim 2 SAP Nigeria 2023-06-13 Final.docx'" output: bookdown::word_document2: number_sections: false word_document: default html_document: df_print: paged --- ```{r setup, include = FALSE} knitr::opts_chunk$set(echo = FALSE, warning = FALSE, message = FALSE, ft.align = "left", ft.keepnext = TRUE, cache = FALSE) ``` ```{r libraries, include = FALSE, cache = F} # Packages packages <- c("MASS","gt","gtsummary","tidyverse","haven","flextable","ftExtra","expss", "labelled","lme4","lmerTest","sjmisc", "bookdown","foreign", "psych", "LearnPCA", "ICCbin", "gee", "geepack", "geesmv", "ggmosaic") # Install packages not yet installed installed_packages <- packages %in% rownames(installed.packages()) if (any(installed_packages == FALSE)) { install.packages(packages[!installed_packages]) } # Packages loading invisible(lapply(packages, library, character.only = TRUE)) ``` ```{r set paths, include = FALSE} ## set data_path to location of 'TS_Kenya_analytic_dataset.rdata' data_path <- "________" ``` ```{r load-data} load(paste0(data_path, "/TS_Nigeria_analytic_dataset.rdata")) data_included <- data_raw_ni %>% # For some reason in REDCap, discount was coded as 0="yes" and 1="no", so recode to avoid confusion mutate(discount_recoded = case_when( discount == 0 ~ 1, discount == 1 ~ 0, discount == 999 ~ 999 )) %>% mutate(test_pay_cat = as.factor( case_when( test_pay == 9999 ~ "Don't know", near(test_pay, 0) ~ "0", near(test_pay, 250) ~ "250", test_pay > 250 ~ ">250" ) )) %>% mutate(test_pay_cat = fct_relevel( test_pay_cat, "0", "40", ">40", "Don't know")) %>% # Discount among all included mutate(discount_bin = case_when( discount_recoded == 1 ~ 1, discount_recoded == 0 ~ 0, discount_recoded == 999 ~ 0 )) %>% # Discount among ACT purchasers mutate(discountACT_bin = case_when( discount_recoded == 1 & ACT == 1 ~ 1, discount_recoded == 0 & ACT == 1 ~ 0, discount_recoded == 999 & ACT == 1 ~ 0 )) %>% # Discount among mRDT positive ACT purchasers mutate(discountPos_bin = case_when( discount_recoded == 1 & ACT == 1 & test_results == 2 ~ 1, discount_recoded == 0 & ACT == 1 & test_results == 2 ~ 0, discount_recoded == 999 & ACT == 1 & test_results == 2 ~ 0 )) %>% mutate(act_price_num = ifelse(act_price < 9999, act_price, NA)) %>% mutate(act_price_pos_num = ifelse(act_price_pos < 9999, act_price_pos, NA)) %>% mutate(act_price_neg_num = ifelse(act_price_neg < 9999, act_price_neg, NA)) %>% mutate(act_price_cat = as.factor( case_when( act_price == 9999 ~ "Don't know", near(act_price, 0) ~ "0", act_price > 0 & act_price < 650 ~ "1-649", act_price >= 650 & act_price <= 850 ~ "650-850", act_price > 850 ~ ">850" ) )) %>% mutate(act_price_pos_cat = as.factor( case_when( act_price_pos == 9999 ~ "Don't know", near(act_price_pos, 0) ~ "0", act_price_pos > 0 & act_price_pos < 650 ~ "1-649", act_price_pos >= 650 & act_price_pos <= 850 ~ "650-850", act_price_pos > 850 ~ ">850" ) )) %>% mutate(act_price_neg_cat = as.factor( case_when( act_price_neg == 9999 ~ "Don't know", near(act_price_neg, 0) ~ "0", act_price_neg > 0 & act_price_neg < 650 ~ "1-649", act_price_neg >= 650 & act_price_neg <= 850 ~ "650-850", act_price_neg > 850 ~ ">850" ) )) %>% mutate(act_price_cat = fct_relevel( act_price_cat, "0", "1-649", "650-850", ">850", "Don't know")) %>% mutate(act_price_pos_cat = fct_relevel( act_price_pos_cat, "0", "1-649", "650-850", ">850", "Don't know")) %>% mutate(act_price_neg_cat = fct_relevel( act_price_neg_cat, "0", "1-649", "650-850", ">850", "Don't know")) %>% mutate(act_price_pos_0 = case_when( near(act_price_pos, 0) ~ 1, act_price_pos > 0 & act_price < 9999 ~ 0 )) %>% mutate(total_spent_na = ifelse(!(total_spent %in% c(999, 9999, 99999)), total_spent, NA)) %>% set_variable_labels( test_pay_cat = "mRDT cost (Naira, categorized)", act_price_cat = "ACT cost (Naira, categorized)", act_price_pos_cat = "ACT cost among test positive (Naira, categorized)", act_price_neg_cat = "ACT cost among test negative/untested (Naira, categorized)", act_price_pos_0 = "Free ACT among test positive", total_spent_na = "How much did you spend at the pharmacy today? ('Don't know' as missing)" ) %>% mutate(arm = fct_relevel(arm, "Control")) %>% mutate(arm = fct_drop(arm)) ``` ```{r asset-variables, cache = FALSE} reset_gtsummary_theme() theme_gtsummary_compact() data_asset <- data_included %>% select(record_id, water:occupation_private_sp) %>% mutate(toilet_bin = case_when( is.element(toilet, c(5, 7, 8, 555)) ~ 1, is.element(toilet, c(2, 3, 4, 6)) ~ 2 )) %>% mutate(floor_bin = case_when( is.element(floor, c(3)) ~ 1, is.element(floor, c(4, 5, 6, 7, 555)) ~ 2 )) %>% mutate(walls_bin = case_when( is.element(walls, c(3, 5, 6)) ~ 1, is.element(walls, c(1, 2, 4, 7)) ~ 2 )) %>% mutate(own_house = case_when( is.element(house, c(1)) ~ 1, is.element(house, c(2)) ~ 0 )) %>% mutate(cows_bin = ifelse(livestock == 0, 0, cows)) %>% mutate(sheep_bin = ifelse(livestock == 0, 0, sheep)) %>% mutate(goats_bin = ifelse(livestock == 0, 0, goats)) %>% set_value_labels(toilet_bin = c("Non-improved latrine" = 1, "Improved latrine" = 2), floor_bin = c("Earthen" = 1, "Hard surface" = 2), walls_bin = c("Porous" = 1, "Non-porous" = 2), cows_bin = c("No" = 0, "Yes" = 1), sheep_bin = c("No" = 0, "Yes" = 1), goats_bin = c("No" = 0, "Yes" = 1)) asset_table <- data_asset %>% haven::as_factor() %>% select(-c(contains("_sp"), contains("sqft"), school, roof)) %>% # replace when confirm what to do with values = 9999 or 999 mutate(land_acres = ifelse(land_acres >= 999, NA, land_acres)) %>% mutate(cows_amt = ifelse(cows_amt >= 999, NA, cows_amt)) %>% mutate(sheep_amt = ifelse(sheep_amt >= 999, NA, sheep_amt)) %>% mutate(goats_amt = ifelse(goats_amt >= 999, NA, goats_amt)) %>% set_variable_labels(land_acres = "Number of acres", cows_amt = "How many cows does your household have?", sheep_amt = "How many sheep does your household have?", goats_amt = "How many goats does your household have?") %>% tbl_summary(include = -record_id, type = list(all_continuous() ~ "continuous2", all_dichotomous() ~ "categorical"), statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}", "{N_nonmiss}")), missing_text = "Missing") %>% add_stat_label() ``` ```{r polychoricPCA, cache = FALSE, results = 'hide'} # Start by excluding variables from above table where <10% or >90% of observations are "Yes". data_pca <- data_asset %>% select(record_id, refrigerator, radio, motorcycle, car, bank, own_house) %>% data.frame() %>% mutate(across(everything(), ~ na_if(., 999))) %>% na.omit() # Calculate polychoric PCA scores pc3 <- principal(data_pca %>% select(-record_id), cor = "mixed", nfactors = 1) # Join PCA scores to other data data2 <- bind_cols(data_pca, pc3$scores) %>% arrange(PC1, record_id) %>% set_variable_labels(PC1 = "Wealth index (continuous)") ``` ```{r combining-data} # Multiple tables data3 <- data_included %>% group_by(shop_ni) %>% mutate(cluster_id = cur_group_id()) %>% mutate(Interviewees = 1) %>% ungroup() %>% left_join(data2 %>% select(record_id, contains("_bin"), PC1), by = "record_id") %>% mutate(wealth_quintile = as.factor(cut_number(PC1, n = 5, labels = FALSE))) %>% mutate(wealth_binary = as.factor( case_when( is.element(wealth_quintile, c(1, 2)) ~ "0 to 40th", is.element(wealth_quintile, c(3, 4, 5)) ~ ">40.0" ))) %>% mutate(wealth_quintile = fct_recode(wealth_quintile, "0 to 20th" = "1", ">20.0 to 40th" = "2", ">40.0 to 60th" = "3", ">60.0 to 80th" = "4", ">80.0" = "5")) %>% mutate(school_binary = as.factor( case_when( is.element(school_2, c(1, 2)) ~ "None/Primary", is.element(school_2, c(3, 4, 5, 6)) ~ "Secondary/College/University/Polytechnic" ))) %>% set_variable_labels( # timespan = "Time Period", wealth_quintile = "Wealth index (quintile)", wealth_binary = "Wealth index (binary)", outcome_prim = "ACTs sold to malaria test-positive clients", # outcome_prim_sa_al = "ALs sold to malaria test-positive clients", # outcome_prim_sa_pre_exp = "ACTs sold to malaria test-positive clients before expansion", # outcome_prim_sa_post_exp = "ACTs sold to malaria test-positive clients after expansion", # outcome_prim_sa_pre_leaflets = "ACTs sold to malaria test-positive clients before CHW outreach", # outcome_prim_sa_post_leaflets = "ACTs sold to malaria test-positive clients after CHW outreach", outcome_sec_tested = "Suspected malaria cases that receive a mRDT", outcome_sec_tested_no_previous = "Suspected malaria cases that receive a mRDT with no prior test", outcome_sec_tested_expanded = "Met inclusion criteria that receive a mRDT", outcome_sec_adherence = "Malaria tested clients whose treatment adhered to test results", outcome_sec_appropriate = "Suspected malaria cases that are managed appropriately", outcome_sec_appropriate_expanded = "Met inclusion criteria that are managed appropriately", outcome_sec_ACT_untested = "Untested clients taking ACT", # al_expansion = "Post-expansion of approved ALs", # leaflets = "Post-deployment of CHWs", school_2 = "Education level", school_binary = "Education level (binary)") %>% haven::as_factor() %>% mutate(timespan = as.factor(timespan)) %>% set_variable_labels(timespan = "Time Period") # data3 %>% select(timespan) # str(data3$timespan) data3$timespan <- fct_relevel(data3$timespan, c("One", "Two", "Three", "Four")) %>% fct_recode("Feb-May 2022" = "One", "Jun-Aug 2022" = "Two", "Sep-Nov 2022" = "Three", "Dec 2022-Feb 2023" = "Four") # Add suffix to med# variables for purpose of reshaping rename_function <- function(name) { paste0(name, "_type") } prices_long <- data3 %>% rename_with(rename_function, matches("^med\\d+$")) %>% select(record_id, matches("^med\\d+_type"), matches("med\\d+_price"), ) %>% pivot_longer(cols = c(matches("^med\\d+_type"), matches("med\\d+_price")), names_to = c("num", ".value"), names_pattern = "med(\\d+)_(\\w*)") %>% filter(type %in% c("AL (Lonart/CoArtem/Artefan/Lumartem)", "Other ACT (DHAP, DP, Duocotexin, P-alaxin)")) %>% rename(price_line = price) %>% set_variable_labels(price_line = "Price (from line listing)") prices_paid <- prices_long %>% distinct(record_id, .keep_all = TRUE) %>% mutate(across(price_line, ~ na_if(., 9999))) %>% mutate(across(price_line, ~ na_if(., 999))) %>% mutate(across(price_line, ~ na_if(., 99999))) %>% mutate(free_act = case_when( price_line == 0 ~ 1, price_line > 0 ~ 0, )) %>% select(-num) %>% set_variable_labels(free_act = "Free ACT (from line listing)") data4 <- data3 %>% mutate(across(c(ACT, case, test), as.factor)) %>% mutate(ACT = recode_factor(ACT, "1" = "ACT", "0" = "No ACT")) %>% mutate(case = recode_factor(case, "1" = "Suspected case", "0" = "Not suspected case")) %>% mutate(test = recode_factor(test, "Yes" = "mRDT", "No" = "No mRDT")) %>% left_join(prices_paid, by = "record_id") %>% mutate(price_line_pos = ifelse(test_results == "Positive", price_line, NA)) %>% mutate(price_line_neg = ifelse(test_results == "Negative", price_line, NA)) %>% mutate(price_line_untested = ifelse(test == "No mRDT", price_line, NA)) %>% mutate(free_act_pos = ifelse(test_results == "Positive", free_act, NA)) %>% set_variable_labels( price_line_pos = "Price with Positive mRDT (from line listing)", price_line_neg = "Price with Negative mRDT (from line listing)", price_line_untested = "Price with no mRDT (from line listing)", free_act_pos = "Free ACT with Positive mRDT (from line listing)" ) ``` ## Table 1 (Results paper). Characteristics of participants enrolled in TESTsmART by arm ```{r, ft.keepnext = FALSE} data_tab1 <- data3 %>% mutate(arm = haven::as_factor(arm)) %>% mutate(arm = fct_relevel(arm, "Control")) %>% set_variable_labels( school_2 = "Respondent education level", wealth_quintile = "Respondent wealth index (quintile)" ) tbl_interviewees <- data_tab1 %>% tbl_summary(include = c(Interviewees, respondent # , respondent_age, client_gender, respondent_gender ), by = arm, type = list(Interviewees ~ "dichotomous"), value = list(Interviewees ~ 1), statistic = list(Interviewees ~ "{N}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() tbl1 <- data_tab1 %>% tbl_summary(include = c(respondent_age, respondent_gender), by = arm, # type = list(Interviewees ~ "dichotomous"), # statistic = list(Interviewees ~ "{N}"), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() tbl_child <- data_tab1 %>% filter(respondent == "Guardian of child client") %>% tbl_summary(include = c(child_age, child_gender), by = arm, type = list(child_age ~ "continuous2"), statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}")), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() tbl_covariates <- data_tab1 %>% tbl_summary(include = c(school_2, wealth_quintile ), by = arm, statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}")), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() tbl_period <- data_tab1 %>% tbl_summary(include = c(timespan), by = arm, statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}")), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() tbl1_combined <- tbl_stack(list(tbl_interviewees, tbl1, tbl_covariates, tbl_child, tbl_period)) tbl1_combined %>% as_flex_table() %>% footnote(i = 1, j = 1, value = as_paragraph("Includes only interviewees who consented and met all inclusion criteria."), ref_symbols = c("1"), part = "body") %>% flextable::set_caption('Characteristics of participants enrolled in TESTsmART by arm', autonum = officer::run_autonum(seq_id = "tab", bkm = "Table1")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` # Summary of Numerators and Denominators for Primary and Secondary Outcomes | Outcome | Numerator | Denominator | |:--------|:----------|:------------| | Primary - ACT purchased by test positive | tested positive and purchased ACT | purchased ACT | | Secondary 1 - Use of mRDT | mRDT tested | suspected malaria | | Secondary 2 - Adherence to mRDT result | (mRDT+ and ACT) OR (mRDT- and no AM) | tested | | Secondary 3 - Appropriate case management | (mRDT+ and ACT) OR (mRDT- and no AM) | suspected malaria | | Secondary 4 - ACT use among untested | purchased ACT and untested | untested | ```{r Calculate-proportions-and-sizes} get_size <- function(outcome){ data3 %>% group_by(arm, shop_ni) %>% # filter(shop_ni == shop) %>% filter(!is.na(.data[[outcome]])) %>% summarize(cluster_size = n(), numerator = sum(.data[[outcome]]), prop = mean(.data[[outcome]])) %>% ungroup() } outcomes <- data3 %>% select(all_of(contains("outcome_"))) %>% select(-outcome_sec_adherence_else) %>% names() outcome_sizes <- data.frame() for(outcome in outcomes){ new_df <- data.frame(outcome = as.factor(outcome)) %>% bind_cols(get_size(outcome)) outcome_sizes <- bind_rows(outcome_sizes, new_df) } outcome_sizes <- outcome_sizes %>% mutate(outcome_full = dplyr::recode(outcome, "outcome_prim" = "Primary", "outcome_prim_sa_al" = "Primary - restricted to AL", "outcome_prim_sa_pre_leaflets" = "Primary - before leaflet distribution", "outcome_prim_sa_post_leaflets" = "Primary - after leaflet distribution", "outcome_sec_tested" = "Tested (denom = suspected)", "outcome_sec_tested_no_previous" = "Tested - no prior test", "outcome_sec_tested_expanded" = "Tested - expanded denom", "outcome_sec_adherence" = "Adherence (denom = tested)", "outcome_sec_appropriate" = "Approp manage (denom = suspected)", "outcome_sec_appropriate_expanded" = "Approp manage - expanded denom", "outcome_sec_ACT_untested" = "ACT use untested (denom = untested)" )) ``` \newpage # GEE Analysis - Full table for each outcome For each outcome, we estimated risk ratios using the modified Poisson approach with log link. To estimate risk differences, we first tried the modified Poisson approach with identity link. In cases where that model failed to converge, we used the same model, but with the binary versions of education level and wealth index rather than quintiles. In cases where that model also failed to converge, we used a binomial distribution with identity link, with education level and wealth index divided into quintiles. In cases where that model also failed to converge, we used a binomial distribution with identity link, with the binary versions of education level and wealth index. In cases where that model also failed to converge, we used a normal distribution with identity link, with education level and wealth index divided into quintiles. The tables below indicate the model used for each outcome, determined by convergence results and the criteria outlined in the previous paragraph. ```{r GEE-Prim, results = 'hide', cache = F} reset_gtsummary_theme() theme_gtsummary_journal(journal = "jama") theme_gtsummary_compact() # Get n/N (%) for each arm prop_prim <- data3 %>% select(outcome_prim, arm) %>% na.omit() %>% group_by(arm) %>% summarize(n = sum(outcome_prim), N = n()) %>% mutate(prop_num = n / N) %>% mutate(prop = paste0(n, "/", N, " (", signif(100 * prop_num, digits = 2), "%)")) # Data must be sorted by cluster id for gee, geeglm, and geesmv functions data_gee <- data3 %>% arrange(shop_ni) %>% mutate(period_quadratic = period^2) %>% mutate(period_cubic = period^3) %>% mutate(arm = as.factor(arm)) %>% mutate(arm = fct_relevel(arm, "Control")) # levels(data_gee$arm) # Independent variables for minimally adjusted model min_adjusted_vector <- c("arm", "period", "period_quadratic", "period_cubic", "quadrant") # Independent variables for fully adjusted model fully_adjusted_vector <- c("arm", "period", "period_quadratic", "period_cubic", "client_gender", "client_age", "school_2", "wealth_quintile", "quadrant") # Independent variables for fully adjusted model (wealth as binary when quintiles won't converge) fully_adjusted_2_vector <- c("arm", "period", "period_quadratic", "period_cubic", "client_gender", "client_age", "school_binary", "wealth_binary", "quadrant") # Look at number of observations with missing values for covariates in fully adjusted model data_gee %>% select(all_of(fully_adjusted_vector)) %>% map(~sum(is.na(.))) # Create strings to use in model formulas min_adjusted <- paste0(min_adjusted_vector, collapse = " + ") fully_adjusted <- paste0(fully_adjusted_vector, collapse = " + ") fully_adjusted_2 <- paste0(fully_adjusted_2_vector, collapse = " + ") # Create datasets with no missing values in outcome variable or covariates data_gee_prim <- data_gee %>% dplyr::select(outcome_prim, cluster_id, all_of(min_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_prim[prop_prim$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_prim[prop_prim$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_prim[prop_prim$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() data_gee_prim_fully <- data_gee %>% dplyr::select(outcome_prim, cluster_id, all_of(fully_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_prim[prop_prim$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_prim[prop_prim$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_prim[prop_prim$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() # Log link # Minimally adjusted fit_geepack_prim <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)), id = cluster_id, data = data_gee_prim, family = poisson, corstr = "independence") # Fully adjusted fit_geepack_prim_fully <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted)), id = cluster_id, data = data_gee_prim_fully, family = poisson, corstr = "independence") # Identity link # Minimally adjusted (doesn't converge using Poisson distribution) # fit_geepack_prim_id <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)), # id = cluster_id, # data = data_gee_prim, # family = poisson(link = "identity"), # corstr = "independence") # fit_geepack_prim_id <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)), # id = cluster_id, # data = data_gee_prim, # family = binomial(link = "identity"), # corstr = "independence") fit_geepack_prim_id <- geeglm(as.formula(paste0("outcome_prim ~ ", min_adjusted)), id = cluster_id, data = data_gee_prim, family = gaussian(link = "identity"), corstr = "independence") # Fully adjusted (doesn't converge using Poisson distribution) # fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted)), # id = cluster_id, # data = data_gee_prim_fully, # family = poisson(link = "identity"), # corstr = "independence") # fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted_2)), # id = cluster_id, # data = data_gee_prim_fully, # family = poisson(link = "identity"), # corstr = "independence") # fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted_2)), # id = cluster_id, # data = data_gee_prim_fully, # family = binomial(link = "identity"), # corstr = "independence") fit_geepack_prim_fully_id <- geeglm(as.formula(paste0("outcome_prim ~ ", fully_adjusted)), id = cluster_id, data = data_gee_prim_fully, family = gaussian(link = "identity"), corstr = "independence") # str(summary_prim) # prop_prim[prop_prim$arm == "CDPD", ]$prop ``` ```{r Models-used} models_used <- outcome_sizes %>% distinct(outcome_full) %>% filter(!str_detect(outcome_full, "after")) %>% mutate(outcome_full = str_replace(outcome_full, "before", "before/after")) %>% mutate(outcome_full = str_remove(outcome_full, "\\(.*")) %>% mutate(RR_min = "Poisson", RR_fully = c("Poisson", "Poisson", "Poisson", "Poisson (wealth and school binary)", "Poisson", "Poisson (wealth and school binary)", "Poisson", "Poisson", "Poisson", "Poisson" ), RD_min = c("Normal", "Normal", "Normal", "Poisson", "Poisson", "Poisson", "Poisson", "Binomial", "Binomial", "Poisson" ), RD_fully = c("Normal", "Normal", "Normal", "Binomial (wealth and school binary)", "Normal", "Binomial (wealth and school binary)", "Normal", "Normal", "Normal", "Poisson" )) models_used %>% # select(-number) %>% flextable() %>% set_header_labels(values = list( outcome_full = "Outcome", RR_min = "Risk Ratio Minimally Adjusted", RR_fully = "Risk Ratio Fully Adjusted", RD_min = "Risk Difference Minimally Adjusted", RD_fully = "Risk Difference Fully Adjusted")) %>% flextable::set_caption('Distributions used in GEE models for each outcome.', autonum = officer::run_autonum(seq_id = "tab", bkm = "ModelsUsed")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") ```  
```{r Model-vars} vars_1 <- data.frame(min = min_adjusted_vector) %>% mutate(number = row_number()) vars_2 <- data.frame(full = fully_adjusted_vector) %>% mutate(number = row_number()) vars_3 <- data.frame(full_bin = fully_adjusted_2_vector) %>% mutate(number = row_number()) vars_per_model <- full_join(vars_1, vars_2, by = "number") %>% full_join(vars_3, by = "number") %>% set_variable_labels(min = "Minimally Adjusted", full = "Fully Adjusted", full_bin = "Fully Adjusted (binary)") %>% mutate(across(everything(), str_replace, "arm", "Arm"), across(everything(), str_replace, "period$", "Time period (month)"), across(everything(), str_replace, "period_quadratic", "Time period squared"), across(everything(), str_replace, "period_cubic", "Time period cubed"), across(everything(), str_replace, "quadrant", "Quadrant"), across(everything(), str_replace, "client_gender", "Client gender"), across(everything(), str_replace, "client_age", "Client age"), across(everything(), str_replace, "school_2$", "Level of schooling (quintile)"), across(everything(), str_replace, "wealth_quintile", "Wealth (quintile)"), across(everything(), str_replace, "school_binary", "Level of schooling (binary)"), across(everything(), str_replace, "wealth_binary", "Wealth (binary)")) # expss::use_labels( # data = vars_per_model # # %>% # # select(-number) %>% # # data.frame() # , # expr = { # flextable(..data) %>% # flextable::set_caption('Variables included in each model.', # autonum = officer::run_autonum(seq_id = "tab", # bkm = "ModelVars")) # } # ) vars_per_model %>% select(-number) %>% flextable() %>% set_header_labels(values = list( min = "Minimally Adjusted", full = "Fully Adjusted", full_bin = "Fully Adjusted (binary)" )) %>% flextable::set_caption('Variables included in each model.', autonum = officer::run_autonum(seq_id = "tab", bkm = "ModelVars")) %>% footnote(i = 1, j = 3, value = as_paragraph( c("used in cases where fully adjusted model did not converge otherwise")), part = "header", ref_symbols = c("*")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") ``` ```{r Define-functions-CI-t-dist} # Functions to calculate t-based confidence interval and p-value # for calculating degrees of freedom penalty, only include cluster-level covariates and intercept (p = 6 for most models: "intercept", "armCD", "armControl", "period", "period_quadratic", "period_cubic", "quadrant"; set p in function call for models where p != 7) my_tidy_rd_p6 <- function(x, exponentiate = FALSE, conf.level = 0.95, p = 6, ...) { covariates <- broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE) summary_1 <- broom::glance(x, exponentiate = exponentiate, conf.int = FALSE) I <- summary_1$n.clusters covariates %>% # calculate the confidence intervals, and save them in a tibble mutate(conf.low = estimate - qt(0.975, df = I - p) * std.error, conf.high = estimate + qt(0.975, df = I - p) * std.error) %>% # calculate t-based p-value mutate(p.value = 2 * pt(abs(estimate) / std.error, df = I - p, lower.tail = FALSE)) } my_tidy_rr_p6 <- function(x, exponentiate = TRUE, conf.level = 0.95, p = 6, ...) { covariates <- broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE) summary_1 <- broom::glance(x, exponentiate = exponentiate, conf.int = FALSE) I <- summary_1$n.clusters covariates %>% # calculate t-based confidence intervals mutate(conf.low = estimate * exp(-qt(0.975, df = I - p) * std.error), conf.high = estimate * exp(qt(0.975, df = I - p) * std.error)) %>% # calculate t-based p-value mutate(p.value = 2 * pt(abs(log(estimate)) / std.error, df = I - p, lower.tail = FALSE)) } # Revise for Nigeria SAs # for calculating degrees of freedom penalty, only include cluster-level covariates and intercept (p = 8 for AL expansion and leaflet distribution models: "intercept", "armCDPD", "period", "period_quadratic", "period_cubic", "quadrant", "leaflets", "leaflet:arm" interaction term) my_tidy_rd_p8 <- function(x, exponentiate = FALSE, conf.level = 0.95, p = 8, ...) { covariates <- broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE) summary_1 <- broom::glance(x, exponentiate = exponentiate, conf.int = FALSE) I <- summary_1$n.clusters covariates %>% # calculate the confidence intervals, and save them in a tibble mutate(conf.low = estimate - qt(0.975, df = I - p) * std.error, conf.high = estimate + qt(0.975, df = I - p) * std.error) %>% # calculate t-based p-value mutate(p.value = 2 * pt(abs(estimate) / std.error, df = I - p, lower.tail = FALSE)) } my_tidy_rr_p8 <- function(x, exponentiate = TRUE, conf.level = 0.95, p = 8, ...) { covariates <- broom::tidy(x, exponentiate = exponentiate, conf.int = FALSE) summary_1 <- broom::glance(x, exponentiate = exponentiate, conf.int = FALSE) I <- summary_1$n.clusters covariates %>% # calculate t-based confidence intervals mutate(conf.low = estimate * exp(-qt(0.975, df = I - p) * std.error), conf.high = estimate * exp(qt(0.975, df = I - p) * std.error)) %>% # calculate t-based p-value mutate(p.value = 2 * pt(abs(log(estimate)) / std.error, df = I - p, lower.tail = FALSE)) } ``` ```{r Define-function-PoisBCV} ############################################# # poisson GEE # Bias-corrected Variance ############################################# ############################################# # Input # y: N by 1 vector of outcomes # X: N by p design matrix (including intercept) # beta: p by 1 mean model parameter estimates # alpha: 1 by 1 correlation parameter estimate # phi: 1 by 1 dispersion parameter estimate # id: N by 1 cluster indicator # link: Link function - 'logit', 'log', 'identity' ############################################# PoisBCV=function(y,X,beta,alpha,phi,id,link){ # Creates two vectors that have the start and end points for each cluster BEGINEND=function(n){ last=cumsum(n) first=last-n+1 return(cbind(first,last)) } # Calculate the inverse for symmetric and positive finite matrix FINDINV=function(A){ require(MASS) AHALF=chol(A) GINV=ginv(AHALF) AINV=tcrossprod(GINV) return(AINV) } # Score function SCORE=function(beta,alpha,phi,y,X,n,p){ U=rep(0,p) UUtran=Ustar=matrix(0,p,p) locx=BEGINEND(n) for(i in 1:length(n)){ X_c=X[locx[i,1]:locx[i,2],,drop=FALSE] y_c=y[locx[i,1]:locx[i,2]] U_c=rep(0,p) Ustar_c=matrix(0,p,p) if (link=='log') { mu_c=exp(c(X_c%*%beta)) C=X_c*mu_c } else if (link=='identity') { mu_c=c(X_c%*%beta) C=X_c } A=y_c-mu_c INVR=diag(1/(1-alpha),n[i])-matrix(alpha/((1-alpha)*(1-alpha+n[i]*alpha)),n[i],n[i]) INVB=diag(1/sqrt(mu_c),n[i]) %*% INVR %*% diag(1/sqrt(mu_c),n[i])/phi U_c=t(C)%*%INVB%*%A UUtran_c=tcrossprod(U_c) Ustar_c=t(C)%*%INVB%*%C U=U+U_c UUtran=UUtran+UUtran_c Ustar=Ustar+Ustar_c } return(list(U=U,UUtran=UUtran,Ustar=Ustar)) } # Compute (A - mm`)^{-1}c without performing the inverse directly INVBIG=function(ainvc,ainvm,m,c,start,end){ for(i in start:end){ b=ainvm[,i] bt=t(b) btm=bt%*%m btmi=btm[,i] gam=1-btmi bg=b/gam ainvc=ainvc+bg%*%(bt%*%c) if(i% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_prim_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_prim_fully_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_prim_fully_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>% modify_header( estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**", estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**", estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**", estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**" ) %>% modify_footnote( update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval", abbreviation = TRUE ) %>% as_flex_table() %>% flextable::set_caption( as_paragraph( as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ', nrow(data_gee_prim), ', N (fully adjusted) = ', nrow(data_gee_prim_fully)), props = fp_text_default( font.family = "Cambria", font.size = 12, italic = TRUE)) ), autonum = officer::run_autonum(seq_id = "tab", bkm = "PrimGEEkc")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") # ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5], # function(x) {mutate(x, txt = str_replace( # txt, "\\(", "\n("))}) ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r GEE-Prim-subset, cache = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_prim_smv %>% tbl_regression(include = arm, label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_prim_id_smv %>% tbl_regression(include = arm, label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients", tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_prim_fully_smv %>% tbl_regression(include = arm, label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_prim_fully_id_smv %>% tbl_regression(include = arm, label = arm ~ "Primary outcome - ACTs sold to malaria test-positive clients", tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) gt_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) ``` \newpage ## Secondary Outcome 1 - Use of mRDT ```{r GEE-Sec-1, results = 'hide', cache = FALSE} # Get n/N (%) for each arm prop_sec_tested <- data3 %>% select(outcome_sec_tested, arm) %>% na.omit() %>% group_by(arm) %>% summarize(n = sum(outcome_sec_tested), N = n()) %>% mutate(prop_num = n / N) %>% mutate(prop = paste0(n, "/", N, " (", signif(100 * prop_num, digits = 2), "%)")) # Create datasets with no missing values in outcome variable or covariates data_gee_sec_tested <- data_gee %>% dplyr::select(outcome_sec_tested, cluster_id, all_of(min_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_tested[prop_sec_tested$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_tested[prop_sec_tested$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_tested[prop_sec_tested$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() data_gee_sec_tested_fully <- data_gee %>% dplyr::select(outcome_sec_tested, cluster_id, all_of(fully_adjusted_2_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_tested[prop_sec_tested$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_tested[prop_sec_tested$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_tested[prop_sec_tested$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() # Log link # Minimally adjusted fit_geepack_sec_tested <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_tested, family = poisson, corstr = "independence") # summary(fit_geepack_sec_tested) # Fully adjusted fit_geepack_sec_tested_fully <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted_2)), id = cluster_id, data = data_gee_sec_tested_fully, family = poisson, corstr = "independence") # Identity link # Minimally adjusted fit_geepack_sec_tested_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_tested, family = poisson(link = "identity"), corstr = "independence") # Fully adjusted (doesn't converge with wealth and school_2 having >2 categories) # fit_geepack_sec_tested_fully_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted)), # id = cluster_id, # data = data_gee_sec_tested_fully, # family = poisson(link = "identity"), # corstr = "independence") # fit_geepack_sec_tested_fully_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted_2)), # id = cluster_id, # data = data_gee_sec_tested_fully, # family = poisson(link = "identity"), # corstr = "independence") fit_geepack_sec_tested_fully_id <- geeglm(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted_2)), id = cluster_id, data = data_gee_sec_tested_fully, family = binomial(link = "identity"), corstr = "independence") ``` ```{r GEE-Sec-1-kc, cache = FALSE, results = 'hide'} # Small sample corrections # Log link # Minimally adjusted fit_PoisBCV_sec_tested <- PoisBCV( y = data_gee_sec_tested$outcome_sec_tested, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_tested), beta <- fit_geepack_sec_tested_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_tested$cluster_id, link <- "log" ) fit_geepack_sec_tested_smv <- fit_geepack_sec_tested # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_tested_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested$outbeta[, "BC1-stderr"])^2 # Fully adjusted # fit_kc_sec_tested_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_tested ~ ", fully_adjusted)), # id = "cluster_id", # data = data_gee_sec_tested_fully, # family = poisson, corstr = "independence") fit_PoisBCV_sec_tested_fully <- PoisBCV( y = data_gee_sec_tested_fully$outcome_sec_tested, X <- model.matrix(as.formula(paste0("~ ", fully_adjusted_2)), data_gee_sec_tested_fully), beta <- fit_geepack_sec_tested_fully_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_tested_fully$cluster_id, link <- "log" ) fit_geepack_sec_tested_fully_smv <- fit_geepack_sec_tested_fully # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_tested_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested_fully$outbeta[, "BC1-stderr"])^2 # Identity link # Minimally adjusted fit_PoisBCV_sec_tested_id <- PoisBCV( y = data_gee_sec_tested$outcome_sec_tested, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_tested), beta <- fit_geepack_sec_tested_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_tested$cluster_id, link <- "identity" ) fit_geepack_sec_tested_id_smv <- fit_geepack_sec_tested_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_tested_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested_id$outbeta[, "BC1-stderr"])^2 # # Fully adjusted # fit_PoisBCV_sec_tested_fully_id <- PoisBCV( # y = data_gee_sec_tested_fully$outcome_sec_tested, # X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_tested_fully), # beta <- fit_geepack_sec_tested_fully_id$coefficients, # alpha <- 0, # Independent working correlation # phi <- 1, # id <- data_gee_sec_tested_fully$cluster_id, # link <- "identity" # ) # # fit_geepack_sec_tested_fully_id_smv <- fit_geepack_sec_tested_fully_id # # # Replace robust variances with small sample corrected variances (KC) # diag(fit_geepack_sec_tested_fully_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_tested_fully_id$outbeta[, "BC1-stderr"])^2 # Fully adjusted (use binomial family with identity link because Poisson with identity did not converge) fit_BinBCV_sec_tested_fully_id <- BinBCV( y = data_gee_sec_tested_fully$outcome_sec_tested, X <- model.matrix(as.formula(paste0("~ ", fully_adjusted_2)), data_gee_sec_tested_fully), beta <- fit_geepack_sec_tested_fully_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_tested_fully$cluster_id, link <- "identity" ) fit_geepack_sec_tested_fully_id_smv <- fit_geepack_sec_tested_fully_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_tested_fully_id_smv$geese$vbeta) <- (fit_BinBCV_sec_tested_fully_id$outbeta[, "BC1-stderr"])^2 ``` ```{r GEE-Sec-1-kc-out, cache = FALSE, ft.keepnext = FALSE} risk_ratio_min <- fit_geepack_sec_tested_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_tested_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_tested_fully_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_tested_fully_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>% modify_header( estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**", estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**", estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**", estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**" ) %>% modify_footnote( update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval", abbreviation = TRUE ) %>% as_flex_table() %>% flextable::set_caption( as_paragraph( as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ', nrow(data_gee_sec_tested), ', N (fully adjusted) = ', nrow(data_gee_sec_tested_fully)), props = fp_text_default( font.family = "Cambria", font.size = 12, italic = TRUE)) ), autonum = officer::run_autonum(seq_id = "tab", bkm = "PrimGEEsec1")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") # ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5], # function(x) {mutate(x, txt = str_replace( # txt, "\\(", "\n("))}) ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r GEE-Sec-1-subset, cache = FALSE} risk_ratio_min <- fit_geepack_sec_tested_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that receive a mRDT", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_tested_id_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that receive a mRDT", tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_tested_fully_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that receive a mRDT", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_tested_fully_id_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that receive a mRDT", tidy_fun = my_tidy_rd_p6) tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) gt_sec1 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) ``` \newpage ## Secondary Outcome 2 - Adherence to mRDT result ```{r GEE-Sec-2, results = 'hide'} # Get n/N (%) for each arm prop_sec_adherence <- data3 %>% select(outcome_sec_adherence, arm) %>% na.omit() %>% group_by(arm) %>% summarize(n = sum(outcome_sec_adherence), N = n()) %>% mutate(prop_num = n / N) %>% mutate(prop = paste0(n, "/", N, " (", signif(100 * prop_num, digits = 2), "%)")) # Create datasets with no missing values in outcome variable or covariates data_gee_sec_adherence <- data_gee %>% dplyr::select(outcome_sec_adherence, cluster_id, all_of(min_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_adherence[prop_sec_adherence$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_adherence[prop_sec_adherence$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_adherence[prop_sec_adherence$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() data_gee_sec_adherence_fully <- data_gee %>% dplyr::select(outcome_sec_adherence, cluster_id, all_of(fully_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_adherence[prop_sec_adherence$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_adherence[prop_sec_adherence$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_adherence[prop_sec_adherence$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() # Log link # Minimally adjusted fit_geepack_sec_adherence <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_adherence, family = poisson, corstr = "independence") # Fully adjusted fit_geepack_sec_adherence_fully <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)), id = cluster_id, data = data_gee_sec_adherence_fully, family = poisson, corstr = "independence") # Identity link # Minimally adjusted fit_geepack_sec_adherence_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_adherence, family = poisson(link = "identity"), corstr = "independence") # Fully adjusted (Does not converge) # fit_geepack_sec_adherence_fully_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)), # id = cluster_id, # data = data_gee_sec_adherence_fully, # family = poisson(link = "identity"), # corstr = "independence") # fit_geepack_sec_adherence_fully_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)), # id = cluster_id, # data = data_gee_sec_adherence_fully, # family = binomial(link = "identity"), # corstr = "independence") fit_geepack_sec_adherence_fully_id <- geeglm(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)), id = cluster_id, data = data_gee_sec_adherence_fully, family = gaussian(link = "identity"), corstr = "independence") ``` ```{r GEE-Sec-2-kc, cache = FALSE, results = 'hide'} # Small sample corrections # Log link # Minimally adjusted # fit_kc_sec_adherence <- GEE.var.kc(as.formula(paste0("outcome_sec_adherence ~ ", min_adjusted)), # id = "cluster_id", # data = data_gee_sec_adherence, # family = poisson, corstr = "independence") fit_PoisBCV_sec_adherence <- PoisBCV( y = data_gee_sec_adherence$outcome_sec_adherence, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_adherence), beta <- fit_geepack_sec_adherence_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_adherence$cluster_id, link <- "log" ) fit_geepack_sec_adherence_smv <- fit_geepack_sec_adherence # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_adherence_smv$geese$vbeta) <- (fit_PoisBCV_sec_adherence$outbeta[, "BC1-stderr"])^2 # Fully adjusted # fit_kc_sec_adherence_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)), # id = "cluster_id", # data = data_gee_sec_adherence_fully, # family = poisson, corstr = "independence") fit_PoisBCV_sec_adherence_fully <- PoisBCV( y = data_gee_sec_adherence_fully$outcome_sec_adherence, X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_adherence_fully), beta <- fit_geepack_sec_adherence_fully_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_adherence_fully$cluster_id, link <- "log" ) fit_geepack_sec_adherence_fully_smv <- fit_geepack_sec_adherence_fully # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_adherence_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_adherence_fully$outbeta[, "BC1-stderr"])^2 # Identity link # Minimally adjusted fit_PoisBCV_sec_adherence_id <- PoisBCV( y = data_gee_sec_adherence$outcome_sec_adherence, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_adherence), beta <- fit_geepack_sec_adherence_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_adherence$cluster_id, link <- "identity" ) fit_geepack_sec_adherence_id_smv <- fit_geepack_sec_adherence_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_adherence_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_adherence_id$outbeta[, "BC1-stderr"])^2 # Fully adjusted (doesn't converge) # fit_PoisBCV_sec_adherence_fully_id <- PoisBCV( # y = data_gee_sec_adherence_fully$outcome_sec_adherence, # X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_adherence_fully), # beta <- fit_geepack_sec_adherence_fully_id$coefficients, # alpha <- 0, # Independent working correlation # phi <- 1, # id <- data_gee_sec_adherence_fully$cluster_id, # link <- "identity" # ) # No custom formula for gaussian distribution, use geesmv package as advised by Xueqi fit_PoisBCV_sec_adherence_fully_id <- GEE.var.kc(as.formula(paste0("outcome_sec_adherence ~ ", fully_adjusted)), id = "cluster_id", data = data_gee_sec_adherence_fully, family = gaussian, corstr = "independence") fit_geepack_sec_adherence_fully_id_smv <- fit_geepack_sec_adherence_fully_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_adherence_fully_id_smv$geese$vbeta) <- fit_PoisBCV_sec_adherence_fully_id$cov.beta ``` ```{r GEE-Sec-2-kc-out, cache = FALSE, ft.keepnext = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_sec_adherence_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_adherence_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_adherence_fully_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_adherence_fully_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>% modify_header( estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**", estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**", estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**", estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**" ) %>% modify_footnote( update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval", abbreviation = TRUE ) %>% as_flex_table() %>% flextable::set_caption( as_paragraph( as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ', nrow(data_gee_sec_adherence), ', N (fully adjusted) = ', nrow(data_gee_sec_adherence_fully)), props = fp_text_default( font.family = "Cambria", font.size = 12, italic = TRUE)) ), autonum = officer::run_autonum(seq_id = "tab", bkm = "PrimGEEsec2")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") # ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5], # function(x) {mutate(x, txt = str_replace( # txt, "\\(", "\n("))}) ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r GEE-Sec-2-subset, cache = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_sec_adherence_smv %>% tbl_regression(include = arm, label = arm ~ "Malaria tested clients whose treatment adhered to test results", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_adherence_id_smv %>% tbl_regression(include = arm, label = arm ~ "Malaria tested clients whose treatment adhered to test results", tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_adherence_fully_smv %>% tbl_regression(include = arm, label = arm ~ "Malaria tested clients whose treatment adhered to test results", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_adherence_fully_id_smv %>% tbl_regression(include = arm, label = arm ~ "Malaria tested clients whose treatment adhered to test results", tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) gt_sec2 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) ``` \newpage ## Secondary Outcome 3 - Appropriate case management ```{r GEE-Sec-3, results = 'hide'} # Get n/N (%) for each arm prop_sec_appropriate <- data3 %>% select(outcome_sec_appropriate, arm) %>% na.omit() %>% group_by(arm) %>% summarize(n = sum(outcome_sec_appropriate), N = n()) %>% mutate(prop_num = n / N) %>% mutate(prop = paste0(n, "/", N, " (", signif(100 * prop_num, digits = 2), "%)")) # Create datasets with no missing values in outcome variable or covariates data_gee_sec_appropriate <- data_gee %>% dplyr::select(outcome_sec_appropriate, cluster_id, all_of(min_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_appropriate[prop_sec_appropriate$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() data_gee_sec_appropriate_fully <- data_gee %>% dplyr::select(outcome_sec_appropriate, cluster_id, all_of(fully_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_appropriate[prop_sec_appropriate$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_appropriate[prop_sec_appropriate$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() # Log link # Minimally adjusted fit_geepack_sec_appropriate <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_appropriate, family = poisson, corstr = "independence") # Fully adjusted fit_geepack_sec_appropriate_fully <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)), id = cluster_id, data = data_gee_sec_appropriate_fully, family = poisson, corstr = "independence") # Identity link # Minimally adjusted (doesn't converge with Poisson family) fit_geepack_sec_appropriate_id <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_appropriate, family = binomial(link = "identity"), corstr = "independence") # Fully adjusted (doesn't converge with Poisson or binomial family) fit_geepack_sec_appropriate_fully_id <- geeglm(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)), id = cluster_id, data = data_gee_sec_appropriate_fully, family = gaussian(link = "identity"), corstr = "independence") ``` ```{r GEE-Sec-3-kc, cache = FALSE, results = 'hide'} # Small sample corrections # Log link # Minimally adjusted fit_PoisBCV_sec_appropriate <- PoisBCV( y = data_gee_sec_appropriate$outcome_sec_appropriate, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_appropriate), beta <- fit_geepack_sec_appropriate_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_appropriate$cluster_id, link <- "log" ) fit_geepack_sec_appropriate_smv <- fit_geepack_sec_appropriate # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_appropriate_smv$geese$vbeta) <- (fit_PoisBCV_sec_appropriate$outbeta[, "BC1-stderr"])^2 # Fully adjusted # fit_kc_sec_appropriate_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)), # id = "cluster_id", # data = data_gee_sec_appropriate_fully, # family = poisson, corstr = "independence") fit_PoisBCV_sec_appropriate_fully <- PoisBCV( y = data_gee_sec_appropriate_fully$outcome_sec_appropriate, X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_appropriate_fully), beta <- fit_geepack_sec_appropriate_fully_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_appropriate_fully$cluster_id, link <- "log" ) fit_geepack_sec_appropriate_fully_smv <- fit_geepack_sec_appropriate_fully # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_appropriate_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_appropriate_fully$outbeta[, "BC1-stderr"])^2 # Identity link # Minimally adjusted (doesn't converge) # fit_PoisBCV_sec_appropriate_id <- PoisBCV( # y = data_gee_sec_appropriate$outcome_sec_appropriate, # X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_appropriate), # beta <- fit_geepack_sec_appropriate_id$coefficients, # alpha <- 0, # Independent working correlation # phi <- 1, # id <- data_gee_sec_appropriate$cluster_id, # link <- "identity" # ) fit_BinBCV_sec_appropriate_id <- BinBCV( y = data_gee_sec_appropriate$outcome_sec_appropriate, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_appropriate), beta <- fit_geepack_sec_appropriate_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_appropriate$cluster_id, link <- "identity" ) fit_geepack_sec_appropriate_id_smv <- fit_geepack_sec_appropriate_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_appropriate_id_smv$geese$vbeta) <- (fit_BinBCV_sec_appropriate_id$outbeta[, "BC1-stderr"])^2 # Fully adjusted (doesn't converge) # fit_BinBCV_sec_appropriate_fully_id <- BinBCV( # y = data_gee_sec_appropriate_fully$outcome_sec_appropriate, # X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_appropriate_fully), # beta <- fit_geepack_sec_appropriate_fully_id$coefficients, # alpha <- 0, # Independent working correlation # phi <- 1, # id <- data_gee_sec_appropriate_fully$cluster_id, # link <- "identity" # ) # No custom formula for gaussian distribution, use geesmv package as advised by Xueqi fit_NormalBCV_sec_appropriate_fully_id <- GEE.var.kc(as.formula(paste0("outcome_sec_appropriate ~ ", fully_adjusted)), id = "cluster_id", data = data_gee_sec_appropriate_fully, family = gaussian, corstr = "independence") fit_geepack_sec_appropriate_fully_id_smv <- fit_geepack_sec_appropriate_fully_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_appropriate_fully_id_smv$geese$vbeta) <- fit_NormalBCV_sec_appropriate_fully_id$cov.beta ``` ```{r GEE-Sec-3-kc-out, cache = FALSE, ft.keepnext = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_sec_appropriate_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_appropriate_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_appropriate_fully_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_appropriate_fully_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>% modify_header( estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**", estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**", estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**", estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**" ) %>% modify_footnote( update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval", abbreviation = TRUE ) %>% as_flex_table() %>% flextable::set_caption( as_paragraph( as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ', nrow(data_gee_sec_appropriate), ', N (fully adjusted) = ', nrow(data_gee_sec_appropriate_fully)), props = fp_text_default( font.family = "Cambria", font.size = 12, italic = TRUE)) ), autonum = officer::run_autonum(seq_id = "tab", bkm = "PrimGEEsec3")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") # ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5], # function(x) {mutate(x, txt = str_replace( # txt, "\\(", "\n("))}) ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r GEE-Sec-3-subset, cache = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_sec_appropriate_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that are managed appropriately", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_appropriate_id_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that are managed appropriately", tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_appropriate_fully_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that are managed appropriately", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_appropriate_fully_id_smv %>% tbl_regression(include = arm, label = arm ~ "Suspected malaria cases that are managed appropriately", tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) gt_sec3 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) ``` \newpage ## Secondary Outcome 4 - ACT use among untested ```{r GEE-Sec-4, results = 'hide'} # Get n/N (%) for each arm prop_sec_ACT_untested <- data3 %>% select(outcome_sec_ACT_untested, arm) %>% na.omit() %>% group_by(arm) %>% summarize(n = sum(outcome_sec_ACT_untested), N = n()) %>% mutate(prop_num = n / N) %>% mutate(prop = paste0(n, "/", N, " (", signif(100 * prop_num, digits = 2), "%)")) # Create datasets with no missing values in outcome variable or covariates data_gee_sec_ACT_untested <- data_gee %>% dplyr::select(outcome_sec_ACT_untested, cluster_id, all_of(min_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() data_gee_sec_ACT_untested_fully <- data_gee %>% dplyr::select(outcome_sec_ACT_untested, cluster_id, all_of(fully_adjusted_vector)) %>% drop_na() %>% mutate(arm = dplyr::recode(arm, "CDPD" = paste("CDPD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CDPD", ]$prop), "Control" = paste("Control, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "Control", ]$prop), "CD" = paste("CD, ", prop_sec_ACT_untested[prop_sec_ACT_untested$arm == "CD", ]$prop))) %>% set_variable_labels(arm = "Arm, sample proportion n/N (%)") %>% as.data.frame() # Log link # Minimally adjusted fit_geepack_sec_ACT_untested <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_ACT_untested, family = poisson, corstr = "independence") # Fully adjusted fit_geepack_sec_ACT_untested_fully <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", fully_adjusted)), id = cluster_id, data = data_gee_sec_ACT_untested_fully, family = poisson, corstr = "independence") # Identity link # Minimally adjusted fit_geepack_sec_ACT_untested_id <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", min_adjusted)), id = cluster_id, data = data_gee_sec_ACT_untested, family = poisson(link = "identity"), corstr = "independence") # Fully adjusted fit_geepack_sec_ACT_untested_fully_id <- geeglm(as.formula(paste0("outcome_sec_ACT_untested ~ ", fully_adjusted)), id = cluster_id, data = data_gee_sec_ACT_untested_fully, family = poisson(link = "identity"), corstr = "independence") ``` ```{r GEE-Sec-4-kc, cache = FALSE, results = 'hide'} # Small sample corrections # Log link # Minimally adjusted fit_PoisBCV_sec_ACT_untested <- PoisBCV( y = data_gee_sec_ACT_untested$outcome_sec_ACT_untested, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_ACT_untested), beta <- fit_geepack_sec_ACT_untested_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_ACT_untested$cluster_id, link <- "log" ) fit_geepack_sec_ACT_untested_smv <- fit_geepack_sec_ACT_untested # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_ACT_untested_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested$outbeta[, "BC1-stderr"])^2 # Fully adjusted # fit_kc_sec_ACT_untested_fully <- GEE.var.kc(as.formula(paste0("outcome_sec_ACT_untested ~ ", fully_adjusted)), # id = "cluster_id", # data = data_gee_sec_ACT_untested_fully, # family = poisson, corstr = "independence") fit_PoisBCV_sec_ACT_untested_fully <- PoisBCV( y = data_gee_sec_ACT_untested_fully$outcome_sec_ACT_untested, X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_ACT_untested_fully), beta <- fit_geepack_sec_ACT_untested_fully_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_ACT_untested_fully$cluster_id, link <- "log" ) fit_geepack_sec_ACT_untested_fully_smv <- fit_geepack_sec_ACT_untested_fully # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_ACT_untested_fully_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested_fully$outbeta[, "BC1-stderr"])^2 # Identity link # Minimally adjusted fit_PoisBCV_sec_ACT_untested_id <- PoisBCV( y = data_gee_sec_ACT_untested$outcome_sec_ACT_untested, X <- model.matrix(as.formula(paste0("~ ", min_adjusted)), data_gee_sec_ACT_untested), beta <- fit_geepack_sec_ACT_untested_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_ACT_untested$cluster_id, link <- "identity" ) fit_geepack_sec_ACT_untested_id_smv <- fit_geepack_sec_ACT_untested_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_ACT_untested_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested_id$outbeta[, "BC1-stderr"])^2 # Fully adjusted fit_PoisBCV_sec_ACT_untested_fully_id <- PoisBCV( y = data_gee_sec_ACT_untested_fully$outcome_sec_ACT_untested, X <- model.matrix(as.formula(paste0("~ ", fully_adjusted)), data_gee_sec_ACT_untested_fully), beta <- fit_geepack_sec_ACT_untested_fully_id$coefficients, alpha <- 0, # Independent working correlation phi <- 1, id <- data_gee_sec_ACT_untested_fully$cluster_id, link <- "identity" ) fit_geepack_sec_ACT_untested_fully_id_smv <- fit_geepack_sec_ACT_untested_fully_id # Replace robust variances with small sample corrected variances (KC) diag(fit_geepack_sec_ACT_untested_fully_id_smv$geese$vbeta) <- (fit_PoisBCV_sec_ACT_untested_fully_id$outbeta[, "BC1-stderr"])^2 ``` ```{r GEE-Sec-4-kc-out, cache = FALSE, ft.keepnext = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_sec_ACT_untested_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_ACT_untested_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_ACT_untested_fully_smv %>% tbl_regression(tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_ACT_untested_fully_id_smv %>% tbl_regression(tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) ft_prim <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**")) %>% modify_header( estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**", estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**", estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**", estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**" ) %>% modify_footnote( update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval", abbreviation = TRUE ) %>% as_flex_table() %>% flextable::set_caption( as_paragraph( as_chunk(paste0('GEE analysis with KC Small Sample Variance Correction, \nN (minimally adjusted) = ', nrow(data_gee_sec_ACT_untested), ', N (fully adjusted) = ', nrow(data_gee_sec_ACT_untested_fully)), props = fp_text_default( font.family = "Cambria", font.size = 12, italic = TRUE)) ), autonum = officer::run_autonum(seq_id = "tab", bkm = "PrimGEEsec4")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") # ft_prim$body$content$content$data[, 2:5] <- lapply(ft_prim$body$content$content$data[, 2:5], # function(x) {mutate(x, txt = str_replace( # txt, "\\(", "\n("))}) ft_prim %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r GEE-Sec-4-subset, cache = FALSE} # Create table for each model risk_ratio_min <- fit_geepack_sec_ACT_untested_smv %>% tbl_regression(include = arm, label = arm ~ "Untested clients taking ACT", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_min <- fit_geepack_sec_ACT_untested_id_smv %>% tbl_regression(include = arm, label = arm ~ "Untested clients taking ACT", tidy_fun = my_tidy_rd_p6) risk_ratio_fully <- fit_geepack_sec_ACT_untested_fully_smv %>% tbl_regression(include = arm, label = arm ~ "Untested clients taking ACT", tidy_fun = my_tidy_rr_p6, exponentiate = TRUE) risk_diff_fully <- fit_geepack_sec_ACT_untested_fully_id_smv %>% tbl_regression(include = arm, label = arm ~ "Untested clients taking ACT", tidy_fun = my_tidy_rd_p6) # Combine tables tbl_ratio <- tbl_merge(tbls = list(risk_ratio_min, risk_ratio_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) tbl_diff <- tbl_merge(tbls = list(risk_diff_min, risk_diff_fully), tab_spanner = c("**Minimally adjusted**", "**Fully adjusted**")) %>% modify_column_hide(columns = contains("p.value")) gt_sec4 <- tbl_merge(tbls = list(tbl_ratio, tbl_diff), tab_spanner = c("**Risk Ratio**", "**Risk Difference**") ) ``` \newpage # Other tables included in Results paper ## Table 2 (Results paper). Minimally adjusted and fully adjusted estimates of intervention effects for pre-specified study outcomes ```{r Combine-results-table, cache = FALSE, ft.keepnext = FALSE} gt_results <- tbl_stack(tbls = list(gt_prim, gt_sec1, gt_sec2, gt_sec3, gt_sec4)) ft_results <- gt_results %>% modify_header( estimate_1_1 = "**Minimally Adjusted RR** **(95% CI)**", estimate_2_1 = "**Fully Adjusted RR** **(95% CI)**", estimate_1_2 = "**Minimally Adjusted RD** **(95% CI)**", estimate_2_2 = "**Fully Adjusted RD** **(95% CI)**" ) %>% modify_footnote( update = c(estimate_1_1, estimate_2_1, estimate_1_2, estimate_2_2) ~ "RR = Risk Ratio, RD = Risk Difference, CI = Confidence Interval", abbreviation = TRUE ) %>% as_flex_table() %>% flextable::set_caption('GEE analysis with KC Small Sample Variance Correction.', autonum = officer::run_autonum(seq_id = "tab", bkm = "PrimGEEsec4")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") ft_results %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r, eval = T} shop_test_order <- data4 %>% mutate(arm = fct_relevel(as.factor(arm), "Control", "CDPD")) %>% group_by(shop_ni) %>% mutate(cluster_primary_prop = mean(outcome_prim, na.rm = T)) %>% mutate(cluster_primary_denom = sum(!is.na(outcome_prim))) %>% mutate(cluster_sec1_prop = mean(outcome_sec_tested_expanded, na.rm = T)) %>% mutate(cluster_sec1_denom = sum(!is.na(outcome_sec_tested_expanded))) %>% mutate(cluster_sec2_prop = mean(outcome_sec_adherence, na.rm = T)) %>% mutate(cluster_sec2_denom = sum(!is.na(outcome_sec_adherence))) %>% mutate(cluster_sec3_prop = mean(outcome_sec_appropriate_expanded, na.rm = T)) %>% mutate(cluster_sec3_denom = sum(!is.na(outcome_sec_appropriate_expanded))) %>% mutate(cluster_sec4_prop = mean(outcome_sec_ACT_untested, na.rm = T)) %>% mutate(cluster_sec4_denom = sum(!is.na(outcome_sec_ACT_untested))) %>% mutate(cluster_discount_prop_2 = mean(discount_bin, na.rm = T)) %>% mutate(cluster_discountPos_prop = mean(discountPos_bin, na.rm = T)) %>% mutate(cluster_discountPos_denom = sum(!is.na(discountPos_bin))) %>% mutate(cluster_actPrice0Pos_prop = mean(act_price_pos_0, na.rm = T)) %>% mutate(cluster_actPrice0Pos_denom = sum(!is.na(act_price_pos_0))) %>% mutate(cluster_actPrice_prop = mean(act_price_num, na.rm = T)) %>% mutate(cluster_actPricePos_prop = mean(act_price_pos_num, na.rm = T)) %>% mutate(cluster_actLinePrice_prop = mean(price_line_pos, na.rm = T)) %>% mutate(cluster_actLinePrice_denom = sum(!is.na(price_line_pos))) %>% mutate(cluster_actLinePricePos_prop = mean(price_line, na.rm = T)) %>% mutate(cluster_actLinePricePos_denom = sum(!is.na(price_line))) %>% mutate(cluster_freeACT_prop = mean(free_act, na.rm = T)) %>% mutate(cluster_freeACT_denom = sum(!is.na(free_act))) %>% mutate(cluster_freeACTPos_prop = mean(free_act_pos, na.rm = T)) %>% mutate(cluster_freeACTPos_denom = sum(!is.na(free_act_pos))) %>% mutate(cluster_ses_prop = mean(PC1, na.rm = T)) %>% ### Added 12/11/23 for ordering Figure in manuscript filter(!(test_results %in% c("Invalid", "Dont know"))) %>% mutate(cluster_sec1_mosaic_prop = mean(outcome_sec_tested_expanded, na.rm = T)) %>% mutate(cluster_sec1_mosaic_denom = sum(!is.na(outcome_sec_tested_expanded), na.rm = T)) %>% ### select(shop_ni, arm, contains("_prop"), contains("_denom")) %>% distinct() %>% ungroup() %>% mutate(shop_ni = fct_reorder(shop_ni, cluster_sec1_prop)) %>% mutate(shop_ni = fct_reorder(shop_ni, as.numeric(arm))) %>% group_by(shop_ni) %>% mutate(shop_test_rank = cur_group_id()) %>% arrange(shop_test_rank) %>% group_by(arm) %>% mutate(shop_test_rank_in_arm = row_number()) %>% ### Added 12/11/23 for ordering Figure in manuscript ungroup() %>% mutate(shop_ni = fct_reorder(shop_ni, cluster_sec1_mosaic_prop)) %>% mutate(shop_ni = fct_reorder(shop_ni, as.numeric(arm))) %>% group_by(shop_ni) %>% mutate(shop_test_rank_mosaic = cur_group_id()) %>% arrange(shop_test_rank_mosaic) %>% group_by(arm) %>% mutate(shop_test_rank_in_arm_mosaic = row_number()) %>% ### ungroup() %>% mutate(shop_ni = fct_reorder(shop_ni, cluster_freeACTPos_prop)) %>% mutate(shop_ni = fct_reorder(shop_ni, as.numeric(arm))) ``` ```{r, eval = T} data_mosaic <- data4 %>% # select(test, ACT, test_results, test_else, test_else_res) %>% mutate(test_mosaic = as.factor( case_when( test_results == "Positive" ~ "Positive", test_results == "Negative" ~ "Negative", test == "No mRDT" ~ "No mRDT" ))) %>% mutate(test_mosaic = fct_relevel(test_mosaic, "Positive", "Negative", "No mRDT")) %>% mutate(ACT_mosaic = as.factor( case_when( ACT == "ACT" ~ "ACT", ACT == "No ACT" & other_AM == 1 ~ "Other (non-ACT) AM", ACT == "No ACT" & other_AM == 0 ~ "No AM" ) )) %>% mutate(arm = fct_relevel(as.factor(arm), "Control", "CD", "CDPD")) %>% left_join(shop_test_order, by = c("shop_ni", "arm")) %>% set_variable_labels( ACT_mosaic = "ACT purchasing", test_mosaic = "mRDT results" ) ``` \newpage ## Table 3 (Results paper). Testing and treatment decisions by arm ```{r Table3} data_table3 <- data_mosaic %>% mutate(AL_2 = factor(AL, levels = c(0, 1)), other_ACT_2 = factor(ifelse(AL == 1, 0, other_ACT), levels = c(0, 1)), other_AM_2 = factor(ifelse(AL == 1 | other_ACT == 1, 0, other_AM), levels = c(0, 1))) %>% set_variable_labels( AL_2 = "Took AL", other_ACT_2 = "Took ACT (not AL)", other_AM_2 = "Took other antimalarial", total_spent_na = "Overall expenditure" ) table3_all <- data_table3 %>% set_variable_labels(Interviewees = "Interviewees") %>% tbl_summary( include = c(Interviewees), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_tested <- data_table3 %>% set_variable_labels(Interviewees = "Tested at outlet") %>% # filter(test_mosaic == "Positive" | test_mosaic == "Negative") %>% filter(test == "mRDT") %>% tbl_summary( include = c(Interviewees), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_pos <- data_table3 %>% set_variable_labels(Interviewees = "Positive test at outlet") %>% filter(test_mosaic == "Positive") %>% tbl_summary( include = c(Interviewees, AL_2, other_ACT_2, other_AM_2), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_neg <- data_table3 %>% set_variable_labels(Interviewees = "Negative test at outlet") %>% filter(test_mosaic == "Negative") %>% tbl_summary( include = c(Interviewees, AL_2, other_ACT_2, other_AM_2), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_invalid <- data_table3 %>% set_variable_labels(Interviewees = "Invalid or unknown test result at outlet") %>% filter(test == "mRDT" & (test_results %in% c("Invalid", "Dont know") | is.na(test_results))) %>% tbl_summary( include = c(Interviewees), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_untested <- data_table3 %>% set_variable_labels(Interviewees = "Not tested at outlet") %>% filter(test_mosaic == "No mRDT") %>% tbl_summary( include = c(Interviewees), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_no_test <- data_table3 %>% set_variable_labels(Interviewees = "No test") %>% filter(test_mosaic == "No mRDT" & ((test_else == "No" ) | is.na(test_else))) %>% tbl_summary( include = c(Interviewees, AL_2, other_ACT_2, other_AM_2), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_else_pos <- data_table3 %>% set_variable_labels(Interviewees = "Had positive test from elsewhere") %>% filter(test_mosaic == "No mRDT" & test_else != "No" & (str_detect(tolower(malaria_test_res), "positive"))) %>% tbl_summary( include = c(Interviewees, AL_2, other_ACT_2, other_AM_2), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_else_neg <- data_table3 %>% set_variable_labels(Interviewees = "Had negative test from elsewhere") %>% filter(test_mosaic == "No mRDT" & (str_detect(tolower(malaria_test_res), "negative"))) %>% tbl_summary( include = c(Interviewees, AL_2, other_ACT_2, other_AM_2), by = arm, type = list(everything() ~ "dichotomous"), value = list(everything() ~ 1), statistic = list(everything() ~ "{n} ({p}%)", Interviewees ~ "{n}"), missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() table3_gt <- tbl_stack(list(table3_all, table3_tested, table3_pos, table3_neg, table3_invalid, table3_untested, table3_no_test, table3_else_pos, table3_else_neg)) ``` ```{r} table3_gt %>% as_flex_table() %>% footnote(i = 1, j = 1, value = as_paragraph("Includes only interviewees who consented and met all inclusion criteria."), ref_symbols = c("1"), part = "body") %>% flextable::set_caption('Testing and treatment decisions by arm.', autonum = officer::run_autonum(seq_id = "tab", bkm = "Table3Paper")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` \newpage ## Table 4 (Results paper). Summary of price variables among ACT purchasers. ```{r, ft.keepnext = FALSE} data_mosaic %>% filter(ACT == "ACT") %>% tbl_summary(include = c(arm, contains( "_cat"), altpay, discount), by = arm, missing_text = "Missing") %>% add_overall(last = TRUE) %>% add_stat_label() %>% as_flex_table() %>% flextable::set_caption('Summary of price variables, among ACT purchasers.', autonum = officer::run_autonum(seq_id = "tab", bkm = "discountPaySummariesPurchasers")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% align(j = -1, align = "center", part = "all") ``` ## Table 5 (Results paper). Total spent by test behavior and result. ```{r totalSpent} data_mosaic %>% tbl_summary(by = test_mosaic, include = total_spent_na, type = list(all_continuous() ~ "continuous2"), statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]")), missing_text = "Missing") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() %>% as_flex_table() %>% flextable::set_caption('Total spent by test behavior and result.', autonum = officer::run_autonum(seq_id = "tab", bkm = "totalSpent")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% align(j = -1, align = "center", part = "all") ``` \newpage # Tables included in paper supplement, other miscellaneous tables ```{r Stratification-function} # Set border style border_style <- officer::fp_border(color = "black", width = 1) data5 <- data_tab1 %>% mutate(across(c(ACT, case, test), as.factor)) %>% # mutate(across(c(ACT, case, test), recode_factor, "0" = "No", "1" = "Yes")) mutate(ACT = recode_factor(ACT, "1" = "ACT", "0" = "No ACT")) %>% mutate(case = recode_factor(case, "1" = "Suspected case", "0" = "Not suspected case")) %>% mutate(test = recode_factor(test, "Yes" = "mRDT", "No" = "No mRDT")) get_stratification <- function(strat_var){ reset_gtsummary_theme() # theme_gtsummary_journal(journal = "jama") theme_gtsummary_compact() tbl1 <- data5 %>% filter(!is.na({{strat_var}})) %>% tbl_strata( strata = {{strat_var}}, .header = "{strata}, N = {n}", .tbl_fun = ~ .x %>% tbl_summary(include = c(Interviewees, respondent, respondent_age, client_gender, respondent_gender), by = arm, type = list(Interviewees ~ "dichotomous"), value = list(Interviewees ~ 1), statistic = list(Interviewees ~ "{N}"), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() ) tbl_adult <- data5 %>% filter(!is.na({{strat_var}})) %>% filter(respondent == "Adult client") %>% tbl_strata( strata = {{strat_var}}, .header = "{strata}, N = {n}", .tbl_fun = ~ .x %>% tbl_summary(include = c(adult_client_age, adult_client_gender), by = arm, missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() # , # .header = "**{strata}**, N = {n}" ) tbl_child <- data5 %>% filter(!is.na({{strat_var}})) %>% filter(respondent == "Guardian of child client") %>% tbl_strata( strata = {{strat_var}}, .header = "{strata}, N = {n}", .tbl_fun = ~ .x %>% tbl_summary(include = c(child_age, child_gender), by = arm, type = list(child_age ~ "continuous2"), statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}")), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() ) tbl_covariates <- data5 %>% filter(!is.na({{strat_var}})) %>% tbl_strata( strata = {{strat_var}}, .header = "{strata}, N = {n}", .tbl_fun = ~ .x %>% tbl_summary(include = c(school_2, wealth_quintile ), by = arm, statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}")), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() ) tbl_period <- data5 %>% filter(!is.na({{strat_var}})) %>% tbl_strata( strata = {{strat_var}}, .header = "{strata}, N = {n}", .tbl_fun = ~ .x %>% tbl_summary(include = c(timespan), by = arm, statistic = list(all_continuous2() ~ c("{mean} ({sd})", "{median} [{p25}, {p75}]", "{min}, {max}")), missing = "always", missing_text = "Missing") %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") %>% add_stat_label() ) tbl1_combined <- tbl_stack(list(tbl1, tbl_adult, tbl_child, tbl_covariates, tbl_period)) return(tbl1_combined) } ``` ## Table S1 (Results paper supplement). Secondary Outcomes- Sample characteristics by intervention arm and mRDT* use. ```{r Table-B1-4, cache = FALSE, ft.keepnext = FALSE} gt_strat_tested <- get_stratification(test) ft_strat_tested <- gt_strat_tested %>% as_flex_table() %>% flextable::set_caption('Secondary Outcomes 2 & 4 -- Sample characteristics by intervention arm and mRDT* use.', autonum = officer::run_autonum(seq_id = "tab", bkm = "Table1test")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") %>% vline(j = 4, part = "all", border = border_style) %>% add_footer_row(values = as_paragraph("*Only mRDTs conducted at the shop are included"), colwidths = 7) ft_strat_tested ``` \newpage ## Table S2 (Results paper supplement). Adherence by client gender and test result, among mRDT purchasers. ```{r t9gender} adherence_all_gender <- data_mosaic %>% set_variable_labels( outcome_sec_adherence = "mRDT tested clients whose treatment adhered to test results" ) %>% tbl_summary( include = c(outcome_sec_adherence), by = client_gender, type = list(outcome_sec_adherence ~ "dichotomous"), # value = list(outcome_sec_adherence = "ACT"), statistic = list(all_dichotomous() ~ "{n} / {N} ({p}%)"), missing = "no", missing_text = "Missing") %>% add_stat_label() %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") adherence_pos_gender <- data_mosaic %>% filter(test_mosaic == "Positive") %>% set_variable_labels( outcome_sec_adherence = "Positive mRDT clients whose treatment adhered to test results" ) %>% tbl_summary( include = c(outcome_sec_adherence), by = client_gender, type = list(outcome_sec_adherence ~ "dichotomous"), # value = list(outcome_sec_adherence = "ACT"), statistic = list(all_dichotomous() ~ "{n} / {N} ({p}%)"), missing = "no", missing_text = "Missing") %>% add_stat_label() %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") adherence_neg_gender <- data_mosaic %>% filter(test_mosaic == "Negative") %>% set_variable_labels( outcome_sec_adherence = "Negative mRDT clients whose treatment adhered to test results" ) %>% tbl_summary( include = c(outcome_sec_adherence), by = client_gender, type = list(outcome_sec_adherence ~ "dichotomous"), # value = list(outcome_sec_adherence = "ACT"), statistic = list(all_dichotomous() ~ "{n} / {N} ({p}%)"), missing = "no", missing_text = "Missing") %>% add_stat_label() %>% modify_header(all_stat_cols() ~ "**{level}**") %>% modify_spanning_header(all_stat_cols() ~ "**Study Arm**") %>% add_overall(last = TRUE, col_label = "**Overall**") tbl_stack(list(adherence_all_gender, adherence_pos_gender, adherence_neg_gender)) %>% as_flex_table() %>% flextable::set_caption('Adherence by client gender and test result, among mRDT purchasers.', autonum = officer::run_autonum(seq_id = "tab", bkm = "t9gender")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ## Table S3 (Results paper supplement). Proportion who purchased antibiotics, overall and by subgroups. ```{r table-antibiotics-CPHIA, ft.keepnext = FALSE} ab_interviewees <- data_mosaic %>% mutate(Interviewees = fct_recode(as.factor(Interviewees), "All interviewees" = "1")) %>% set_variable_labels(Interviewees = "Overall") %>% tbl_cross(row = Interviewees, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_client_gender <- data_mosaic %>% tbl_cross(row = client_gender, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_client_age <- data_mosaic %>% tbl_cross(row = client_age, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_respondent_gender <- data_mosaic %>% tbl_cross(row = respondent_gender, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_respondent_age <- data_mosaic %>% tbl_cross(row = respondent_age, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_education <- data_mosaic %>% tbl_cross(row = school_2, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_wealth <- data_mosaic %>% tbl_cross(row = wealth_quintile, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_test <- data_mosaic %>% tbl_cross(row = test_mosaic, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_act <- data_mosaic %>% tbl_cross(row = ACT_mosaic, col = antibiotic, statistic = "{n}/{N} ({p}%)", percent = "row", missing_text = "Missing", margin = NULL) ab_stacked <- tbl_stack(list(ab_interviewees, ab_client_gender, ab_client_age, ab_respondent_gender, ab_respondent_age, ab_education, ab_wealth, ab_test, ab_act)) %>% modify_column_hide(stat_1) %>% modify_header(stat_2 ~ "**Purchased antibiotic**") %>% modify_spanning_header(stat_2 ~ "") ab_stacked %>% as_flex_table() %>% flextable::set_caption('Proportion who purchased antibiotic, overall and by subgroups.', autonum = officer::run_autonum(seq_id = "tab", bkm = "antibiotic-summary")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ## Table S4 (Results paper supplement). Injections summary. ```{r injections-table} data_injections <- data4 %>% set_variable_labels(injection_combined = "Any injection reported") %>% mutate(test_status = as.factor(case_when( test_results == "Positive" ~ "Positive test (any source)", test_results == "Negative" ~ "Negative test (shop)", test_else_res == "Positive" ~ "Positive test (any source)", (is.na(test_results) | test_results %in% c("Dont know", "Invalid")) & (test_else_res != "Positive" | is.na(test_else_res)) ~ "No test" ))) %>% mutate(test_status = fct_relevel(test_status, "No test", "Positive test (any source)", "Negative test (shop)")) %>% mutate(injection_art_oral = as.factor(case_when( injection_type_combined == "Identified as artemisinin injection" & ACT == "ACT" ~ "Yes", injection_combined == "Yes" ~ "No", ))) %>% mutate(injection_malaria_oral = as.factor(case_when( injection_type_combined == "Identified as 'malaria injection' or 'antimalarial injection' or quinine injection" & ACT == "ACT" ~ "Yes", injection_combined == "Yes" ~ "No", ))) %>% mutate(injection_unspecified_oral = as.factor(case_when( injection_type_combined == "Unspecified injection" & ACT == "ACT" ~ "Yes", injection_combined == "Yes" ~ "No", ))) %>% mutate(injection_combined_artemisinin = ifelse(injection_type_combined == "Identified as artemisinin injection", "Yes", "No"), injection_combined_malaria = ifelse(injection_type_combined == "Identified as 'malaria injection' or 'antimalarial injection' or quinine injection", "Yes", "No"), injection_combined_painkiller = ifelse(injection_type_combined == "Painkiller or antibiotic injection", "Yes", "No"), injection_combined_unspecified = ifelse(injection_type_combined == "Unspecified injection", "Yes", "No")) %>% mutate(across(starts_with("injection_combined_"), fct_na_value_to_level, level = "No")) %>% set_variable_labels( injection_art_oral = "Artemisinin injection plus oral ACT (AL or DP)", injection_malaria_oral = "'Malaria' injection plus oral ACT (AL or DP)", injection_unspecified_oral = "Unspecified injection plus oral ACT (AL or DP)", injection_combined_artemisinin = "Injection identified as artemisinin injection", injection_combined_malaria = "Injection identified as 'malaria injection' or 'antimalarial injection' or quinine injection", injection_combined_painkiller = "Painkiller or antibiotic injection", injection_combined_unspecified = "Unspecified injection" ) gt_injections_test <- data_injections %>% tbl_summary(include = c(injection_combined, injection_type_combined), by = test_status, type = list(injection_combined ~ "dichotomous"), # statistic = list(injection_combined ~ "{N}") missing = "no" ) gt_injections_arm <- data_injections %>% tbl_summary(include = c(injection_combined, injection_type_combined), by = arm, type = list(injection_combined ~ "dichotomous"), # statistic = list(injection_combined ~ "{N}") missing = "no" ) %>% add_overall(last = TRUE, col_label = "**Overall**") gt_injections_merged <- tbl_merge(list(gt_injections_test, gt_injections_arm), tab_spanner = c("**Test status**", "**Study Arm**")) gt_injections_merged %>% as_flex_table() %>% flextable::set_caption('Injections summary.', autonum = officer::run_autonum(seq_id = "tab", bkm = "InjectionsTable")) %>% bold(bold = TRUE, part = "header") %>% autofit() %>% set_table_properties(layout = "autofit") %>% font(fontname = "Calibri", part = "all") %>% fontsize(size = 10, part = "all") %>% theme_zebra() %>% align(j = -1, align = "center", part = "all") ``` ```{r data-dictionary, eval = FALSE} # Create write_excel_csv(codebook::codebook_table(data_gee) %>% select(-c(ordered, hist, format.stata)), file = paste0(repository_path, "/data_gee_codebook_ni.csv") ) ```