This notebook describes the 106,229 adult and pediatric hospitalized COVID-19 positive patients that were studied as part of the Consortium for Clinical Characterization of COVID-19 by EHR Neurology group. This analysis contains the results of patients from 21 different healthcare systems spanning 6 countries.

library(tidyverse)
library(gridGraphics)
library(gridExtra)
library(ggpubr)
library(DT)
library(kableExtra)
library(cowplot)
library(RColorBrewer)
library(glue)
library(egg)
source("R/plot_theme.R")
source("R/demo_tables.R")
source("R/utils.R")

Import Data from each Healthcare System

Each participating healthcare system ran the analysis locally on patient level data using our customized R package. The output of each local analysis consisted of aggregated counts and summary statistics (means, medians, hazard ratios, p-values, etc). Summary results from each healthcare system were then aggregated and are imported and then combined using the following code:

# read in files from results folder 
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
  path = "results",
  pattern = ".rda",
  full.names = TRUE
)


for (rda in rdas) {
  load(rda)
}

rm(rdas, rda)

# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
  filter(!is.na(date_v4_received)) %>%
  pull(siteid) %>%
  paste("results", sep = "_")

# list sites without race
sites_wo_race <- site_params %>%
  filter(!include_race) %>%
  pull(siteid)

# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))

## load in the pre-processed comorbidity and meta-data
load("processed/comorbidity_table_cnps.rda")
load("processed/neuro_pt_counts.rda") 

Pre-processing

The following code pre-processes the demographic and clinical course data in order to construct a Table 1.

1. Format primary demographic characteristics

# create list of hospitals for adult and pediatric analyses
adult_sites = sorted_sites[!sorted_sites %in% c("BCH_results", "GOSH_results")]

pediatric_sites = sorted_sites[!sorted_sites %in% c("VA1_results", "VA2_results", "VA3_results", "VA4_results", "VA5_results")]

demo_table_adult <- create_demo_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

demo_table_pediatric <- create_demo_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric)

2. Format clinical characteristics (i.e: continuous variables such as median time to discharge)

clinical_table_adult <- create_clinical_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

clinical_table_pediatric <- create_clinical_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

clinical_table_combine <- rbind(clinical_table_adult, clinical_table_pediatric)

3. Conduct additional pre-processing of clinical variables

clinical_table_adult_clean <- clean_clinical_tables(clinical_table = clinical_table_adult)

clinical_table_pediatric_clean <- clean_clinical_tables(clinical_table = clinical_table_pediatric)

clinical_table_clean_combine <- rbind(clinical_table_adult_clean, clinical_table_pediatric_clean)

4. Add Comorbidity data

Here we create separate lists of the top 4 comorbidities in the adult and pediatric populations

comorbidity_table_adults <- comorb_table_wide %>%
  filter(population == "Adult") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

comorbidity_table_pediatric <- comorb_table_wide %>%
  filter(population == "Pediatric") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

top_comorb_adults <- comorbidity_table_adults$Comorbidity
top_comorb_pediatrics <- comorbidity_table_pediatric$Comorbidity

Demographics

Table One - Combined

The below table contains the counts and summary stats of the entire cohort (adult & pediatric patients).

neuro_pt_counts_sum <- colSums(neuro_pt_counts[,-1]) %>% 
  data.frame() %>% 
  t() %>% 
  data.frame()

# specify the order of Table 1
row_order_adult <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_adults, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )


tableone_combine <- create_tableone(demo_table = demo_table_combine,
                                    clinical_table = clinical_table_clean_combine) %>%
  rbind(., comorb_table_wide %>%
          group_by(Comorbidity) %>%
          mutate(Comorb_Total = sum(Comorb_Total, na.rm = TRUE),
          None_sum = sum(None_Total, na.rm = TRUE),
          CNS_sum = sum(CNS_Total, na.rm = TRUE),
          PNS_sum = sum(PNS_Total, na.rm = TRUE),
          None_perc = round(None_sum/neuro_pt_counts_sum$None_n*100,1),
          Central_perc = round(CNS_sum/neuro_pt_counts_sum$CNS_n*100,1),
          Peripheral_perc = round(PNS_sum/neuro_pt_counts_sum$PNS_n*100,1),
          None = paste(None_sum, "(", None_perc, ")"),
          Central = paste(CNS_sum, "(", Central_perc, ")"),
          Peripheral = paste(PNS_sum, "(", Peripheral_perc, ")")) %>%
          distinct(Comorbidity, Comorb_Total, None, Central, Peripheral) %>%
          rename(`Table 1` = "Comorbidity",
          N = "Comorb_Total")) %>%
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_combine %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
      add_header_above(c(
      " ",
      "Total Patients" = 1,
      "Neurological Disease" = 3
      )) %>%
      kable_paper("striped", full_width = F) %>%
      pack_rows("Sex", 2, 4) %>%
      pack_rows("Age", 5, 13) %>%
      pack_rows("Race & Ethnicity", 14, 20) %>%
      pack_rows("Past Medical History", 21, 27) %>%
      pack_rows("Severity", 28, 30) %>%
      pack_rows("Survival", 31, 33) %>%
      pack_rows("Discharge", 34, 36) %>%
      pack_rows("Readmission", 37, 40)
Total Patients
Neurological Disease
Table 1 N No Neurological Condition (NNC) Central Peripheral
All Patients 106229 88340 (100%) 15101 (100%) 2788 (100%)
Sex
Female 37590 32399 (36.7%) 4247 (28.1%) 944 (33.9%)
Male 68638 55940 (63.3%) 10854 (71.9%) 1844 (66.1%)
Unknown Sex 1 1 (0%) 0 (0%) 0 (0%)
Age
0-2 769 747 (0.8%) 22 (0.1%) 0 (0%)
3-5 275 251 (0.3%) 24 (0.2%) 0 (0%)
6-11 403 368 (0.4%) 33 (0.2%) 2 (0.1%)
12-17 707 637 (0.7%) 60 (0.4%) 10 (0.4%)
18-25 2328 2138 (2.4%) 145 (1%) 45 (1.6%)
26-49 17799 16122 (18.3%) 1166 (7.7%) 511 (18.5%)
50-69 36885 31556 (35.7%) 4169 (27.7%) 1160 (41.9%)
70-79 24860 19672 (22.3%) 4508 (30%) 680 (24.6%)
80+ 22094 16813 (19%) 4920 (32.7%) 361 (13%)
Race & Ethnicity
American Indian 350 295 (0.3%) 55 (0.4%) 0 (0%)
Asian 1694 1464 (1.7%) 205 (1.4%) 25 (0.9%)
Black 16815 13368 (15.1%) 2995 (19.9%) 452 (16.5%)
Hawaiian/Pacific Islander 297 255 (0.3%) 41 (0.3%) 1 (0%)
Hispanic/Latino 870 819 (0.9%) 29 (0.2%) 22 (0.8%)
White 41871 33360 (37.8%) 7401 (49.1%) 1110 (40.5%)
Other 44220 38750 (43.9%) 4336 (28.8%) 1134 (41.3%)
Past Medical History
Hypertension 35587 27323 ( 30.9 ) 7277 ( 48.2 ) 987 ( 35.4 )
Alcohol abuse 30130 23065 ( 26.1 ) 6194 ( 41 ) 871 ( 31.2 )
Drug abuse 26596 20287 ( 23 ) 5528 ( 36.6 ) 781 ( 28 )
Diabetes 21969 16905 ( 19.1 ) 4429 ( 29.3 ) 635 ( 22.8 )
Median Elixhauser score (sd) 0.3 (0.7) 0.2 (0.5) 1.4 (2) 0.6 (1.7)
Median pre admission cns (sd) 0 (0) 0 (0) 0 (0) 0 (0)
Median pre admission pns (sd) 0 (0) 0 (0) 0 (0) 0 (0.1)
Severity
Non-Severe 66252 57771 (65.4%) 7007 (46.4%) 1474 (52.7%)
Severe 39985 30576 (34.6%) 8084 (53.6%) 1325 (47.3%)
Median time to severe (sd) 0.6 (1) 0.8 (1.3) 0.4 (0.7) 0.8 (1.1)
Survival
Alive 87376 74389 (84.2%) 10392 (68.8%) 2595 (93.1%)
Deceased 18849 13953 (15.8%) 4705 (31.2%) 191 (6.9%)
Median time to death (sd) 15.3 (6.4) 15.9 (5.8) 22.4 (32.5) 36.9 (41.4)
Discharge
Discharged 103165 85639 (96.9%) 14801 (98%) 2725 (97.7%)
Not Discharged 3063 2701 (3.1%) 297 (2%) 65 (2.3%)
Median time to first discharge (sd) 8.4 (4.2) 6.5 (3.6) 12.8 (6.1) 14.5 (18.3)
Readmission
Not Readmitted 91646 76877 (87%) 12435 (82.4%) 2334 (83.4%)
Readmitted 14569 11456 (13%) 2650 (17.6%) 463 (16.6%)
Median time to first readmission (sd) 28.1 (20.3) 26.4 (17.6) 31.9 (33.7) 38.1 (28.4)
Median number of readmissions (sd) 0.1 (0.3) 0 (0) 0.1 (0.3) 0.1 (0.3)
write.csv(tableone_combine, 'tables/table1_combined.csv', row.names = FALSE)

Table One - Adults

tableone_adult <- create_tableone(demo_table = demo_table_adult,
                                  clinical_table = clinical_table_adult_clean) %>%
                                  rbind(., comorbidity_table_adults %>%
                                          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
                                          rename(`Table 1` = "Comorbidity",
                                                 N = "Comorb_Total",
                                                 None = "NNC_N_%",
                                                 Central = "CNS_N_%",
                                                 Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_adult %>%
    rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 9) %>%
  pack_rows("Race & Ethnicity", 10, 16) %>%
  pack_rows("Past Medical History", 17, 23) %>%
  pack_rows("Severity", 24, 26) %>%
  pack_rows("Survival", 27, 29) %>%
  pack_rows("Discharge", 30, 32) %>%
  pack_rows("Readmission", 33, 36)
Total Patients
Neurological Disease
Table 1 N No Neurological Condition (NNC) Central Peripheral
All Patients 104031 86321 (100%) 14938 (100%) 2772 (100%)
Sex
Female 36564 31445 (36.4%) 4185 (28%) 934 (33.7%)
Male 67466 54875 (63.6%) 10753 (72%) 1838 (66.3%)
Unknown Sex 1 1 (0%) 0 (0%) 0 (0%)
Age
18-25 2328 2138 (2.5%) 145 (1%) 45 (1.6%)
26-49 17799 16122 (18.7%) 1166 (7.8%) 511 (18.5%)
50-69 36885 31556 (36.6%) 4169 (28%) 1160 (42.1%)
70-79 24860 19672 (22.8%) 4508 (30.2%) 680 (24.7%)
80+ 22094 16813 (19.5%) 4920 (33%) 361 (13.1%)
Race & Ethnicity
American Indian 349 294 (0.3%) 55 (0.4%) 0 (0%)
Asian 1633 1412 (1.6%) 196 (1.3%) 25 (0.9%)
Black 16631 13202 (15.3%) 2979 (20%) 450 (16.5%)
Hawaiian/Pacific Islander 296 254 (0.3%) 41 (0.3%) 1 (0%)
Hispanic/Latino 850 799 (0.9%) 29 (0.2%) 22 (0.8%)
White 41206 32752 (38%) 7354 (49.3%) 1100 (40.3%)
Other 42971 37587 (43.6%) 4254 (28.5%) 1130 (41.4%)
Past Medical History
Hypertension 35566 27302 ( 31.6 %) 7277 ( 48.7 %) 987 ( 35.6 %)
Alcohol abuse 29918 22879 ( 26.5 %) 6169 ( 41.3 %) 870 ( 31.4 %)
Drug abuse 26389 20097 ( 23.3 %) 5512 ( 36.9 %) 780 ( 28.1 %)
Diabetes 21952 16888 ( 19.6 %) 4429 ( 29.6 %) 635 ( 22.9 %)
Median Elixhauser score (sd) 0.3 (0.7) 0.2 (0.5) 1.4 (2) 0.6 (1.7)
Median pre admission cns (sd) 0 (0) 0 (0) 0 (0) 0 (0)
Median pre admission pns (sd) 0 (0) 0 (0) 0 (0) 0 (0.1)
Severity
Non-Severe 64578 56181 (65.1%) 6935 (46.4%) 1462 (52.6%)
Severe 39468 30140 (34.9%) 8008 (53.6%) 1320 (47.4%)
Median time to severe (sd) 0.6 (1) 0.8 (1.3) 0.4 (0.7) 0.8 (1.1)
Survival
Alive 85212 72388 (83.9%) 10247 (68.6%) 2577 (93.1%)
Deceased 18820 13933 (16.1%) 4696 (31.4%) 191 (6.9%)
Median time to death (sd) 15.3 (6.4) 15.9 (5.8) 22.4 (32.5) 36.9 (41.4)
Discharge
Discharged 101032 83675 (96.9%) 14650 (98%) 2707 (97.7%)
Not Discharged 3005 2645 (3.1%) 295 (2%) 65 (2.3%)
Median time to first discharge (sd) 8.4 (4.2) 6.5 (3.6) 12.8 (6.1) 14.5 (18.3)
Readmission
Not Readmitted 89802 75170 (87.1%) 12312 (82.4%) 2320 (83.5%)
Readmitted 14237 11151 (12.9%) 2626 (17.6%) 460 (16.5%)
Median time to first readmission (sd) 28.1 (20.3) 26.4 (17.6) 31.9 (33.7) 38.1 (28.4)
Median number of readmissions (sd) 0.1 (0.3) 0 (0) 0.1 (0.3) 0.1 (0.3)
write.csv(tableone_adult, 'tables/table1_adults.csv', row.names = FALSE)

Table One - Pediatrics

# specify the order of Table 1
row_order_pediatric <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_pediatrics, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )

tableone_pediatric <- create_tableone(demo_table = demo_table_pediatric,
                                      clinical_table = clinical_table_pediatric_clean)  %>%
  rbind(., comorbidity_table_pediatric %>%
          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>% 
          rename(`Table 1` = "Comorbidity",
                  N = "Comorb_Total",
                  None = "NNC_N_%",
                  Central = "CNS_N_%",
                  Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_pediatric, `Table 1`))

kbl(tableone_pediatric %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race & Ethnicity", 9, 15) %>%
  pack_rows("Past Medical History", 16, 22) %>%
  pack_rows("Severity", 23, 25) %>%
  pack_rows("Survival", 26, 28) %>%
  pack_rows("Discharge", 29, 31) %>%
  pack_rows("Readmission", 32, 35)
Total Patients
Neurological Disease
Table 1 N No Neurological Condition (NNC) Central Peripheral
All Patients 2198 2019 (100%) 163 (100%) 16 (100%)
Sex
Female 1026 954 (47.3%) 62 (38%) 10 (62.5%)
Male 1172 1065 (52.7%) 101 (62%) 6 (37.5%)
Unknown Sex 0 0 (0%) 0 (0%) 0 (0%)
Age
0-2 769 747 (37.3%) 22 (15.8%) 0 (0%)
3-5 275 251 (12.5%) 24 (17.3%) 0 (0%)
6-11 403 368 (18.4%) 33 (23.7%) 2 (16.7%)
12-17 707 637 (31.8%) 60 (43.2%) 10 (83.3%)
Race & Ethnicity
American Indian 1 1 (0%) 0 (0%) 0 (0%)
Asian 61 52 (2.6%) 9 (5.8%) 0 (0%)
Black 184 166 (8.3%) 16 (10.4%) 2 (12.5%)
Hawaiian/Pacific Islander 1 1 (0%) 0 (0%) 0 (0%)
Hispanic/Latino 20 20 (1%) 0 (0%) 0 (0%)
White 665 608 (30.2%) 47 (30.5%) 10 (62.5%)
Other 1249 1163 (57.8%) 82 (53.2%) 4 (25%)
Past Medical History
Alcohol abuse 212 186 ( 9.2 %) 25 ( 15.3 %) 1 ( 6.2 %)
Drug abuse 207 190 ( 9.4 %) 16 ( 9.8 %) 1 ( 6.2 %)
Weight loss 123 109 ( 5.4 %) 13 ( 8 %) 1 ( 6.2 %)
Cardiac arrhythmias 109 94 ( 4.7 %) 12 ( 7.4 %) 3 ( 18.8 %)
Median Elixhauser score (sd) 2.2 (4.5) 0 (0) 4.4 (9.2) 2.5 (3.6)
Median pre admission cns (sd) 0.2 (0.7) 0.1 (0.2) 1.9 (3.9) 0 (0)
Median pre admission pns (sd) 0 (0) 0 (0) 0 (0) 0.2 (0.6)
Severity
Non-Severe 1674 1590 (78.5%) 72 (48.6%) 12 (70.6%)
Severe 517 436 (21.5%) 76 (51.4%) 5 (29.4%)
Median time to severe (sd) 2.4 (5.3) 3.8 (9.5) 10.8 (33.1) 0.9 (2.5)
Survival
Alive 2164 2001 (99%) 145 (94.2%) 18 (100%)
Deceased 29 20 (1%) 9 (5.8%) 0 (0%)
Median time to death (sd) 12.6 (36.3) 53.5 (76.3) 5.8 (10.2) 95 (268.7)
Discharge
Discharged 2133 1964 (97.2%) 151 (98.7%) 18 (100%)
Not Discharged 58 56 (2.8%) 2 (1.3%) 0 (0%)
Median time to first discharge (sd) 5.1 (4.2) 3.8 (4.2) 5.8 (2.7) 5.9 (4.5)
Readmission
Not Readmitted 1844 1707 (84.8%) 123 (83.7%) 14 (82.4%)
Readmitted 332 305 (15.2%) 24 (16.3%) 3 (17.6%)
Median time to first readmission (sd) 20 (20.2) 20.1 (19) 75.3 (86) 14.4 (22.8)
Median number of readmissions (sd) 0.1 (0.3) 0 (0) 0.2 (0.4) 0.4 (0.5)
write.csv(tableone_pediatric, 'tables/table1_pediatric.csv', row.names = FALSE)

Demographic & Clinical Course Analysis

Next we will evaluate characteristics among patients stratified by neurological status: 1) patients with no neurological condition (NNC), 2) patients with a central nervous system condition (CNS), and 3) patients with a peripheral nervous system condition (PNS).

Categorical variables will be evaluated with chi-square tests, while continuous variables will be evaluated with a non-parametric anova (Kruskal-Wallis) test.

1. Conduct chi-squared tests for categorical demographic variables

compute_chi_square <- function(tableone, demo_table) {
  
  N = tableone %>% select(N) %>% head(1) %>% as.numeric()

  # sum of all neuro condition groups
  cns_n = as.numeric(gsub( " .*$", "", tableone[1,"Central"]))
  pns_n = as.numeric(gsub( " .*$", "", tableone[1,"Peripheral"]))
  none_n = as.numeric(gsub( " .*$", "", tableone[1,"None"]))
  
  # sum up the total counts of each demographic variable across healthcare systems
  tableOne_sums <- demo_table %>%
    group_by(variable, Demo_var, Demo_var_i) %>%
    summarise(across(
      starts_with("n_var"),
      function(x) sum(x, na.rm = TRUE)
    ), .groups = "drop")
  
  # create list of demographic/clinical variables
  vars = tableOne_sums$variable
  
  # for binary variables, we should group by category 
  # Thus, only run the analysis for sex.male rather than both sex.female AND sex.male which would be redundant
  # we will also remove `age_group.unknown` since their is only 1 patient here
  exclude <- c("readmitted.false", "sex.female", "sex.other", "survival.deceased", "severity.non-severe",
               "covid_discharged.discharged", "age_group.Unknown")
  vars <- vars[!vars %in% exclude]
  
  # create empty list for chi.square test results
  chi_result_list = list()
  
  # for each variable, we will run the chi.square test
  for(i in vars) {
    test = calc_chisq(i, tableOne_sums, none_n, pns_n, cns_n)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    chi_results <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() %>% 
      mutate(p_value = as.numeric(p_value))
    
    rownames(chi_results) <- NULL
    
    chi_result_list[[i]] <- chi_results
  }
  
  # save list of chi.square test results
  chisq_results <- bind_rows(chi_result_list)
  
  return(chisq_results)
  
  
  }

Combined Adult & Pediatric Chi-Square Results

chisq_combine <- compute_chi_square(tableone = tableone_combine %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_combine %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_combine %>% arrange(p_value))

Adult Chi-Square Results

chisq_adult <- compute_chi_square(tableone = tableone_adult %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_adult %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_adult %>% arrange(p_value))

Pediatric Chi-Square Results

chisq_pediatric <- compute_chi_square(tableone = tableone_pediatric %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_pediatric %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_pediatric %>% arrange(p_value))

2. Conduct non-parametric anova (Kruskal-Wallis) tests for continuous clinical variables

compute_kruskal <- function(clinical_table) {

  # create empty list to save processed variables
  processed_list_figs <- list()
  
  # create a list of clinical variables to further pre-process
  vars_to_process <- unique(clinical_table$name)
  
  # for each variable, we will remove the [min, max] as before
  for (i in vars_to_process) {
    mod_table <- clinical_table %>%
      rename("var" = name) %>%
      filter(var == i) %>%
      mutate(
        site = toupper(site),
        None = sub("(\\(.*|\\[.*)", "", None),
        Peripheral = sub("(\\(.*|\\[.*)", "", Peripheral),
        Central = sub("(\\(.*|\\[.*)", "", Central)
      ) %>%
      mutate(across(None:Central, as.numeric))
  
    processed_list_figs[[i]] <- mod_table
  }
  
  clinical_table_anova <- bind_rows(processed_list_figs)
  
  # format the continuous variables
  cont_vars = clinical_table_anova %>%
    mutate(
      # create a variable without mean/median prefix
      grouped_var = gsub("Mean |Median | \\[Min, Max\\]| \\(SD\\)", "", var)
    ) %>%
    pivot_longer(
      cols = None:Central,
      names_to = "type"
    ) %>%
    mutate(type = factor(type, levels = c("None", "Central", "Peripheral"))) %>% 
    # select variables to analyze
    filter(grouped_var %in% c("Elixhauser score",
                              "pre admission cns", 
                              "pre admission pns", 
                              "time to death", 
                              "time to first discharge", 
                              "time to severe",
                              "number of readmissions",
                              "time to first readmission")) %>% 
    distinct()
  
  # create function to conduct anova
  cont_var_results <- function(variable) {
    
    df <- cont_vars %>% 
    filter(grouped_var == paste(variable))
    
    # kruskal.test calculates the kruskal-Wallis H-statistic 
    # does not assume normality between groups
    # also called the one-way ANOVA on ranks
    test = kruskal.test(df$value ~ df$type)
    
  }
  
  # create empty list for Kruskal-Wallis results
  cont_vars_list = list()
  
  # create list of unique variables
  outcome_cont_vars = unique(cont_vars$grouped_var)
  
  for(i in outcome_cont_vars) {
    
    test = cont_var_results(i)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    kw <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() 
    
    rownames(kw) <- NULL
    
    cont_vars_list[[i]] <- kw
  
    
  }
  
  anova_results <- bind_rows(cont_vars_list)
  
  }

Combined Adult & Pediatric Kruskal-Wallis Results

kw_combine <- compute_kruskal(clinical_table = clinical_table_combine)
datatable(kw_combine %>% arrange(p_value))

Adult Kruskal-Wallis Results

Note: median CNS is 0 for all sites (reason for the NA values)

kw_adult <- compute_kruskal(clinical_table = clinical_table_adult)
datatable(kw_adult %>% arrange(p_value))

Pediatric Kruskal-Wallis Results

kw_pediatric <- compute_kruskal(clinical_table = clinical_table_pediatric)
datatable(kw_pediatric %>% arrange(p_value))

3. Combine chi-square and Kruskal-Wallis results

We will evaluate significance when controlling for False Discovery Rate (FDR)

tableone_stats <- function(chisq_results, anova_results, population) {

  demo_stats <- rbind(chisq_results %>% select(Variable, p_value),
                      anova_results %>% select(Variable, p_value))
  
  # adjust p-values with FDR
  demo_stats$adj_p_value <- p.adjust(demo_stats$p_value, method = "fdr")
  
  # round & format p-value 
  demo_stats <- demo_stats %>% 
    mutate(p_value = as.numeric(p_value),
           pvalue = if_else(p_value < 0.001, "< 0.001", paste(round(p_value, 3))),
           adj.p_value = if_else(adj_p_value < 0.001, "< 0.001", paste(round(adj_p_value, 3))))
  
  # tidy up the demographics stat table
  demo_stats_tidy <- demo_stats %>% 
    rename(`Unadjusted P-value (raw)` = p_value,
           `FDR Adjusted P-value (raw)` = adj_p_value,
           `Unadjusted P-value (tidy)` = pvalue,
           `FDR Adjusted P-value (tidy)` = adj.p_value) %>% 
    select(Variable, `Unadjusted P-value (raw)`, `FDR Adjusted P-value (raw)`, `Unadjusted P-value (tidy)`, `FDR Adjusted P-value (tidy)`)

  write.csv(demo_stats_tidy, paste0("tables/Table1_pvals_", population, ".csv"), row.names = FALSE)
  
  return(demo_stats_tidy)
  
}

4. Incorporate Comorbidities

We will also add the top 4 comorbidities for each population to our table one. For the combined adult and pediatric table, we will use the top 4 comorbidities of the adult population.

# define matrix for each comorbidity
comorb_matrix <- function(comorbidity_table, comorbidity, population) {

    if(population=='Combined') {
      comorbidity_table = comorbidity_table
    } else if (population=="Adult") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Adult")
    } else if (population=="Pediatric") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Pediatric")
    } else {
      print('incorrect population specification')
    }
  
  mat_comorb <- comorbidity_table %>%
    filter(Comorbidity == paste(comorbidity)) %>%
    group_by(Comorbidity) %>% 
    mutate(None_comorb = sum(None_Total, na.rm = TRUE),
    CNS_comorb = sum(CNS_Total, na.rm = TRUE),
    PNS_comorb = sum(PNS_Total, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(None_comorb, CNS_comorb, PNS_comorb) %>%
    mutate(none_dif = neuro_pt_counts_sum$None_n - None_comorb,
    pns_dif = neuro_pt_counts_sum$PNS_n - PNS_comorb,
    cns_dif = neuro_pt_counts_sum$CNS_n - CNS_comorb) %>%
    data.frame() %>%
    #as.integer() %>%
    matrix(nrow = 2, ncol = 3, byrow = TRUE)

  comorb_results <- chisq.test(unlist(mat_comorb))
  
  results_df <- data.frame(Variable = paste(comorbidity),
                                X2 = round(comorb_results$statistic, 4),
                                p_value = comorb_results$p.value,
                                df = comorb_results$parameter)
  row.names(results_df) <- NULL
  
  return(results_df)
}

## combined comorbidities
comorbidity_combined_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Combined")
  comorbidity_combined_chi_list[[i]] <- comorb_chi_results
}

comorbidity_combined_chi <- comorbidity_combined_chi_list %>%
  bind_rows() 

chisq_combine_comorb <- chisq_combine %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_combined_chi) 



## adult comorbidities
comorbidity_adult_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Adult")
  comorbidity_adult_chi_list[[i]] <- comorb_chi_results
}

comorbidity_adult_chi <- comorbidity_adult_chi_list %>%
  bind_rows() 

chisq_adult_comorb <- chisq_adult %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_adult_chi) 

## pediatric comorbidities
comorbidity_pediatric_chi_list <- list()

for(i in top_comorb_pediatrics) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Pediatric")
  comorbidity_pediatric_chi_list[[i]] <- comorb_chi_results
}

comorbidity_pediatric_chi <- comorbidity_pediatric_chi_list %>%
  bind_rows() 

chisq_pediatric_comorb <- chisq_pediatric %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_pediatric_chi) 

Table One - Combined Analysis

combine_tableone_stats <- tableone_stats(chisq_results = chisq_combine_comorb,
                                         anova_results = kw_combine,
                                         population = "combined") 
  

datatable(combine_tableone_stats)

Table One - Adult Analysis

adult_tableone_stats <- tableone_stats(chisq_results = chisq_adult_comorb,
                                         anova_results = kw_adult,
                                         population = "adult") 
  

datatable(adult_tableone_stats)

Table One - Pediatric Analysis

pediatric_tableone_stats <- tableone_stats(chisq_results = chisq_pediatric_comorb,
                                         anova_results = kw_pediatric,
                                         population = "pediatric") 
  

datatable(pediatric_tableone_stats)

Neurological Diagnosis Analysis

Next, we will evaluate the diagnoses captured with the CNS and PNS groups. Specifically, we will examine the frequency of CNS and PNS diagnoses by patient age group and by clinical outcomes (mortality and COVID-19 severity).

1. Format neurological icd code data

# import list of ICD-9/ICD-10 codes
neuro_icds_10 <-
  readxl::read_excel("public-data/2020-02-22_neuro-icd10_CNSvPNS.xlsx", sheet = 2) %>% 
  rename(
    "icd" = `ICD-10`,
    "pns_cns" = `Nervous system Involvement (1=central, 2=peripheral, 3=irrelevant)`,
    "type" = `ICD-10_type (1=first three alphanumeric code only, 2=digits after decimal point)`) %>%
  filter(type == 1) %>% 
  mutate(pns_cns = as.factor(pns_cns) %>% fct_recode(
    Central = "1",
    Peripheral = "2"
  )) %>%
  distinct(icd, `Neurological Disease Category`, pns_cns, `ICD-10 Description Revised`) %>%
  rename("description" = `ICD-10 Description Revised`) %>%
  mutate(concept_type = "icd-10")

neuro_icds_9 <- read.csv("public-data/icd9_tab_CNSvPNS.csv") %>%
  rename(
    "Neurological Disease Category" = "Neurological.Disease.Category",
    "pns_cns" = `Nervous.system.Involvement..1.central..2.peripheral.`,
    "icd_description" = `icd9_desc_revised`
  ) %>%
  mutate(
    pns_cns = as.factor(pns_cns) %>% fct_recode(
      Central = "1",
      Peripheral = "2"
    ),
    concept_type = "DIAG-ICD9"
  ) %>%
  select(icd, `Neurological Disease Category`, pns_cns, icd_description) %>%
  rename("description" = icd_description) %>%
  mutate(concept_type = "icd-9")

# bind list of ICD-9/ICD-10 codes
icds <- rbind(neuro_icds_10, neuro_icds_9)

2. Calculate total number of neurological diagnoses for each adults & pediatrics

compute_neuro_counts <- function(sorted_sites, sample_size, is_pediatric = FALSE) {
  
    # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    diag_table_list[[i]] <- tmp_diag %>%
      filter(variable == "sex.female" | variable == "sex.male" | variable == "sex.other") %>%
      select(site, contains("n_var"))
  }
  
  diag_table_wide <- bind_rows(diag_table_list)
  
  # we will count the total number and percent across healthcare systems
  diag_counts <- colSums(Filter(is.numeric, diag_table_wide), na.rm = TRUE) %>% 
    data.frame() %>% 
    mutate(perc = round(./sample_size * 100, 1))
  
  # format diagnostic table
  diag_counts <- tibble::rownames_to_column(diag_counts, "icd") %>%
    mutate(icd = gsub("n_var_", "", icd)) %>%
    rename(`Number of Patients` = ".") %>%
    full_join(., icds, by = "icd") %>%
    dplyr::arrange(desc(`Number of Patients`)) %>% 
    mutate(`Number of Patients (% of Cohort)` = paste(`Number of Patients`, "(", perc, ")", sep = " ")) %>% 
    select(pns_cns, `Neurological Disease Category`, concept_type, 
           icd, description, `Number of Patients (% of Cohort)`) %>% 
      rename(Code = "icd",
           `Nervous System` = "pns_cns",
           Description = "description",
           `ICD Version` = "concept_type") %>% 
    mutate(`Number of Patients (% of Cohort)` = if_else(`Number of Patients (% of Cohort)` == "NA ( NA )", "0 ( 0 )", `Number of Patients (% of Cohort)`),
           Code = if_else(Code == "NN", "No Neurological Disease (NNC)", Code))
  
  return(diag_counts)
  
  }
# create vectors containing patient sample sizes
n_adult <- as.numeric(tableone_adult$N)[1]
n_pediatric <- as.numeric(tableone_pediatric$N)[1]

# calculate the total neurological patient counts 
neuro_counts_adult <- compute_neuro_counts(sorted_sites = adult_sites,
                                           sample_size = n_adult, 
                                           is_pediatric = FALSE)

neuro_counts_pediatric <- compute_neuro_counts(sorted_sites = pediatric_sites,
                                           sample_size = n_pediatric, 
                                           is_pediatric = TRUE)

neuro_counts_combined <- neuro_counts_adult %>% 
  rename(`Number of Adult Patients (% of Cohort)` = `Number of Patients (% of Cohort)`) %>% 
  left_join(., neuro_counts_pediatric %>% 
              select(`ICD Version`, Code, `Number of Patients (% of Cohort)`) %>% 
              rename("Number of Pediatric Patients (% of Cohort`)" = `Number of Patients (% of Cohort)`))

write.csv(neuro_counts_combined, paste0("tables/Supp.Table2_diagnoses", ".csv"), row.names = FALSE)

Number (%) of Adult Patients with each Neurological Diagnosis

datatable(neuro_counts_adult) 

Number (%) of Pediatric Patients with each Neurological Diagnosis

datatable(neuro_counts_pediatric) 

Evaluate Diagnoses by Age & Outcome

compute_stratified_neuro_counts <- function(tableone, sorted_sites, is_pediatric = FALSE) {
  
  # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    #print(i)
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    try(
    diag_table_list[[i]] <- tmp_diag %>%
      select(site, variable, starts_with("n_var")) %>% 
      pivot_longer(cols = starts_with("n_var"), names_to = "code", values_to = "freq") %>% 
      ungroup()
    )
  }

  diag_table_wide_all <- bind_rows(diag_table_list) %>% 
      group_by(variable, code) %>% 
    # total number of patients who had each code for each variable across sites
      mutate(total_n_var  = sum(freq, na.rm = TRUE)) %>% 
    distinct(variable, code, total_n_var) %>% 
    mutate(code = gsub("n_var_", "", code),
           variable = gsub("Severity.", "", variable),
           variable = gsub("Survival.", "", variable),
           variable = gsub("readmitted", "", variable),
           variable = gsub("age_group", "", variable),
           variable = gsub("covid_discharged", "", variable),
           variable = if_else(variable == ".TRUE", "Readmitted", variable)) %>% 
    left_join(., icds %>% rename(code = "icd"), by = "code") %>% 
    ungroup() %>% 
    filter(!concept_type == "icd-9")


  # refactor neuro status
  diag_table_wide_all$pns_cns <- factor(diag_table_wide_all$pns_cns,
                                        levels=c("Central", "Peripheral"),
                                        labels=c("CNS", "PNS"))
  
  
  return(diag_table_wide_all)
  
  }
stratified_neuro_counts_adult <- compute_stratified_neuro_counts(sorted_sites = adult_sites,
                                                                 is_pediatric = FALSE,
                                                                 tableone = tableone_adult)

stratified_neuro_counts_pediatric <- compute_stratified_neuro_counts(sorted_sites = pediatric_sites,
                                                                 is_pediatric = TRUE,
                                                                 tableone = tableone_pediatric)
compute_stratified_neuro_counts_age <- function(stratified_neuro_counts, tableone) {
  
  stratified_neuro_counts$variable <- factor(stratified_neuro_counts$variable, 
                                     levels=c(".00to02", ".03to05", ".06to11", ".12to17",
                                              ".18to25", ".26to49", ".50to69", ".70to79", ".80plus"), 
                                     labels=c("0-2 Years", "3-5 Years", "6-11 Years", "12-17 Years",
                                              "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
  
  # total counts
  n_0_2 <- get_table_n(tableone, var = "0-2")
  n_3_5 = get_table_n(tableone, var = "3-5")
  n_6_11 = get_table_n(tableone, var = "6-11")
  n_12_17 = get_table_n(tableone, var = "12-17")
  n_18_25 = get_table_n(tableone, var = "18-25")
  n_26_49 = get_table_n(tableone, var = "26-49")
  n_50_69 = get_table_n(tableone, var = "50-69")
  n_70_79 = get_table_n(tableone, var = "70-79")
  n_80 = get_table_n(tableone, var = "80+")
  
  age_df <- data.frame("variable" = c("0-2 Years",
                                    "3-5 Years", 
                                    "6-11 Years", 
                                    "12-17 Years",
                                    "18-25 Years", 
                                    "26-49 Years", 
                                    "50-69 Years", 
                                    "70-79 Years", 
                                    "80+ Years"),
                     "n_var" = c(n_0_2,
                               n_3_5,
                               n_6_11,
                               n_12_17,
                               n_18_25,
                               n_26_49,
                               n_50_69,
                               n_70_79,
                               n_80))

  
  stratified_neuro_counts <- stratified_neuro_counts %>% 
    filter(grepl("Years", variable),
                !code == "NN", 
                !total_n_var == 0) %>% 
         # total_n would be calculating total codes across each age group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         ungroup() %>% 
         left_join(., age_df, by = "variable") %>% 
         mutate(n_var = as.numeric(n_var),
                perc = as.character(round(total_n_var/n_var*100,1)),
                perc = if_else(perc < 0.1, '<0.1', perc),
                perc = paste0(perc, "%")) 
  
  return(stratified_neuro_counts)
  
  
}
stratified_neuro_counts_age_adult <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_adult,
                                                                         tableone = tableone_adult)

stratified_neuro_counts_age_pediatric <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_pediatric,
                                                                 tableone = tableone_pediatric)

Diagnoses by Adult and Pediatric Populations

n_0_2 <- get_table_n(tableone_combine, var = "0-2")
#n_3_5 <- get_table_n(tableone_combine, var = "3-5")
n_6_11 <- get_table_n(tableone_combine, var = "6-11")
n_12_17 <- get_table_n(tableone_combine, var = "12-17")

total_ped_age_count = as.numeric(n_0_2) +  as.numeric(n_6_11) + as.numeric(n_12_17)

stratified_neuro_counts_age_pediatric_reformat = stratified_neuro_counts_age_pediatric %>%
  select(variable, pns_cns, description, n_var, total_n_var) %>%
  mutate(variable = "0-17 Years") %>%
  group_by(pns_cns, description) %>%
  mutate(total_n_var = sum(total_n_var, na.rm = TRUE)) %>%
  ungroup() %>%
  # n_var is the total per `age_group` stratification
  select(-n_var) %>%
  distinct() %>%
  mutate(perc = round(total_n_var/total_ped_age_count*100,1),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
         perc = paste0(perc, "%"))

stratified_neuro_counts_age_combined <- stratified_neuro_counts_age_adult %>%
  select(variable, pns_cns, description, total_n_var, perc) %>%
  rbind(., stratified_neuro_counts_age_pediatric_reformat %>%
          select(variable, pns_cns, description, total_n_var, perc))

stratified_neuro_counts_age_combined$variable <- factor(stratified_neuro_counts_age_combined$variable,
                                                                      levels=c("0-17 Years", "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
group.colors <- c(NNC = "gray", PNS = "slateblue", CNS ="tomato")

adult_ped_combined_plot <- ggplot(stratified_neuro_counts_age_combined,
                                                aes(x = total_n_var, y = reorder(description,
                                                                                 total_n_var), 
                                                    fill = pns_cns)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 6,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + # to ensure percent labels are within the figure
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") +
  theme(legend.position = "none",
        strip.text.y = element_text(size = 40),
        strip.text.x = element_text(size = 35),
        axis.text.y.left = element_text(size = 25, face = "bold"),
        axis.text.x = element_text(size = 25),
        axis.title.x = element_text(size = 35))

ggsave("figures/Fig3_diagnoses_by_age_adult_ped_18group.png", adult_ped_combined_plot, width = 40, height = 19, dpi = 300)

Evaluate Diagnoses by Outcome

stratified_neuro_counts_adult$population = "adult"
stratified_neuro_counts_pediatric$population = "pediatric"

stratified_neuro_counts_combine <- rbind(stratified_neuro_counts_adult, stratified_neuro_counts_pediatric)

## determine total number of patients who died or severe
n_death_adult = get_table_n(df = tableone_adult, var = "Deceased") %>% as.numeric()
n_severe_adult = get_table_n(df = tableone_adult, var = "Severe") %>% as.numeric()

n_death_pediatric = get_table_n(df = tableone_pediatric, var = "Deceased") %>% as.numeric()
n_severe_pediatric = get_table_n(df = tableone_pediatric, var = "Severe") %>% as.numeric()

stratified_neuro_counts_outcomes <- stratified_neuro_counts_combine %>% 
  filter(variable %in% c("Severe", "Deceased"),
         !code == "NN", 
         !total_n_var == 0) %>%          
         # total_n would be calculating total codes across each outcome group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         group_by(variable, description, population) %>% 
  mutate(total_n_var = sum(total_n_var)) %>% 
  ungroup() %>% 
  mutate(perc = case_when(variable == "Severe" & population == "adult" ~ round(total_n_var/n_severe_adult*100,1),
                                 variable == "Deceased" & population == "adult" ~ round(total_n_var/n_death_adult*100,1),
                                 variable == "Severe" & population == "pediatric" ~ round(total_n_var/n_severe_pediatric*100,1),
                                 variable == "Deceased" & population == "pediatric" ~ round(total_n_var/n_death_pediatric*100,1)),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
       perc = paste0(perc, "%")) %>% 
  select(-code, -concept_type) %>% 
  distinct()
adult_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "adult") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") + 
  ggtitle("B. Adult") + 
  theme(legend.position = "none",
        plot.title = element_text(size = 44, face = "bold", hjust = -0.5), 
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_adult.png", adult_outcomes, width = 27, height = 15, dpi = 300)
pediatric_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "pediatric") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("") + 
  ggtitle("A. Pediatric") + 
  theme(legend.position = "none",
        plot.title = element_text(size = 44, face = "bold", hjust = -0.5), 
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_pediatric.png", pediatric_outcomes, width = 27, height = 9, dpi = 300)

Evaluate patients with “Both” CNS and PNS diseases

These patients were excluded from the primary analysis due to potential confounding.

# create empty list to store the counts of "both" patients
both_counts_list <- list()

# for each healthcare system, retrieve the number of excluded "both" patients recorded
for (i in sorted_sites) {
  n_both <- results[[paste(i)]][["first_hosp_results"]][["both_counts"]]
  
  both_counts_list[[i]] <- n_both
}

both_counts <- bind_rows(both_counts_list) %>% t() %>% data.frame() %>% sum()

print(paste(both_counts, "(", round(both_counts/(as.numeric(tableone_combine$N[1])+both_counts)*100,2), "%)")) 
## [1] "1990 ( 1.84 %)"

Comorbidity Analysis

Next, we will evaluate the frequency of comorbidities in our population and the associated risk that comorbidities may incur for acquiring a CNS or PNS diagnosis during COVID-19 hospitalization.

1. Create a table of counts for each comorbidity and stratified by neurological status.

comorbidity_table_adult <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Adult") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_adult, 2)*100, "%)"))

comorbidity_table_pediatric <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Pediatric") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_pediatric, 2)*100, "%)"))

supp_comorbidity_table <- rbind(comorbidity_table_adult, comorbidity_table_pediatric) %>% 
  arrange(Comorbidity)

write.csv(supp_comorbidity_table, "tables/SuppTable_Comorbidity_Counts.csv", row.names = FALSE)

2. Create vector counts of patients with comorbidity data

neuro_pt_counts_sum_adult <- neuro_pt_counts %>% 
  filter(population == "Adult") %>% 
  select(-population)

neuro_pt_counts_sum_pediatric <- neuro_pt_counts %>% 
  filter(population == "Pediatric") %>% 
  select(-population)

none_n_adult = neuro_pt_counts_sum_adult$None_n
cns_n_adult = neuro_pt_counts_sum_adult$CNS_n
pns_n_adult = neuro_pt_counts_sum_adult$PNS_n

none_n_pediatric = neuro_pt_counts_sum_pediatric$None_n
cns_n_pediatric = neuro_pt_counts_sum_pediatric$CNS_n
pns_n_pediatric = neuro_pt_counts_sum_pediatric$PNS_n

3. Calculate the relative risk (RR) of developing a CNS or PNS diagnosis for patients with each comorbidity

calculate_rr <- function(is_pediatric = FALSE) {
  
  if(is_pediatric == FALSE) {
    cns_n = cns_n_adult
    pns_n = pns_n_adult
    neuro_pt_counts_sum = neuro_pt_counts_sum_adult
    pop = "Adult"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) 
    list_of_comorbs <- list_of_comorbs$Comorbidity
    } else  {
    cns_n = cns_n_pediatric
    pns_n = pns_n_pediatric
    neuro_pt_counts_sum = neuro_pt_counts_sum_pediatric
    pop = "Pediatric"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) %>% 
      as.vector()
    list_of_comorbs <- list_of_comorbs$Comorbidity
  }
  
  
  # create empty list to save relative risk calculations
  rr_calcs <- list()
  
  # for each comorbidity, we will calculate the relative risk of having a CNS and PNS diagnosis
  for (i in list_of_comorbs) {
  
    # https://www.cdc.gov/csels/dsepd/ss1978/lesson3/section5.html
    # create matrix as:
    # exposed group - with outcome, without outcome
    # non exposed group - with outcome, without outcome
  
    # where variables ending with `_Total` are the total number of CNS, PNS, or NNC patients with the respective comorbidity.
    # variables ending with `_n` are the total numbers in the entire population
  
    # to calculate confidence intervals
  #https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/bs704_confidence_intervals8.html
  
    rr_cns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = CNS_Total, # number CNS w/comorb
             b = Comorb_Total - CNS_Total, # number of comorb pts w/o CNS
             c = cns_n - CNS_Total, # number of CNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or CNS
             risk_cns = a/(a+b),
             risk_non_cns = c/(c+d),
             rr_CNS = risk_cns/risk_non_cns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b), # (n1-x1)/n1 
             ci_2 = (d/c)/(c+d), # (n2-x2)/n2
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_CNS) - (1.96*ci_root),
             ci_upper = log(rr_CNS) + (1.96*ci_root),
             l_ci_cns = exp(ci_lower),
             u_ci_cns = exp(ci_upper)) %>%
      select(Comorbidity, Comorb_Total, None_Total, CNS_Total, PNS_Total, rr_CNS, l_ci_cns, u_ci_cns)
  
      rr_pns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = PNS_Total, # number PNS w/comorb
             b = Comorb_Total - PNS_Total, # number of comorb pts w/o PNS
             c = pns_n - PNS_Total, # number of PNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or PNS
             risk_pns = a/(a+b),
             risk_non_pns = c/(c+d),
             rr_PNS = risk_pns/risk_non_pns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b),
             ci_2 = (d/c)/(c+d),
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_PNS) - (1.96*ci_root),
             ci_upper = log(rr_PNS) + (1.96*ci_root),
             l_ci_pns = exp(ci_lower),
             u_ci_pns = exp(ci_upper)) %>%
      select(Comorbidity, rr_PNS, l_ci_pns, u_ci_pns)
  
      rr <- rr_cns %>%
        left_join(., rr_pns, by = c("Comorbidity"))
  
    rr_calcs[[i]] <- rr
  
  }
  
  rr_results <- bind_rows(rr_calcs) %>%
      mutate(rr_CNS = round(rr_CNS, 2),
           l_ci_cns = round(l_ci_cns, 2),
           u_ci_cns = round(u_ci_cns, 2),
           rr_PNS = round(rr_PNS, 2),
           l_ci_pns = round(l_ci_pns, 2),
           u_ci_pns = round(u_ci_pns, 2),
           `RR CNS (95% CI)` = paste(rr_CNS, "(", l_ci_cns, ",", u_ci_cns, ")"),
           `RR PNS (95% CI)` = paste(rr_PNS, "(", l_ci_pns, ",", u_ci_pns, ")")) %>%
    select(Comorbidity, rr_CNS, l_ci_cns, u_ci_cns, `RR CNS (95% CI)`,
           rr_PNS, l_ci_pns, u_ci_pns, `RR PNS (95% CI)`)
  
  # tidy up data
  write.csv(rr_results %>%
              arrange(desc(rr_CNS)) %>%
              select(Comorbidity, `RR CNS (95% CI)`, `RR PNS (95% CI)`), 
            paste0("tables/Supp.Table4_RR_", pop, ".csv"), row.names = FALSE)
  
    # reformat data
  rr_results_tidy_rr <- rr_results %>%
    select(Comorbidity, rr_CNS, rr_PNS) %>%
    pivot_longer(cols = rr_CNS:rr_PNS, names_to = "Neuro Status", values_to = "RR") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "rr_CNS", "CNS", "PNS"))
  
  rr_results_tidy_l_ci <- rr_results %>%
    select(Comorbidity, l_ci_cns, l_ci_pns) %>%
    pivot_longer(cols = l_ci_cns:l_ci_pns, names_to = "Neuro Status", values_to = "l_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "l_ci_cns", "CNS", "PNS"))
  
  rr_results_tidy_u_ci <- rr_results %>%
    select(Comorbidity, u_ci_cns, u_ci_pns) %>%
    pivot_longer(cols = u_ci_cns:u_ci_pns, names_to = "Neuro Status", values_to = "u_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "u_ci_cns", "CNS", "PNS"))
  
  # combine all tidy results
  rr_results_tidy <- rr_results_tidy_rr %>%
    left_join(., rr_results_tidy_l_ci,
                               by = c("Comorbidity", "Neuro Status")) %>%
    left_join(., rr_results_tidy_u_ci,
                               by = c("Comorbidity", "Neuro Status"))
  
  # order by CNS diagnoses with higher RR
  cns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "CNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = cns_top_results$Comorbidity)
  
  cns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "CNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.7, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "tomato") +
      scale_x_continuous(breaks = seq(0, 4, 1), labels = seq(0, 4, 1),
                         limits = c(0,4)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("CNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  # arrange by top PNS results
  pns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "PNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = pns_top_results$Comorbidity)
  
  
  pns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "PNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.70, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "slateblue") +
      scale_x_continuous(breaks = seq(0, 1.5, 0.5), labels = seq(0, 1.5, 0.5),
                         limits = c(0,1.5)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("PNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  pns_rr_plot <- set_panel_size(pns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  cns_rr_plot <- set_panel_size(cns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  #grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2)
  
  ggsave(paste0("figures/Figure4.relative_risk_", pop, ".png"), grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2), width = 15, height = 8, dpi = 300)
  
  }

Relative Risk (RR)

Here we plot the relative risk of acquiring a CNS or PNS diagnosis in adult patients who have each comorbidity of the ECI compared to those who do not.

We are not able to compute this analysis in pediatric patients, due to the overall low incidence of comorbidities and neurological illness during COVID-19 hospitalization.

LPCA deviance explained

During the local analysis for each participating healthcare system, we computed the top 10 principal components using logistic principal component analysis (LPCA). LPCA was performed on the matrix of npatients x 29 ECI comorbidities. Across healthcare systems, 10 principal components explained at least 75% of the model deviance. 

This supplementary analysis was performed in order to determine the best way to adjust for each patient’s comorbidity burden in our downstream survival models.

compare_deviance <- function(sorted_sites, is_pediatric=FALSE) {
  
  pca_dev <- list()
  
  if(is_pediatric==FALSE) {
    population = "adults"
    sorted_sites = sorted_sites[!sorted_sites%in% c("GOSH_results", "BCH_results")]
    } else {
    population = "pediatrics"
    }

  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    print(i)
    tmp <- get(i)
    tmp_pca <- tmp[[c(
      "comorbidities",
      paste0("comorb_", population),
      "deviance_expl"
    )]] %>% 
      data.frame() %>% 
      rename(dev_expl = '.')
    tmp_pca$site <- tmp[["site"]]
    pca_dev[[i]] <- tmp_pca
    
  }
  
  pca_df <- bind_rows(pca_dev)
  
  return(pca_df)
  
}

pca_adult <- compare_deviance(sorted_sites = adult_sites, is_pediatric = FALSE)
## [1] "APHP_results"
## [1] "FRBDX_results"
## [1] "UKFR_results"
## [1] "ICSM_results"
## [1] "HPG23_results"
## [1] "NUH_results"
## [1] "MGB_results"
## [1] "UPENN_results"
## [1] "UMICH_results"
## [1] "NWU_results"
## [1] "UPITT_results"
## [1] "H12O_results"
## [1] "UKY_results"
## [1] "VA1_results"
## [1] "VA2_results"
## [1] "VA3_results"
## [1] "VA4_results"
## [1] "VA5_results"
## [1] "UCLA_results"
pca_data <- bind_rows(pca_adult)


theme_set(theme_classic())

pca_plot <- ggplot(pca_data, 
       aes(x = fct_reorder(site, -dev_expl), y = dev_expl, group = 1)) +
  geom_point(size = 2.5) +
  geom_line() +
  xlab("Healthcare System") + 
  ylab("Proportion of Deviance explained") +  
  ylim(0,1) + 
  theme(axis.text.x=element_text(angle=90, hjust=1, face = "bold", size = 12),
        axis.text.y=element_text(face = "bold"), 
        axis.title.y = element_text(face = "bold", size = 15)); pca_plot

ggsave("figures/SuppFig_pca_deviance.png", pca_plot, height = 6, width = 6)
---
title: "**Patient Characteristics**"
author: "Meg Hutch, Ji Son, Trang Le, Chuan Hong, Xuan Wang, Zahra Shakeri"
date: "04/11/2023"
output:
  html_document:
    toc: true
    toc_float: true
    code_download: true
    theme: spacelab
---

This notebook describes the **106,229** adult and pediatric hospitalized COVID-19 positive patients that were studied as part of the [Consortium for Clinical Characterization of COVID-19 by EHR](https://covidclinical.net/) Neurology group. This analysis contains the results of patients from 21 different healthcare systems spanning 6 countries.

<center>![](docs/assets/neuro-stratification-diagram.png){width="373"}</center>

```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(gridGraphics)
library(gridExtra)
library(ggpubr)
library(DT)
library(kableExtra)
library(cowplot)
library(RColorBrewer)
library(glue)
library(egg)
source("R/plot_theme.R")
source("R/demo_tables.R")
source("R/utils.R")
```

# **Import Data from each Healthcare System**

Each participating healthcare system ran the analysis locally on patient level data using our [customized R package](https://github.com/covidclinical/Phase2.1NeuroRPackage). The output of each local analysis consisted of aggregated counts and summary statistics (means, medians, hazard ratios, p-values, etc). Summary results from each healthcare system were then aggregated and are imported and then combined using the following code:

```{r message=FALSE, warning=FALSE}
# read in files from results folder 
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
  path = "results",
  pattern = ".rda",
  full.names = TRUE
)


for (rda in rdas) {
  load(rda)
}

rm(rdas, rda)

# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
  filter(!is.na(date_v4_received)) %>%
  pull(siteid) %>%
  paste("results", sep = "_")

# list sites without race
sites_wo_race <- site_params %>%
  filter(!include_race) %>%
  pull(siteid)

# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))

## load in the pre-processed comorbidity and meta-data
load("processed/comorbidity_table_cnps.rda")
load("processed/neuro_pt_counts.rda") 
```

# **Pre-processing**

The following code pre-processes the demographic and clinical course data in order to construct a Table 1.

**1. Format primary demographic characteristics**

```{r message=FALSE, warning=FALSE}
# create list of hospitals for adult and pediatric analyses
adult_sites = sorted_sites[!sorted_sites %in% c("BCH_results", "GOSH_results")]

pediatric_sites = sorted_sites[!sorted_sites %in% c("VA1_results", "VA2_results", "VA3_results", "VA4_results", "VA5_results")]

demo_table_adult <- create_demo_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

demo_table_pediatric <- create_demo_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric)
```

**2. Format clinical characteristics (i.e: continuous variables such as median time to discharge)**

```{r message=FALSE, warning=FALSE}
clinical_table_adult <- create_clinical_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

clinical_table_pediatric <- create_clinical_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

clinical_table_combine <- rbind(clinical_table_adult, clinical_table_pediatric)
```

**3. Conduct additional pre-processing of clinical variables**

```{r message=FALSE, warning=FALSE}
clinical_table_adult_clean <- clean_clinical_tables(clinical_table = clinical_table_adult)

clinical_table_pediatric_clean <- clean_clinical_tables(clinical_table = clinical_table_pediatric)

clinical_table_clean_combine <- rbind(clinical_table_adult_clean, clinical_table_pediatric_clean)
```

**4. Add Comorbidity data**

Here we create separate lists of the top 4 comorbidities in the adult and pediatric populations

```{r}
comorbidity_table_adults <- comorb_table_wide %>%
  filter(population == "Adult") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

comorbidity_table_pediatric <- comorb_table_wide %>%
  filter(population == "Pediatric") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

top_comorb_adults <- comorbidity_table_adults$Comorbidity
top_comorb_pediatrics <- comorbidity_table_pediatric$Comorbidity
```

# **Demographics**

## Table One - Combined

The below table contains the counts and summary stats of the entire cohort (adult & pediatric patients).

```{r}
neuro_pt_counts_sum <- colSums(neuro_pt_counts[,-1]) %>% 
  data.frame() %>% 
  t() %>% 
  data.frame()

# specify the order of Table 1
row_order_adult <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_adults, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )


tableone_combine <- create_tableone(demo_table = demo_table_combine,
                                    clinical_table = clinical_table_clean_combine) %>%
  rbind(., comorb_table_wide %>%
          group_by(Comorbidity) %>%
          mutate(Comorb_Total = sum(Comorb_Total, na.rm = TRUE),
          None_sum = sum(None_Total, na.rm = TRUE),
          CNS_sum = sum(CNS_Total, na.rm = TRUE),
          PNS_sum = sum(PNS_Total, na.rm = TRUE),
          None_perc = round(None_sum/neuro_pt_counts_sum$None_n*100,1),
          Central_perc = round(CNS_sum/neuro_pt_counts_sum$CNS_n*100,1),
          Peripheral_perc = round(PNS_sum/neuro_pt_counts_sum$PNS_n*100,1),
          None = paste(None_sum, "(", None_perc, ")"),
          Central = paste(CNS_sum, "(", Central_perc, ")"),
          Peripheral = paste(PNS_sum, "(", Peripheral_perc, ")")) %>%
          distinct(Comorbidity, Comorb_Total, None, Central, Peripheral) %>%
          rename(`Table 1` = "Comorbidity",
          N = "Comorb_Total")) %>%
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_combine %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
      add_header_above(c(
      " ",
      "Total Patients" = 1,
      "Neurological Disease" = 3
      )) %>%
      kable_paper("striped", full_width = F) %>%
      pack_rows("Sex", 2, 4) %>%
      pack_rows("Age", 5, 13) %>%
      pack_rows("Race & Ethnicity", 14, 20) %>%
      pack_rows("Past Medical History", 21, 27) %>%
      pack_rows("Severity", 28, 30) %>%
      pack_rows("Survival", 31, 33) %>%
      pack_rows("Discharge", 34, 36) %>%
      pack_rows("Readmission", 37, 40)

write.csv(tableone_combine, 'tables/table1_combined.csv', row.names = FALSE)
```

## Table One - Adults

```{r}
tableone_adult <- create_tableone(demo_table = demo_table_adult,
                                  clinical_table = clinical_table_adult_clean) %>%
                                  rbind(., comorbidity_table_adults %>%
                                          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
                                          rename(`Table 1` = "Comorbidity",
                                                 N = "Comorb_Total",
                                                 None = "NNC_N_%",
                                                 Central = "CNS_N_%",
                                                 Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_adult %>%
    rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 9) %>%
  pack_rows("Race & Ethnicity", 10, 16) %>%
  pack_rows("Past Medical History", 17, 23) %>%
  pack_rows("Severity", 24, 26) %>%
  pack_rows("Survival", 27, 29) %>%
  pack_rows("Discharge", 30, 32) %>%
  pack_rows("Readmission", 33, 36)

write.csv(tableone_adult, 'tables/table1_adults.csv', row.names = FALSE)
```

## Table One - Pediatrics

```{r}
# specify the order of Table 1
row_order_pediatric <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_pediatrics, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )

tableone_pediatric <- create_tableone(demo_table = demo_table_pediatric,
                                      clinical_table = clinical_table_pediatric_clean)  %>%
  rbind(., comorbidity_table_pediatric %>%
          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>% 
          rename(`Table 1` = "Comorbidity",
                  N = "Comorb_Total",
                  None = "NNC_N_%",
                  Central = "CNS_N_%",
                  Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_pediatric, `Table 1`))

kbl(tableone_pediatric %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race & Ethnicity", 9, 15) %>%
  pack_rows("Past Medical History", 16, 22) %>%
  pack_rows("Severity", 23, 25) %>%
  pack_rows("Survival", 26, 28) %>%
  pack_rows("Discharge", 29, 31) %>%
  pack_rows("Readmission", 32, 35)

write.csv(tableone_pediatric, 'tables/table1_pediatric.csv', row.names = FALSE)
```

# **Demographic & Clinical Course Analysis**

Next we will evaluate characteristics among patients stratified by neurological status: 1) patients with no neurological condition (NNC), 2) patients with a central nervous system condition (CNS), and 3) patients with a peripheral nervous system condition (PNS).

Categorical variables will be evaluated with chi-square tests, while continuous variables will be evaluated with a non-parametric anova (Kruskal-Wallis) test.

**1. Conduct chi-squared tests for categorical demographic variables**

```{r}
compute_chi_square <- function(tableone, demo_table) {
  
  N = tableone %>% select(N) %>% head(1) %>% as.numeric()

  # sum of all neuro condition groups
  cns_n = as.numeric(gsub( " .*$", "", tableone[1,"Central"]))
  pns_n = as.numeric(gsub( " .*$", "", tableone[1,"Peripheral"]))
  none_n = as.numeric(gsub( " .*$", "", tableone[1,"None"]))
  
  # sum up the total counts of each demographic variable across healthcare systems
  tableOne_sums <- demo_table %>%
    group_by(variable, Demo_var, Demo_var_i) %>%
    summarise(across(
      starts_with("n_var"),
      function(x) sum(x, na.rm = TRUE)
    ), .groups = "drop")
  
  # create list of demographic/clinical variables
  vars = tableOne_sums$variable
  
  # for binary variables, we should group by category 
  # Thus, only run the analysis for sex.male rather than both sex.female AND sex.male which would be redundant
  # we will also remove `age_group.unknown` since their is only 1 patient here
  exclude <- c("readmitted.false", "sex.female", "sex.other", "survival.deceased", "severity.non-severe",
               "covid_discharged.discharged", "age_group.Unknown")
  vars <- vars[!vars %in% exclude]
  
  # create empty list for chi.square test results
  chi_result_list = list()
  
  # for each variable, we will run the chi.square test
  for(i in vars) {
    test = calc_chisq(i, tableOne_sums, none_n, pns_n, cns_n)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    chi_results <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() %>% 
      mutate(p_value = as.numeric(p_value))
    
    rownames(chi_results) <- NULL
    
    chi_result_list[[i]] <- chi_results
  }
  
  # save list of chi.square test results
  chisq_results <- bind_rows(chi_result_list)
  
  return(chisq_results)
  
  
  }
```

**Combined Adult & Pediatric Chi-Square Results**

```{r message=FALSE, warning=FALSE}
chisq_combine <- compute_chi_square(tableone = tableone_combine %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_combine %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_combine %>% arrange(p_value))
```

**Adult Chi-Square Results**

```{r message=FALSE, warning=FALSE}
chisq_adult <- compute_chi_square(tableone = tableone_adult %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_adult %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_adult %>% arrange(p_value))
```

**Pediatric Chi-Square Results**

```{r message=FALSE, warning=FALSE}
chisq_pediatric <- compute_chi_square(tableone = tableone_pediatric %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_pediatric %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_pediatric %>% arrange(p_value))
```

**2. Conduct non-parametric anova (Kruskal-Wallis) tests for continuous clinical variables**

```{r}
compute_kruskal <- function(clinical_table) {

  # create empty list to save processed variables
  processed_list_figs <- list()
  
  # create a list of clinical variables to further pre-process
  vars_to_process <- unique(clinical_table$name)
  
  # for each variable, we will remove the [min, max] as before
  for (i in vars_to_process) {
    mod_table <- clinical_table %>%
      rename("var" = name) %>%
      filter(var == i) %>%
      mutate(
        site = toupper(site),
        None = sub("(\\(.*|\\[.*)", "", None),
        Peripheral = sub("(\\(.*|\\[.*)", "", Peripheral),
        Central = sub("(\\(.*|\\[.*)", "", Central)
      ) %>%
      mutate(across(None:Central, as.numeric))
  
    processed_list_figs[[i]] <- mod_table
  }
  
  clinical_table_anova <- bind_rows(processed_list_figs)
  
  # format the continuous variables
  cont_vars = clinical_table_anova %>%
    mutate(
      # create a variable without mean/median prefix
      grouped_var = gsub("Mean |Median | \\[Min, Max\\]| \\(SD\\)", "", var)
    ) %>%
    pivot_longer(
      cols = None:Central,
      names_to = "type"
    ) %>%
    mutate(type = factor(type, levels = c("None", "Central", "Peripheral"))) %>% 
    # select variables to analyze
    filter(grouped_var %in% c("Elixhauser score",
                              "pre admission cns", 
                              "pre admission pns", 
                              "time to death", 
                              "time to first discharge", 
                              "time to severe",
                              "number of readmissions",
                              "time to first readmission")) %>% 
    distinct()
  
  # create function to conduct anova
  cont_var_results <- function(variable) {
    
    df <- cont_vars %>% 
    filter(grouped_var == paste(variable))
    
    # kruskal.test calculates the kruskal-Wallis H-statistic 
    # does not assume normality between groups
    # also called the one-way ANOVA on ranks
    test = kruskal.test(df$value ~ df$type)
    
  }
  
  # create empty list for Kruskal-Wallis results
  cont_vars_list = list()
  
  # create list of unique variables
  outcome_cont_vars = unique(cont_vars$grouped_var)
  
  for(i in outcome_cont_vars) {
    
    test = cont_var_results(i)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    kw <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() 
    
    rownames(kw) <- NULL
    
    cont_vars_list[[i]] <- kw
  
    
  }
  
  anova_results <- bind_rows(cont_vars_list)
  
  }
```

**Combined Adult & Pediatric Kruskal-Wallis Results**

```{r message=FALSE, warning=FALSE}

kw_combine <- compute_kruskal(clinical_table = clinical_table_combine)
datatable(kw_combine %>% arrange(p_value))
```

**Adult Kruskal-Wallis Results**

*Note: median CNS is 0 for all sites (reason for the NA values)*

```{r message=FALSE, warning=FALSE}

kw_adult <- compute_kruskal(clinical_table = clinical_table_adult)
datatable(kw_adult %>% arrange(p_value))
```

**Pediatric Kruskal-Wallis Results**

```{r message=FALSE, warning=FALSE}

kw_pediatric <- compute_kruskal(clinical_table = clinical_table_pediatric)
datatable(kw_pediatric %>% arrange(p_value))
```

**3. Combine chi-square and Kruskal-Wallis results**

We will evaluate significance when controlling for False Discovery Rate (FDR)

```{r}
tableone_stats <- function(chisq_results, anova_results, population) {

  demo_stats <- rbind(chisq_results %>% select(Variable, p_value),
                      anova_results %>% select(Variable, p_value))
  
  # adjust p-values with FDR
  demo_stats$adj_p_value <- p.adjust(demo_stats$p_value, method = "fdr")
  
  # round & format p-value 
  demo_stats <- demo_stats %>% 
    mutate(p_value = as.numeric(p_value),
           pvalue = if_else(p_value < 0.001, "< 0.001", paste(round(p_value, 3))),
           adj.p_value = if_else(adj_p_value < 0.001, "< 0.001", paste(round(adj_p_value, 3))))
  
  # tidy up the demographics stat table
  demo_stats_tidy <- demo_stats %>% 
    rename(`Unadjusted P-value (raw)` = p_value,
           `FDR Adjusted P-value (raw)` = adj_p_value,
           `Unadjusted P-value (tidy)` = pvalue,
           `FDR Adjusted P-value (tidy)` = adj.p_value) %>% 
    select(Variable, `Unadjusted P-value (raw)`, `FDR Adjusted P-value (raw)`, `Unadjusted P-value (tidy)`, `FDR Adjusted P-value (tidy)`)

  write.csv(demo_stats_tidy, paste0("tables/Table1_pvals_", population, ".csv"), row.names = FALSE)
  
  return(demo_stats_tidy)
  
}
```

**4. Incorporate Comorbidities**

We will also add the top 4 comorbidities for each population to our table one. For the combined adult and pediatric table, we will use the top 4 comorbidities of the adult population.

```{r}
# define matrix for each comorbidity
comorb_matrix <- function(comorbidity_table, comorbidity, population) {

    if(population=='Combined') {
      comorbidity_table = comorbidity_table
    } else if (population=="Adult") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Adult")
    } else if (population=="Pediatric") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Pediatric")
    } else {
      print('incorrect population specification')
    }
  
  mat_comorb <- comorbidity_table %>%
    filter(Comorbidity == paste(comorbidity)) %>%
    group_by(Comorbidity) %>% 
    mutate(None_comorb = sum(None_Total, na.rm = TRUE),
    CNS_comorb = sum(CNS_Total, na.rm = TRUE),
    PNS_comorb = sum(PNS_Total, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(None_comorb, CNS_comorb, PNS_comorb) %>%
    mutate(none_dif = neuro_pt_counts_sum$None_n - None_comorb,
    pns_dif = neuro_pt_counts_sum$PNS_n - PNS_comorb,
    cns_dif = neuro_pt_counts_sum$CNS_n - CNS_comorb) %>%
    data.frame() %>%
    #as.integer() %>%
    matrix(nrow = 2, ncol = 3, byrow = TRUE)

  comorb_results <- chisq.test(unlist(mat_comorb))
  
  results_df <- data.frame(Variable = paste(comorbidity),
                                X2 = round(comorb_results$statistic, 4),
                                p_value = comorb_results$p.value,
                                df = comorb_results$parameter)
  row.names(results_df) <- NULL
  
  return(results_df)
}

## combined comorbidities
comorbidity_combined_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Combined")
  comorbidity_combined_chi_list[[i]] <- comorb_chi_results
}

comorbidity_combined_chi <- comorbidity_combined_chi_list %>%
  bind_rows() 

chisq_combine_comorb <- chisq_combine %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_combined_chi) 



## adult comorbidities
comorbidity_adult_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Adult")
  comorbidity_adult_chi_list[[i]] <- comorb_chi_results
}

comorbidity_adult_chi <- comorbidity_adult_chi_list %>%
  bind_rows() 

chisq_adult_comorb <- chisq_adult %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_adult_chi) 

## pediatric comorbidities
comorbidity_pediatric_chi_list <- list()

for(i in top_comorb_pediatrics) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Pediatric")
  comorbidity_pediatric_chi_list[[i]] <- comorb_chi_results
}

comorbidity_pediatric_chi <- comorbidity_pediatric_chi_list %>%
  bind_rows() 

chisq_pediatric_comorb <- chisq_pediatric %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_pediatric_chi) 
```

## Table One - Combined Analysis

```{r}
combine_tableone_stats <- tableone_stats(chisq_results = chisq_combine_comorb,
                                         anova_results = kw_combine,
                                         population = "combined") 
  

datatable(combine_tableone_stats)
```

## Table One - Adult Analysis

```{r}
adult_tableone_stats <- tableone_stats(chisq_results = chisq_adult_comorb,
                                         anova_results = kw_adult,
                                         population = "adult") 
  

datatable(adult_tableone_stats)
```

## Table One - Pediatric Analysis

```{r}
pediatric_tableone_stats <- tableone_stats(chisq_results = chisq_pediatric_comorb,
                                         anova_results = kw_pediatric,
                                         population = "pediatric") 
  

datatable(pediatric_tableone_stats)
```

# **Neurological Diagnosis Analysis**

Next, we will evaluate the diagnoses captured with the CNS and PNS groups. Specifically, we will examine the frequency of CNS and PNS diagnoses by patient age group and by clinical outcomes (mortality and COVID-19 severity).

**1. Format neurological icd code data**

```{r fig.width=12}
# import list of ICD-9/ICD-10 codes
neuro_icds_10 <-
  readxl::read_excel("public-data/2020-02-22_neuro-icd10_CNSvPNS.xlsx", sheet = 2) %>% 
  rename(
    "icd" = `ICD-10`,
    "pns_cns" = `Nervous system Involvement (1=central, 2=peripheral, 3=irrelevant)`,
    "type" = `ICD-10_type (1=first three alphanumeric code only, 2=digits after decimal point)`) %>%
  filter(type == 1) %>% 
  mutate(pns_cns = as.factor(pns_cns) %>% fct_recode(
    Central = "1",
    Peripheral = "2"
  )) %>%
  distinct(icd, `Neurological Disease Category`, pns_cns, `ICD-10 Description Revised`) %>%
  rename("description" = `ICD-10 Description Revised`) %>%
  mutate(concept_type = "icd-10")

neuro_icds_9 <- read.csv("public-data/icd9_tab_CNSvPNS.csv") %>%
  rename(
    "Neurological Disease Category" = "Neurological.Disease.Category",
    "pns_cns" = `Nervous.system.Involvement..1.central..2.peripheral.`,
    "icd_description" = `icd9_desc_revised`
  ) %>%
  mutate(
    pns_cns = as.factor(pns_cns) %>% fct_recode(
      Central = "1",
      Peripheral = "2"
    ),
    concept_type = "DIAG-ICD9"
  ) %>%
  select(icd, `Neurological Disease Category`, pns_cns, icd_description) %>%
  rename("description" = icd_description) %>%
  mutate(concept_type = "icd-9")

# bind list of ICD-9/ICD-10 codes
icds <- rbind(neuro_icds_10, neuro_icds_9)
```

**2. Calculate total number of neurological diagnoses for each adults & pediatrics**

```{r}
compute_neuro_counts <- function(sorted_sites, sample_size, is_pediatric = FALSE) {
  
    # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    diag_table_list[[i]] <- tmp_diag %>%
      filter(variable == "sex.female" | variable == "sex.male" | variable == "sex.other") %>%
      select(site, contains("n_var"))
  }
  
  diag_table_wide <- bind_rows(diag_table_list)
  
  # we will count the total number and percent across healthcare systems
  diag_counts <- colSums(Filter(is.numeric, diag_table_wide), na.rm = TRUE) %>% 
    data.frame() %>% 
    mutate(perc = round(./sample_size * 100, 1))
  
  # format diagnostic table
  diag_counts <- tibble::rownames_to_column(diag_counts, "icd") %>%
    mutate(icd = gsub("n_var_", "", icd)) %>%
    rename(`Number of Patients` = ".") %>%
    full_join(., icds, by = "icd") %>%
    dplyr::arrange(desc(`Number of Patients`)) %>% 
    mutate(`Number of Patients (% of Cohort)` = paste(`Number of Patients`, "(", perc, ")", sep = " ")) %>% 
    select(pns_cns, `Neurological Disease Category`, concept_type, 
           icd, description, `Number of Patients (% of Cohort)`) %>% 
      rename(Code = "icd",
           `Nervous System` = "pns_cns",
           Description = "description",
           `ICD Version` = "concept_type") %>% 
    mutate(`Number of Patients (% of Cohort)` = if_else(`Number of Patients (% of Cohort)` == "NA ( NA )", "0 ( 0 )", `Number of Patients (% of Cohort)`),
           Code = if_else(Code == "NN", "No Neurological Disease (NNC)", Code))
  
  return(diag_counts)
  
  }

```

```{r message=FALSE, warning=FALSE}
# create vectors containing patient sample sizes
n_adult <- as.numeric(tableone_adult$N)[1]
n_pediatric <- as.numeric(tableone_pediatric$N)[1]

# calculate the total neurological patient counts 
neuro_counts_adult <- compute_neuro_counts(sorted_sites = adult_sites,
                                           sample_size = n_adult, 
                                           is_pediatric = FALSE)

neuro_counts_pediatric <- compute_neuro_counts(sorted_sites = pediatric_sites,
                                           sample_size = n_pediatric, 
                                           is_pediatric = TRUE)

neuro_counts_combined <- neuro_counts_adult %>% 
  rename(`Number of Adult Patients (% of Cohort)` = `Number of Patients (% of Cohort)`) %>% 
  left_join(., neuro_counts_pediatric %>% 
              select(`ICD Version`, Code, `Number of Patients (% of Cohort)`) %>% 
              rename("Number of Pediatric Patients (% of Cohort`)" = `Number of Patients (% of Cohort)`))

write.csv(neuro_counts_combined, paste0("tables/Supp.Table2_diagnoses", ".csv"), row.names = FALSE)
  
```

## Number (%) of Adult Patients with each Neurological Diagnosis

```{r}
datatable(neuro_counts_adult) 
```

## Number (%) of Pediatric Patients with each Neurological Diagnosis

```{r}
datatable(neuro_counts_pediatric) 
```

## Evaluate Diagnoses by Age & Outcome

```{r}
compute_stratified_neuro_counts <- function(tableone, sorted_sites, is_pediatric = FALSE) {
  
  # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    #print(i)
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    try(
    diag_table_list[[i]] <- tmp_diag %>%
      select(site, variable, starts_with("n_var")) %>% 
      pivot_longer(cols = starts_with("n_var"), names_to = "code", values_to = "freq") %>% 
      ungroup()
    )
  }

  diag_table_wide_all <- bind_rows(diag_table_list) %>% 
      group_by(variable, code) %>% 
    # total number of patients who had each code for each variable across sites
      mutate(total_n_var  = sum(freq, na.rm = TRUE)) %>% 
    distinct(variable, code, total_n_var) %>% 
    mutate(code = gsub("n_var_", "", code),
           variable = gsub("Severity.", "", variable),
           variable = gsub("Survival.", "", variable),
           variable = gsub("readmitted", "", variable),
           variable = gsub("age_group", "", variable),
           variable = gsub("covid_discharged", "", variable),
           variable = if_else(variable == ".TRUE", "Readmitted", variable)) %>% 
    left_join(., icds %>% rename(code = "icd"), by = "code") %>% 
    ungroup() %>% 
    filter(!concept_type == "icd-9")


  # refactor neuro status
  diag_table_wide_all$pns_cns <- factor(diag_table_wide_all$pns_cns,
                                        levels=c("Central", "Peripheral"),
                                        labels=c("CNS", "PNS"))
  
  
  return(diag_table_wide_all)
  
  }
```

```{r}
stratified_neuro_counts_adult <- compute_stratified_neuro_counts(sorted_sites = adult_sites,
                                                                 is_pediatric = FALSE,
                                                                 tableone = tableone_adult)

stratified_neuro_counts_pediatric <- compute_stratified_neuro_counts(sorted_sites = pediatric_sites,
                                                                 is_pediatric = TRUE,
                                                                 tableone = tableone_pediatric)
```

```{r}
compute_stratified_neuro_counts_age <- function(stratified_neuro_counts, tableone) {
  
  stratified_neuro_counts$variable <- factor(stratified_neuro_counts$variable, 
                                     levels=c(".00to02", ".03to05", ".06to11", ".12to17",
                                              ".18to25", ".26to49", ".50to69", ".70to79", ".80plus"), 
                                     labels=c("0-2 Years", "3-5 Years", "6-11 Years", "12-17 Years",
                                              "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
  
  # total counts
  n_0_2 <- get_table_n(tableone, var = "0-2")
  n_3_5 = get_table_n(tableone, var = "3-5")
  n_6_11 = get_table_n(tableone, var = "6-11")
  n_12_17 = get_table_n(tableone, var = "12-17")
  n_18_25 = get_table_n(tableone, var = "18-25")
  n_26_49 = get_table_n(tableone, var = "26-49")
  n_50_69 = get_table_n(tableone, var = "50-69")
  n_70_79 = get_table_n(tableone, var = "70-79")
  n_80 = get_table_n(tableone, var = "80+")
  
  age_df <- data.frame("variable" = c("0-2 Years",
                                    "3-5 Years", 
                                    "6-11 Years", 
                                    "12-17 Years",
                                    "18-25 Years", 
                                    "26-49 Years", 
                                    "50-69 Years", 
                                    "70-79 Years", 
                                    "80+ Years"),
                     "n_var" = c(n_0_2,
                               n_3_5,
                               n_6_11,
                               n_12_17,
                               n_18_25,
                               n_26_49,
                               n_50_69,
                               n_70_79,
                               n_80))

  
  stratified_neuro_counts <- stratified_neuro_counts %>% 
    filter(grepl("Years", variable),
                !code == "NN", 
                !total_n_var == 0) %>% 
         # total_n would be calculating total codes across each age group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         ungroup() %>% 
         left_join(., age_df, by = "variable") %>% 
         mutate(n_var = as.numeric(n_var),
                perc = as.character(round(total_n_var/n_var*100,1)),
                perc = if_else(perc < 0.1, '<0.1', perc),
                perc = paste0(perc, "%")) 
  
  return(stratified_neuro_counts)
  
  
}


```

```{r}
stratified_neuro_counts_age_adult <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_adult,
                                                                         tableone = tableone_adult)

stratified_neuro_counts_age_pediatric <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_pediatric,
                                                                 tableone = tableone_pediatric)
```

**Diagnoses by Adult and Pediatric Populations**

```{r fig.height=20, fig.width=15}
n_0_2 <- get_table_n(tableone_combine, var = "0-2")
#n_3_5 <- get_table_n(tableone_combine, var = "3-5")
n_6_11 <- get_table_n(tableone_combine, var = "6-11")
n_12_17 <- get_table_n(tableone_combine, var = "12-17")

total_ped_age_count = as.numeric(n_0_2) +  as.numeric(n_6_11) + as.numeric(n_12_17)

stratified_neuro_counts_age_pediatric_reformat = stratified_neuro_counts_age_pediatric %>%
  select(variable, pns_cns, description, n_var, total_n_var) %>%
  mutate(variable = "0-17 Years") %>%
  group_by(pns_cns, description) %>%
  mutate(total_n_var = sum(total_n_var, na.rm = TRUE)) %>%
  ungroup() %>%
  # n_var is the total per `age_group` stratification
  select(-n_var) %>%
  distinct() %>%
  mutate(perc = round(total_n_var/total_ped_age_count*100,1),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
         perc = paste0(perc, "%"))

stratified_neuro_counts_age_combined <- stratified_neuro_counts_age_adult %>%
  select(variable, pns_cns, description, total_n_var, perc) %>%
  rbind(., stratified_neuro_counts_age_pediatric_reformat %>%
          select(variable, pns_cns, description, total_n_var, perc))

stratified_neuro_counts_age_combined$variable <- factor(stratified_neuro_counts_age_combined$variable,
                                                                      levels=c("0-17 Years", "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
```

```{r fig.height=20, fig.width=12}
group.colors <- c(NNC = "gray", PNS = "slateblue", CNS ="tomato")

adult_ped_combined_plot <- ggplot(stratified_neuro_counts_age_combined,
                                                aes(x = total_n_var, y = reorder(description,
                                                                                 total_n_var), 
                                                    fill = pns_cns)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 6,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + # to ensure percent labels are within the figure
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") +
  theme(legend.position = "none",
        strip.text.y = element_text(size = 40),
        strip.text.x = element_text(size = 35),
        axis.text.y.left = element_text(size = 25, face = "bold"),
        axis.text.x = element_text(size = 25),
        axis.title.x = element_text(size = 35))

ggsave("figures/Fig3_diagnoses_by_age_adult_ped_18group.png", adult_ped_combined_plot, width = 40, height = 19, dpi = 300)
```

![](figures/Fig3_diagnoses_by_age_adult_ped_18group.png){width="1031" height="337"}

## Evaluate Diagnoses by Outcome

```{r}

stratified_neuro_counts_adult$population = "adult"
stratified_neuro_counts_pediatric$population = "pediatric"

stratified_neuro_counts_combine <- rbind(stratified_neuro_counts_adult, stratified_neuro_counts_pediatric)

## determine total number of patients who died or severe
n_death_adult = get_table_n(df = tableone_adult, var = "Deceased") %>% as.numeric()
n_severe_adult = get_table_n(df = tableone_adult, var = "Severe") %>% as.numeric()

n_death_pediatric = get_table_n(df = tableone_pediatric, var = "Deceased") %>% as.numeric()
n_severe_pediatric = get_table_n(df = tableone_pediatric, var = "Severe") %>% as.numeric()

stratified_neuro_counts_outcomes <- stratified_neuro_counts_combine %>% 
  filter(variable %in% c("Severe", "Deceased"),
         !code == "NN", 
         !total_n_var == 0) %>%          
         # total_n would be calculating total codes across each outcome group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         group_by(variable, description, population) %>% 
  mutate(total_n_var = sum(total_n_var)) %>% 
  ungroup() %>% 
  mutate(perc = case_when(variable == "Severe" & population == "adult" ~ round(total_n_var/n_severe_adult*100,1),
                                 variable == "Deceased" & population == "adult" ~ round(total_n_var/n_death_adult*100,1),
                                 variable == "Severe" & population == "pediatric" ~ round(total_n_var/n_severe_pediatric*100,1),
                                 variable == "Deceased" & population == "pediatric" ~ round(total_n_var/n_death_pediatric*100,1)),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
       perc = paste0(perc, "%")) %>% 
  select(-code, -concept_type) %>% 
  distinct()
```

```{r}
adult_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "adult") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") + 
  ggtitle("B. Adult") + 
  theme(legend.position = "none",
        plot.title = element_text(size = 44, face = "bold", hjust = -0.5), 
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_adult.png", adult_outcomes, width = 27, height = 15, dpi = 300)
```

```{r}
pediatric_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "pediatric") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("") + 
  ggtitle("A. Pediatric") + 
  theme(legend.position = "none",
        plot.title = element_text(size = 44, face = "bold", hjust = -0.5), 
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_pediatric.png", pediatric_outcomes, width = 27, height = 9, dpi = 300)
```

```{r include=FALSE}
#grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1)

ggsave("figures/SuppFig1_diagnoses_by_outcome_combined.png", grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1), width = 27, height = 25, dpi = 300)
```

![](figures/SuppFig1_diagnoses_by_outcome_combined.png){.class width="800" height="800px"}

## Evaluate patients with "Both" CNS and PNS diseases

These patients were excluded from the primary analysis due to potential confounding.

```{r}
# create empty list to store the counts of "both" patients
both_counts_list <- list()

# for each healthcare system, retrieve the number of excluded "both" patients recorded
for (i in sorted_sites) {
  n_both <- results[[paste(i)]][["first_hosp_results"]][["both_counts"]]
  
  both_counts_list[[i]] <- n_both
}

both_counts <- bind_rows(both_counts_list) %>% t() %>% data.frame() %>% sum()

print(paste(both_counts, "(", round(both_counts/(as.numeric(tableone_combine$N[1])+both_counts)*100,2), "%)")) 
```

# **Comorbidity Analysis**

Next, we will evaluate the frequency of comorbidities in our population and the associated risk that comorbidities may incur for acquiring a CNS or PNS diagnosis during COVID-19 hospitalization.

**1. Create a table of counts for each comorbidity and stratified by neurological status.**

```{r}
comorbidity_table_adult <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Adult") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_adult, 2)*100, "%)"))

comorbidity_table_pediatric <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Pediatric") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_pediatric, 2)*100, "%)"))

supp_comorbidity_table <- rbind(comorbidity_table_adult, comorbidity_table_pediatric) %>% 
  arrange(Comorbidity)

write.csv(supp_comorbidity_table, "tables/SuppTable_Comorbidity_Counts.csv", row.names = FALSE)
```

**2. Create vector counts of patients with comorbidity data**

```{r}
neuro_pt_counts_sum_adult <- neuro_pt_counts %>% 
  filter(population == "Adult") %>% 
  select(-population)

neuro_pt_counts_sum_pediatric <- neuro_pt_counts %>% 
  filter(population == "Pediatric") %>% 
  select(-population)

none_n_adult = neuro_pt_counts_sum_adult$None_n
cns_n_adult = neuro_pt_counts_sum_adult$CNS_n
pns_n_adult = neuro_pt_counts_sum_adult$PNS_n

none_n_pediatric = neuro_pt_counts_sum_pediatric$None_n
cns_n_pediatric = neuro_pt_counts_sum_pediatric$CNS_n
pns_n_pediatric = neuro_pt_counts_sum_pediatric$PNS_n
```

**3. Calculate the relative risk (RR) of developing a CNS or PNS diagnosis for patients with each comorbidity**

```{r}
calculate_rr <- function(is_pediatric = FALSE) {
  
  if(is_pediatric == FALSE) {
    cns_n = cns_n_adult
    pns_n = pns_n_adult
    neuro_pt_counts_sum = neuro_pt_counts_sum_adult
    pop = "Adult"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) 
    list_of_comorbs <- list_of_comorbs$Comorbidity
    } else  {
    cns_n = cns_n_pediatric
    pns_n = pns_n_pediatric
    neuro_pt_counts_sum = neuro_pt_counts_sum_pediatric
    pop = "Pediatric"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) %>% 
      as.vector()
    list_of_comorbs <- list_of_comorbs$Comorbidity
  }
  
  
  # create empty list to save relative risk calculations
  rr_calcs <- list()
  
  # for each comorbidity, we will calculate the relative risk of having a CNS and PNS diagnosis
  for (i in list_of_comorbs) {
  
    # https://www.cdc.gov/csels/dsepd/ss1978/lesson3/section5.html
    # create matrix as:
    # exposed group - with outcome, without outcome
    # non exposed group - with outcome, without outcome
  
    # where variables ending with `_Total` are the total number of CNS, PNS, or NNC patients with the respective comorbidity.
    # variables ending with `_n` are the total numbers in the entire population
  
    # to calculate confidence intervals
  #https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/bs704_confidence_intervals8.html
  
    rr_cns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = CNS_Total, # number CNS w/comorb
             b = Comorb_Total - CNS_Total, # number of comorb pts w/o CNS
             c = cns_n - CNS_Total, # number of CNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or CNS
             risk_cns = a/(a+b),
             risk_non_cns = c/(c+d),
             rr_CNS = risk_cns/risk_non_cns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b), # (n1-x1)/n1 
             ci_2 = (d/c)/(c+d), # (n2-x2)/n2
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_CNS) - (1.96*ci_root),
             ci_upper = log(rr_CNS) + (1.96*ci_root),
             l_ci_cns = exp(ci_lower),
             u_ci_cns = exp(ci_upper)) %>%
      select(Comorbidity, Comorb_Total, None_Total, CNS_Total, PNS_Total, rr_CNS, l_ci_cns, u_ci_cns)
  
      rr_pns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = PNS_Total, # number PNS w/comorb
             b = Comorb_Total - PNS_Total, # number of comorb pts w/o PNS
             c = pns_n - PNS_Total, # number of PNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or PNS
             risk_pns = a/(a+b),
             risk_non_pns = c/(c+d),
             rr_PNS = risk_pns/risk_non_pns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b),
             ci_2 = (d/c)/(c+d),
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_PNS) - (1.96*ci_root),
             ci_upper = log(rr_PNS) + (1.96*ci_root),
             l_ci_pns = exp(ci_lower),
             u_ci_pns = exp(ci_upper)) %>%
      select(Comorbidity, rr_PNS, l_ci_pns, u_ci_pns)
  
      rr <- rr_cns %>%
        left_join(., rr_pns, by = c("Comorbidity"))
  
    rr_calcs[[i]] <- rr
  
  }
  
  rr_results <- bind_rows(rr_calcs) %>%
      mutate(rr_CNS = round(rr_CNS, 2),
           l_ci_cns = round(l_ci_cns, 2),
           u_ci_cns = round(u_ci_cns, 2),
           rr_PNS = round(rr_PNS, 2),
           l_ci_pns = round(l_ci_pns, 2),
           u_ci_pns = round(u_ci_pns, 2),
           `RR CNS (95% CI)` = paste(rr_CNS, "(", l_ci_cns, ",", u_ci_cns, ")"),
           `RR PNS (95% CI)` = paste(rr_PNS, "(", l_ci_pns, ",", u_ci_pns, ")")) %>%
    select(Comorbidity, rr_CNS, l_ci_cns, u_ci_cns, `RR CNS (95% CI)`,
           rr_PNS, l_ci_pns, u_ci_pns, `RR PNS (95% CI)`)
  
  # tidy up data
  write.csv(rr_results %>%
              arrange(desc(rr_CNS)) %>%
              select(Comorbidity, `RR CNS (95% CI)`, `RR PNS (95% CI)`), 
            paste0("tables/Supp.Table4_RR_", pop, ".csv"), row.names = FALSE)
  
    # reformat data
  rr_results_tidy_rr <- rr_results %>%
    select(Comorbidity, rr_CNS, rr_PNS) %>%
    pivot_longer(cols = rr_CNS:rr_PNS, names_to = "Neuro Status", values_to = "RR") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "rr_CNS", "CNS", "PNS"))
  
  rr_results_tidy_l_ci <- rr_results %>%
    select(Comorbidity, l_ci_cns, l_ci_pns) %>%
    pivot_longer(cols = l_ci_cns:l_ci_pns, names_to = "Neuro Status", values_to = "l_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "l_ci_cns", "CNS", "PNS"))
  
  rr_results_tidy_u_ci <- rr_results %>%
    select(Comorbidity, u_ci_cns, u_ci_pns) %>%
    pivot_longer(cols = u_ci_cns:u_ci_pns, names_to = "Neuro Status", values_to = "u_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "u_ci_cns", "CNS", "PNS"))
  
  # combine all tidy results
  rr_results_tidy <- rr_results_tidy_rr %>%
    left_join(., rr_results_tidy_l_ci,
                               by = c("Comorbidity", "Neuro Status")) %>%
    left_join(., rr_results_tidy_u_ci,
                               by = c("Comorbidity", "Neuro Status"))
  
  # order by CNS diagnoses with higher RR
  cns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "CNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = cns_top_results$Comorbidity)
  
  cns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "CNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.7, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "tomato") +
      scale_x_continuous(breaks = seq(0, 4, 1), labels = seq(0, 4, 1),
                         limits = c(0,4)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("CNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  # arrange by top PNS results
  pns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "PNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = pns_top_results$Comorbidity)
  
  
  pns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "PNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.70, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "slateblue") +
      scale_x_continuous(breaks = seq(0, 1.5, 0.5), labels = seq(0, 1.5, 0.5),
                         limits = c(0,1.5)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("PNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  pns_rr_plot <- set_panel_size(pns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  cns_rr_plot <- set_panel_size(cns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  #grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2)
  
  ggsave(paste0("figures/Figure4.relative_risk_", pop, ".png"), grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2), width = 15, height = 8, dpi = 300)
  
  }
```

### Relative Risk (RR)

Here we plot the relative risk of acquiring a CNS or PNS diagnosis in adult patients who have each comorbidity of the ECI compared to those who do not.

We are not able to compute this analysis in pediatric patients, due to the overall low incidence of comorbidities and neurological illness during COVID-19 hospitalization.

```{r eval=TRUE, include=FALSE}
rr_adult <- calculate_rr(is_pediatric = FALSE)
#rr_pediatric <- calculate_rr(is_pediatric = TRUE)
```

![](figures/Figure4.relative_risk_Adult.png){.class width="827" height="452"}

### LPCA deviance explained

During the local analysis for each participating healthcare system, we computed the top 10 principal components using logistic principal component analysis (LPCA). LPCA was performed on the matrix of npatients x 29 ECI comorbidities. Across healthcare systems, 10 principal components explained at least 75% of the model deviance. 

This supplementary analysis was performed in order to determine the best way to adjust for each patient's comorbidity burden in our downstream survival models.

```{r message=FALSE, warning=FALSE}
compare_deviance <- function(sorted_sites, is_pediatric=FALSE) {
  
  pca_dev <- list()
  
  if(is_pediatric==FALSE) {
    population = "adults"
    sorted_sites = sorted_sites[!sorted_sites%in% c("GOSH_results", "BCH_results")]
    } else {
    population = "pediatrics"
    }

  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    print(i)
    tmp <- get(i)
    tmp_pca <- tmp[[c(
      "comorbidities",
      paste0("comorb_", population),
      "deviance_expl"
    )]] %>% 
      data.frame() %>% 
      rename(dev_expl = '.')
    tmp_pca$site <- tmp[["site"]]
    pca_dev[[i]] <- tmp_pca
    
  }
  
  pca_df <- bind_rows(pca_dev)
  
  return(pca_df)
  
}

pca_adult <- compare_deviance(sorted_sites = adult_sites, is_pediatric = FALSE)


pca_data <- bind_rows(pca_adult)


theme_set(theme_classic())

pca_plot <- ggplot(pca_data, 
       aes(x = fct_reorder(site, -dev_expl), y = dev_expl, group = 1)) +
  geom_point(size = 2.5) +
  geom_line() +
  xlab("Healthcare System") + 
  ylab("Proportion of Deviance explained") +  
  ylim(0,1) + 
  theme(axis.text.x=element_text(angle=90, hjust=1, face = "bold", size = 12),
        axis.text.y=element_text(face = "bold"), 
        axis.title.y = element_text(face = "bold", size = 15)); pca_plot

ggsave("figures/SuppFig_pca_deviance.png", pca_plot, height = 6, width = 6)
```
