This notebook describes the 106,229 adult and
pediatric hospitalized COVID-19 positive patients that were studied as
part of the Consortium for Clinical
Characterization of COVID-19 by EHR Neurology group. This analysis
contains the results of patients from 21 different healthcare systems
spanning 6 countries.
library(tidyverse)
library(gridGraphics)
library(gridExtra)
library(ggpubr)
library(DT)
library(kableExtra)
library(cowplot)
library(RColorBrewer)
library(glue)
library(egg)
source("R/plot_theme.R")
source("R/demo_tables.R")
source("R/utils.R")
Import Data from each Healthcare System
Each participating healthcare system ran the analysis locally on
patient level data using our customized
R package. The output of each local analysis consisted of aggregated
counts and summary statistics (means, medians, hazard ratios, p-values,
etc). Summary results from each healthcare system were then aggregated
and are imported and then combined using the following code:
# read in files from results folder
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
path = "results",
pattern = ".rda",
full.names = TRUE
)
for (rda in rdas) {
load(rda)
}
rm(rdas, rda)
# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"
# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)
# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
filter(!is.na(date_v4_received)) %>%
pull(siteid) %>%
paste("results", sep = "_")
# list sites without race
sites_wo_race <- site_params %>%
filter(!include_race) %>%
pull(siteid)
# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))
## load in the pre-processed comorbidity and meta-data
load("processed/comorbidity_table_cnps.rda")
load("processed/neuro_pt_counts.rda")
Pre-processing
The following code pre-processes the demographic and clinical course
data in order to construct a Table 1.
1. Format primary demographic characteristics
# create list of hospitals for adult and pediatric analyses
adult_sites = sorted_sites[!sorted_sites %in% c("BCH_results", "GOSH_results")]
pediatric_sites = sorted_sites[!sorted_sites %in% c("VA1_results", "VA2_results", "VA3_results", "VA4_results", "VA5_results")]
demo_table_adult <- create_demo_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)
demo_table_pediatric <- create_demo_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)
demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric)
2. Format clinical characteristics (i.e: continuous variables
such as median time to discharge)
clinical_table_adult <- create_clinical_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)
clinical_table_pediatric <- create_clinical_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)
clinical_table_combine <- rbind(clinical_table_adult, clinical_table_pediatric)
3. Conduct additional pre-processing of clinical
variables
clinical_table_adult_clean <- clean_clinical_tables(clinical_table = clinical_table_adult)
clinical_table_pediatric_clean <- clean_clinical_tables(clinical_table = clinical_table_pediatric)
clinical_table_clean_combine <- rbind(clinical_table_adult_clean, clinical_table_pediatric_clean)
4. Add Comorbidity data
Here we create separate lists of the top 4 comorbidities in the adult
and pediatric populations
comorbidity_table_adults <- comorb_table_wide %>%
filter(population == "Adult") %>%
arrange(desc(Comorb_Total)) %>%
slice(1:4) # return top comorbidities
comorbidity_table_pediatric <- comorb_table_wide %>%
filter(population == "Pediatric") %>%
arrange(desc(Comorb_Total)) %>%
slice(1:4) # return top comorbidities
top_comorb_adults <- comorbidity_table_adults$Comorbidity
top_comorb_pediatrics <- comorbidity_table_pediatric$Comorbidity
Demographics
Table One - Combined
The below table contains the counts and summary stats of the entire
cohort (adult & pediatric patients).
neuro_pt_counts_sum <- colSums(neuro_pt_counts[,-1]) %>%
data.frame() %>%
t() %>%
data.frame()
# specify the order of Table 1
row_order_adult <- c(
"All Patients", "Female", "Male", "Unknown Sex",
"0-2", "3-5", "6-11", "12-17", "18-25",
"26-49", "50-69", "70-79", "80+", "Unknown Age",
"American Indian", "Asian", "Black",
"Hawaiian/Pacific Islander",
"Hispanic/Latino", "White", "Other",
top_comorb_adults,
"Median Elixhauser score (sd) ",
"Median pre admission cns (sd) ",
"Median pre admission pns (sd) ",
"Non-Severe", "Severe",
"Median time to severe (sd) ",
"Alive", "Deceased",
"Median time to death (sd) ",
"Discharged", "Not Discharged",
"Median time to first discharge (sd) ",
"Not Readmitted", "Readmitted",
"Median time to first readmission (sd) ",
"Median number of readmissions (sd) "
)
tableone_combine <- create_tableone(demo_table = demo_table_combine,
clinical_table = clinical_table_clean_combine) %>%
rbind(., comorb_table_wide %>%
group_by(Comorbidity) %>%
mutate(Comorb_Total = sum(Comorb_Total, na.rm = TRUE),
None_sum = sum(None_Total, na.rm = TRUE),
CNS_sum = sum(CNS_Total, na.rm = TRUE),
PNS_sum = sum(PNS_Total, na.rm = TRUE),
None_perc = round(None_sum/neuro_pt_counts_sum$None_n*100,1),
Central_perc = round(CNS_sum/neuro_pt_counts_sum$CNS_n*100,1),
Peripheral_perc = round(PNS_sum/neuro_pt_counts_sum$PNS_n*100,1),
None = paste(None_sum, "(", None_perc, ")"),
Central = paste(CNS_sum, "(", Central_perc, ")"),
Peripheral = paste(PNS_sum, "(", Peripheral_perc, ")")) %>%
distinct(Comorbidity, Comorb_Total, None, Central, Peripheral) %>%
rename(`Table 1` = "Comorbidity",
N = "Comorb_Total")) %>%
slice(match(row_order_adult, `Table 1`))
kbl(tableone_combine %>%
rename("No Neurological Condition (NNC)" = None)) %>%
add_header_above(c(
" ",
"Total Patients" = 1,
"Neurological Disease" = 3
)) %>%
kable_paper("striped", full_width = F) %>%
pack_rows("Sex", 2, 4) %>%
pack_rows("Age", 5, 13) %>%
pack_rows("Race & Ethnicity", 14, 20) %>%
pack_rows("Past Medical History", 21, 27) %>%
pack_rows("Severity", 28, 30) %>%
pack_rows("Survival", 31, 33) %>%
pack_rows("Discharge", 34, 36) %>%
pack_rows("Readmission", 37, 40)
|
Total Patients
|
Neurological Disease
|
Table 1
|
N
|
No Neurological Condition (NNC)
|
Central
|
Peripheral
|
All Patients
|
106229
|
88340 (100%)
|
15101 (100%)
|
2788 (100%)
|
Sex
|
Female
|
37590
|
32399 (36.7%)
|
4247 (28.1%)
|
944 (33.9%)
|
Male
|
68638
|
55940 (63.3%)
|
10854 (71.9%)
|
1844 (66.1%)
|
Unknown Sex
|
1
|
1 (0%)
|
0 (0%)
|
0 (0%)
|
Age
|
0-2
|
769
|
747 (0.8%)
|
22 (0.1%)
|
0 (0%)
|
3-5
|
275
|
251 (0.3%)
|
24 (0.2%)
|
0 (0%)
|
6-11
|
403
|
368 (0.4%)
|
33 (0.2%)
|
2 (0.1%)
|
12-17
|
707
|
637 (0.7%)
|
60 (0.4%)
|
10 (0.4%)
|
18-25
|
2328
|
2138 (2.4%)
|
145 (1%)
|
45 (1.6%)
|
26-49
|
17799
|
16122 (18.3%)
|
1166 (7.7%)
|
511 (18.5%)
|
50-69
|
36885
|
31556 (35.7%)
|
4169 (27.7%)
|
1160 (41.9%)
|
70-79
|
24860
|
19672 (22.3%)
|
4508 (30%)
|
680 (24.6%)
|
80+
|
22094
|
16813 (19%)
|
4920 (32.7%)
|
361 (13%)
|
Race & Ethnicity
|
American Indian
|
350
|
295 (0.3%)
|
55 (0.4%)
|
0 (0%)
|
Asian
|
1694
|
1464 (1.7%)
|
205 (1.4%)
|
25 (0.9%)
|
Black
|
16815
|
13368 (15.1%)
|
2995 (19.9%)
|
452 (16.5%)
|
Hawaiian/Pacific Islander
|
297
|
255 (0.3%)
|
41 (0.3%)
|
1 (0%)
|
Hispanic/Latino
|
870
|
819 (0.9%)
|
29 (0.2%)
|
22 (0.8%)
|
White
|
41871
|
33360 (37.8%)
|
7401 (49.1%)
|
1110 (40.5%)
|
Other
|
44220
|
38750 (43.9%)
|
4336 (28.8%)
|
1134 (41.3%)
|
Past Medical History
|
Hypertension
|
35587
|
27323 ( 30.9 )
|
7277 ( 48.2 )
|
987 ( 35.4 )
|
Alcohol abuse
|
30130
|
23065 ( 26.1 )
|
6194 ( 41 )
|
871 ( 31.2 )
|
Drug abuse
|
26596
|
20287 ( 23 )
|
5528 ( 36.6 )
|
781 ( 28 )
|
Diabetes
|
21969
|
16905 ( 19.1 )
|
4429 ( 29.3 )
|
635 ( 22.8 )
|
Median Elixhauser score (sd)
|
0.3 (0.7)
|
0.2 (0.5)
|
1.4 (2)
|
0.6 (1.7)
|
Median pre admission cns (sd)
|
0 (0)
|
0 (0)
|
0 (0)
|
0 (0)
|
Median pre admission pns (sd)
|
0 (0)
|
0 (0)
|
0 (0)
|
0 (0.1)
|
Severity
|
Non-Severe
|
66252
|
57771 (65.4%)
|
7007 (46.4%)
|
1474 (52.7%)
|
Severe
|
39985
|
30576 (34.6%)
|
8084 (53.6%)
|
1325 (47.3%)
|
Median time to severe (sd)
|
0.6 (1)
|
0.8 (1.3)
|
0.4 (0.7)
|
0.8 (1.1)
|
Survival
|
Alive
|
87376
|
74389 (84.2%)
|
10392 (68.8%)
|
2595 (93.1%)
|
Deceased
|
18849
|
13953 (15.8%)
|
4705 (31.2%)
|
191 (6.9%)
|
Median time to death (sd)
|
15.3 (6.4)
|
15.9 (5.8)
|
22.4 (32.5)
|
36.9 (41.4)
|
Discharge
|
Discharged
|
103165
|
85639 (96.9%)
|
14801 (98%)
|
2725 (97.7%)
|
Not Discharged
|
3063
|
2701 (3.1%)
|
297 (2%)
|
65 (2.3%)
|
Median time to first discharge (sd)
|
8.4 (4.2)
|
6.5 (3.6)
|
12.8 (6.1)
|
14.5 (18.3)
|
Readmission
|
Not Readmitted
|
91646
|
76877 (87%)
|
12435 (82.4%)
|
2334 (83.4%)
|
Readmitted
|
14569
|
11456 (13%)
|
2650 (17.6%)
|
463 (16.6%)
|
Median time to first readmission (sd)
|
28.1 (20.3)
|
26.4 (17.6)
|
31.9 (33.7)
|
38.1 (28.4)
|
Median number of readmissions (sd)
|
0.1 (0.3)
|
0 (0)
|
0.1 (0.3)
|
0.1 (0.3)
|
write.csv(tableone_combine, 'tables/table1_combined.csv', row.names = FALSE)
Table One - Adults
tableone_adult <- create_tableone(demo_table = demo_table_adult,
clinical_table = clinical_table_adult_clean) %>%
rbind(., comorbidity_table_adults %>%
select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
rename(`Table 1` = "Comorbidity",
N = "Comorb_Total",
None = "NNC_N_%",
Central = "CNS_N_%",
Peripheral = "PNS_N_%")) %>%
slice(match(row_order_adult, `Table 1`))
kbl(tableone_adult %>%
rename("No Neurological Condition (NNC)" = None)) %>%
add_header_above(c(
" ",
"Total Patients" = 1,
"Neurological Disease" = 3
)) %>%
kable_paper("striped", full_width = F) %>%
pack_rows("Sex", 2, 4) %>%
pack_rows("Age", 5, 9) %>%
pack_rows("Race & Ethnicity", 10, 16) %>%
pack_rows("Past Medical History", 17, 23) %>%
pack_rows("Severity", 24, 26) %>%
pack_rows("Survival", 27, 29) %>%
pack_rows("Discharge", 30, 32) %>%
pack_rows("Readmission", 33, 36)
|
Total Patients
|
Neurological Disease
|
Table 1
|
N
|
No Neurological Condition (NNC)
|
Central
|
Peripheral
|
All Patients
|
104031
|
86321 (100%)
|
14938 (100%)
|
2772 (100%)
|
Sex
|
Female
|
36564
|
31445 (36.4%)
|
4185 (28%)
|
934 (33.7%)
|
Male
|
67466
|
54875 (63.6%)
|
10753 (72%)
|
1838 (66.3%)
|
Unknown Sex
|
1
|
1 (0%)
|
0 (0%)
|
0 (0%)
|
Age
|
18-25
|
2328
|
2138 (2.5%)
|
145 (1%)
|
45 (1.6%)
|
26-49
|
17799
|
16122 (18.7%)
|
1166 (7.8%)
|
511 (18.5%)
|
50-69
|
36885
|
31556 (36.6%)
|
4169 (28%)
|
1160 (42.1%)
|
70-79
|
24860
|
19672 (22.8%)
|
4508 (30.2%)
|
680 (24.7%)
|
80+
|
22094
|
16813 (19.5%)
|
4920 (33%)
|
361 (13.1%)
|
Race & Ethnicity
|
American Indian
|
349
|
294 (0.3%)
|
55 (0.4%)
|
0 (0%)
|
Asian
|
1633
|
1412 (1.6%)
|
196 (1.3%)
|
25 (0.9%)
|
Black
|
16631
|
13202 (15.3%)
|
2979 (20%)
|
450 (16.5%)
|
Hawaiian/Pacific Islander
|
296
|
254 (0.3%)
|
41 (0.3%)
|
1 (0%)
|
Hispanic/Latino
|
850
|
799 (0.9%)
|
29 (0.2%)
|
22 (0.8%)
|
White
|
41206
|
32752 (38%)
|
7354 (49.3%)
|
1100 (40.3%)
|
Other
|
42971
|
37587 (43.6%)
|
4254 (28.5%)
|
1130 (41.4%)
|
Past Medical History
|
Hypertension
|
35566
|
27302 ( 31.6 %)
|
7277 ( 48.7 %)
|
987 ( 35.6 %)
|
Alcohol abuse
|
29918
|
22879 ( 26.5 %)
|
6169 ( 41.3 %)
|
870 ( 31.4 %)
|
Drug abuse
|
26389
|
20097 ( 23.3 %)
|
5512 ( 36.9 %)
|
780 ( 28.1 %)
|
Diabetes
|
21952
|
16888 ( 19.6 %)
|
4429 ( 29.6 %)
|
635 ( 22.9 %)
|
Median Elixhauser score (sd)
|
0.3 (0.7)
|
0.2 (0.5)
|
1.4 (2)
|
0.6 (1.7)
|
Median pre admission cns (sd)
|
0 (0)
|
0 (0)
|
0 (0)
|
0 (0)
|
Median pre admission pns (sd)
|
0 (0)
|
0 (0)
|
0 (0)
|
0 (0.1)
|
Severity
|
Non-Severe
|
64578
|
56181 (65.1%)
|
6935 (46.4%)
|
1462 (52.6%)
|
Severe
|
39468
|
30140 (34.9%)
|
8008 (53.6%)
|
1320 (47.4%)
|
Median time to severe (sd)
|
0.6 (1)
|
0.8 (1.3)
|
0.4 (0.7)
|
0.8 (1.1)
|
Survival
|
Alive
|
85212
|
72388 (83.9%)
|
10247 (68.6%)
|
2577 (93.1%)
|
Deceased
|
18820
|
13933 (16.1%)
|
4696 (31.4%)
|
191 (6.9%)
|
Median time to death (sd)
|
15.3 (6.4)
|
15.9 (5.8)
|
22.4 (32.5)
|
36.9 (41.4)
|
Discharge
|
Discharged
|
101032
|
83675 (96.9%)
|
14650 (98%)
|
2707 (97.7%)
|
Not Discharged
|
3005
|
2645 (3.1%)
|
295 (2%)
|
65 (2.3%)
|
Median time to first discharge (sd)
|
8.4 (4.2)
|
6.5 (3.6)
|
12.8 (6.1)
|
14.5 (18.3)
|
Readmission
|
Not Readmitted
|
89802
|
75170 (87.1%)
|
12312 (82.4%)
|
2320 (83.5%)
|
Readmitted
|
14237
|
11151 (12.9%)
|
2626 (17.6%)
|
460 (16.5%)
|
Median time to first readmission (sd)
|
28.1 (20.3)
|
26.4 (17.6)
|
31.9 (33.7)
|
38.1 (28.4)
|
Median number of readmissions (sd)
|
0.1 (0.3)
|
0 (0)
|
0.1 (0.3)
|
0.1 (0.3)
|
write.csv(tableone_adult, 'tables/table1_adults.csv', row.names = FALSE)
Table One - Pediatrics
# specify the order of Table 1
row_order_pediatric <- c(
"All Patients", "Female", "Male", "Unknown Sex",
"0-2", "3-5", "6-11", "12-17", "18-25",
"26-49", "50-69", "70-79", "80+", "Unknown Age",
"American Indian", "Asian", "Black",
"Hawaiian/Pacific Islander",
"Hispanic/Latino", "White", "Other",
top_comorb_pediatrics,
"Median Elixhauser score (sd) ",
"Median pre admission cns (sd) ",
"Median pre admission pns (sd) ",
"Non-Severe", "Severe",
"Median time to severe (sd) ",
"Alive", "Deceased",
"Median time to death (sd) ",
"Discharged", "Not Discharged",
"Median time to first discharge (sd) ",
"Not Readmitted", "Readmitted",
"Median time to first readmission (sd) ",
"Median number of readmissions (sd) "
)
tableone_pediatric <- create_tableone(demo_table = demo_table_pediatric,
clinical_table = clinical_table_pediatric_clean) %>%
rbind(., comorbidity_table_pediatric %>%
select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
rename(`Table 1` = "Comorbidity",
N = "Comorb_Total",
None = "NNC_N_%",
Central = "CNS_N_%",
Peripheral = "PNS_N_%")) %>%
slice(match(row_order_pediatric, `Table 1`))
kbl(tableone_pediatric %>%
rename("No Neurological Condition (NNC)" = None)) %>%
add_header_above(c(
" ",
"Total Patients" = 1,
"Neurological Disease" = 3
)) %>%
kable_paper("striped", full_width = F) %>%
pack_rows("Sex", 2, 4) %>%
pack_rows("Age", 5, 8) %>%
pack_rows("Race & Ethnicity", 9, 15) %>%
pack_rows("Past Medical History", 16, 22) %>%
pack_rows("Severity", 23, 25) %>%
pack_rows("Survival", 26, 28) %>%
pack_rows("Discharge", 29, 31) %>%
pack_rows("Readmission", 32, 35)
|
Total Patients
|
Neurological Disease
|
Table 1
|
N
|
No Neurological Condition (NNC)
|
Central
|
Peripheral
|
All Patients
|
2198
|
2019 (100%)
|
163 (100%)
|
16 (100%)
|
Sex
|
Female
|
1026
|
954 (47.3%)
|
62 (38%)
|
10 (62.5%)
|
Male
|
1172
|
1065 (52.7%)
|
101 (62%)
|
6 (37.5%)
|
Unknown Sex
|
0
|
0 (0%)
|
0 (0%)
|
0 (0%)
|
Age
|
0-2
|
769
|
747 (37.3%)
|
22 (15.8%)
|
0 (0%)
|
3-5
|
275
|
251 (12.5%)
|
24 (17.3%)
|
0 (0%)
|
6-11
|
403
|
368 (18.4%)
|
33 (23.7%)
|
2 (16.7%)
|
12-17
|
707
|
637 (31.8%)
|
60 (43.2%)
|
10 (83.3%)
|
Race & Ethnicity
|
American Indian
|
1
|
1 (0%)
|
0 (0%)
|
0 (0%)
|
Asian
|
61
|
52 (2.6%)
|
9 (5.8%)
|
0 (0%)
|
Black
|
184
|
166 (8.3%)
|
16 (10.4%)
|
2 (12.5%)
|
Hawaiian/Pacific Islander
|
1
|
1 (0%)
|
0 (0%)
|
0 (0%)
|
Hispanic/Latino
|
20
|
20 (1%)
|
0 (0%)
|
0 (0%)
|
White
|
665
|
608 (30.2%)
|
47 (30.5%)
|
10 (62.5%)
|
Other
|
1249
|
1163 (57.8%)
|
82 (53.2%)
|
4 (25%)
|
Past Medical History
|
Alcohol abuse
|
212
|
186 ( 9.2 %)
|
25 ( 15.3 %)
|
1 ( 6.2 %)
|
Drug abuse
|
207
|
190 ( 9.4 %)
|
16 ( 9.8 %)
|
1 ( 6.2 %)
|
Weight loss
|
123
|
109 ( 5.4 %)
|
13 ( 8 %)
|
1 ( 6.2 %)
|
Cardiac arrhythmias
|
109
|
94 ( 4.7 %)
|
12 ( 7.4 %)
|
3 ( 18.8 %)
|
Median Elixhauser score (sd)
|
2.2 (4.5)
|
0 (0)
|
4.4 (9.2)
|
2.5 (3.6)
|
Median pre admission cns (sd)
|
0.2 (0.7)
|
0.1 (0.2)
|
1.9 (3.9)
|
0 (0)
|
Median pre admission pns (sd)
|
0 (0)
|
0 (0)
|
0 (0)
|
0.2 (0.6)
|
Severity
|
Non-Severe
|
1674
|
1590 (78.5%)
|
72 (48.6%)
|
12 (70.6%)
|
Severe
|
517
|
436 (21.5%)
|
76 (51.4%)
|
5 (29.4%)
|
Median time to severe (sd)
|
2.4 (5.3)
|
3.8 (9.5)
|
10.8 (33.1)
|
0.9 (2.5)
|
Survival
|
Alive
|
2164
|
2001 (99%)
|
145 (94.2%)
|
18 (100%)
|
Deceased
|
29
|
20 (1%)
|
9 (5.8%)
|
0 (0%)
|
Median time to death (sd)
|
12.6 (36.3)
|
53.5 (76.3)
|
5.8 (10.2)
|
95 (268.7)
|
Discharge
|
Discharged
|
2133
|
1964 (97.2%)
|
151 (98.7%)
|
18 (100%)
|
Not Discharged
|
58
|
56 (2.8%)
|
2 (1.3%)
|
0 (0%)
|
Median time to first discharge (sd)
|
5.1 (4.2)
|
3.8 (4.2)
|
5.8 (2.7)
|
5.9 (4.5)
|
Readmission
|
Not Readmitted
|
1844
|
1707 (84.8%)
|
123 (83.7%)
|
14 (82.4%)
|
Readmitted
|
332
|
305 (15.2%)
|
24 (16.3%)
|
3 (17.6%)
|
Median time to first readmission (sd)
|
20 (20.2)
|
20.1 (19)
|
75.3 (86)
|
14.4 (22.8)
|
Median number of readmissions (sd)
|
0.1 (0.3)
|
0 (0)
|
0.2 (0.4)
|
0.4 (0.5)
|
write.csv(tableone_pediatric, 'tables/table1_pediatric.csv', row.names = FALSE)
Demographic & Clinical Course Analysis
Next we will evaluate characteristics among patients stratified by
neurological status: 1) patients with no neurological condition (NNC),
2) patients with a central nervous system condition (CNS), and 3)
patients with a peripheral nervous system condition (PNS).
Categorical variables will be evaluated with chi-square tests, while
continuous variables will be evaluated with a non-parametric anova
(Kruskal-Wallis) test.
1. Conduct chi-squared tests for categorical demographic
variables
compute_chi_square <- function(tableone, demo_table) {
N = tableone %>% select(N) %>% head(1) %>% as.numeric()
# sum of all neuro condition groups
cns_n = as.numeric(gsub( " .*$", "", tableone[1,"Central"]))
pns_n = as.numeric(gsub( " .*$", "", tableone[1,"Peripheral"]))
none_n = as.numeric(gsub( " .*$", "", tableone[1,"None"]))
# sum up the total counts of each demographic variable across healthcare systems
tableOne_sums <- demo_table %>%
group_by(variable, Demo_var, Demo_var_i) %>%
summarise(across(
starts_with("n_var"),
function(x) sum(x, na.rm = TRUE)
), .groups = "drop")
# create list of demographic/clinical variables
vars = tableOne_sums$variable
# for binary variables, we should group by category
# Thus, only run the analysis for sex.male rather than both sex.female AND sex.male which would be redundant
# we will also remove `age_group.unknown` since their is only 1 patient here
exclude <- c("readmitted.false", "sex.female", "sex.other", "survival.deceased", "severity.non-severe",
"covid_discharged.discharged", "age_group.Unknown")
vars <- vars[!vars %in% exclude]
# create empty list for chi.square test results
chi_result_list = list()
# for each variable, we will run the chi.square test
for(i in vars) {
test = calc_chisq(i, tableOne_sums, none_n, pns_n, cns_n)
X2 <- round(test$statistic, 4)
p_value <- test$p.value
df <- test$parameter
Variable = paste(i)
chi_results <- cbind(Variable, X2, p_value, df) %>%
data.frame() %>%
mutate(p_value = as.numeric(p_value))
rownames(chi_results) <- NULL
chi_result_list[[i]] <- chi_results
}
# save list of chi.square test results
chisq_results <- bind_rows(chi_result_list)
return(chisq_results)
}
Combined Adult & Pediatric Chi-Square
Results
chisq_combine <- compute_chi_square(tableone = tableone_combine %>%
filter(!`Table 1` == 'Discharged',
!`Table 1` == 'Not Discharged'),
demo_table = demo_table_combine %>%
filter(!variable == 'covid_discharged.discharged',
!variable == 'covid_discharged.not discharged'))
datatable(chisq_combine %>% arrange(p_value))
Adult Chi-Square Results
chisq_adult <- compute_chi_square(tableone = tableone_adult %>%
filter(!`Table 1` == 'Discharged',
!`Table 1` == 'Not Discharged'),
demo_table = demo_table_adult %>%
filter(!variable == 'covid_discharged.discharged',
!variable == 'covid_discharged.not discharged'))
datatable(chisq_adult %>% arrange(p_value))
Pediatric Chi-Square Results
chisq_pediatric <- compute_chi_square(tableone = tableone_pediatric %>%
filter(!`Table 1` == 'Discharged',
!`Table 1` == 'Not Discharged'),
demo_table = demo_table_pediatric %>%
filter(!variable == 'covid_discharged.discharged',
!variable == 'covid_discharged.not discharged'))
datatable(chisq_pediatric %>% arrange(p_value))
2. Conduct non-parametric anova (Kruskal-Wallis) tests for
continuous clinical variables
compute_kruskal <- function(clinical_table) {
# create empty list to save processed variables
processed_list_figs <- list()
# create a list of clinical variables to further pre-process
vars_to_process <- unique(clinical_table$name)
# for each variable, we will remove the [min, max] as before
for (i in vars_to_process) {
mod_table <- clinical_table %>%
rename("var" = name) %>%
filter(var == i) %>%
mutate(
site = toupper(site),
None = sub("(\\(.*|\\[.*)", "", None),
Peripheral = sub("(\\(.*|\\[.*)", "", Peripheral),
Central = sub("(\\(.*|\\[.*)", "", Central)
) %>%
mutate(across(None:Central, as.numeric))
processed_list_figs[[i]] <- mod_table
}
clinical_table_anova <- bind_rows(processed_list_figs)
# format the continuous variables
cont_vars = clinical_table_anova %>%
mutate(
# create a variable without mean/median prefix
grouped_var = gsub("Mean |Median | \\[Min, Max\\]| \\(SD\\)", "", var)
) %>%
pivot_longer(
cols = None:Central,
names_to = "type"
) %>%
mutate(type = factor(type, levels = c("None", "Central", "Peripheral"))) %>%
# select variables to analyze
filter(grouped_var %in% c("Elixhauser score",
"pre admission cns",
"pre admission pns",
"time to death",
"time to first discharge",
"time to severe",
"number of readmissions",
"time to first readmission")) %>%
distinct()
# create function to conduct anova
cont_var_results <- function(variable) {
df <- cont_vars %>%
filter(grouped_var == paste(variable))
# kruskal.test calculates the kruskal-Wallis H-statistic
# does not assume normality between groups
# also called the one-way ANOVA on ranks
test = kruskal.test(df$value ~ df$type)
}
# create empty list for Kruskal-Wallis results
cont_vars_list = list()
# create list of unique variables
outcome_cont_vars = unique(cont_vars$grouped_var)
for(i in outcome_cont_vars) {
test = cont_var_results(i)
X2 <- round(test$statistic, 4)
p_value <- test$p.value
df <- test$parameter
Variable = paste(i)
kw <- cbind(Variable, X2, p_value, df) %>%
data.frame()
rownames(kw) <- NULL
cont_vars_list[[i]] <- kw
}
anova_results <- bind_rows(cont_vars_list)
}
Combined Adult & Pediatric Kruskal-Wallis
Results
kw_combine <- compute_kruskal(clinical_table = clinical_table_combine)
datatable(kw_combine %>% arrange(p_value))
Adult Kruskal-Wallis Results
Note: median CNS is 0 for all sites (reason for the NA
values)
kw_adult <- compute_kruskal(clinical_table = clinical_table_adult)
datatable(kw_adult %>% arrange(p_value))
Pediatric Kruskal-Wallis Results
kw_pediatric <- compute_kruskal(clinical_table = clinical_table_pediatric)
datatable(kw_pediatric %>% arrange(p_value))
3. Combine chi-square and Kruskal-Wallis results
We will evaluate significance when controlling for False Discovery
Rate (FDR)
tableone_stats <- function(chisq_results, anova_results, population) {
demo_stats <- rbind(chisq_results %>% select(Variable, p_value),
anova_results %>% select(Variable, p_value))
# adjust p-values with FDR
demo_stats$adj_p_value <- p.adjust(demo_stats$p_value, method = "fdr")
# round & format p-value
demo_stats <- demo_stats %>%
mutate(p_value = as.numeric(p_value),
pvalue = if_else(p_value < 0.001, "< 0.001", paste(round(p_value, 3))),
adj.p_value = if_else(adj_p_value < 0.001, "< 0.001", paste(round(adj_p_value, 3))))
# tidy up the demographics stat table
demo_stats_tidy <- demo_stats %>%
rename(`Unadjusted P-value (raw)` = p_value,
`FDR Adjusted P-value (raw)` = adj_p_value,
`Unadjusted P-value (tidy)` = pvalue,
`FDR Adjusted P-value (tidy)` = adj.p_value) %>%
select(Variable, `Unadjusted P-value (raw)`, `FDR Adjusted P-value (raw)`, `Unadjusted P-value (tidy)`, `FDR Adjusted P-value (tidy)`)
write.csv(demo_stats_tidy, paste0("tables/Table1_pvals_", population, ".csv"), row.names = FALSE)
return(demo_stats_tidy)
}
4. Incorporate Comorbidities
We will also add the top 4 comorbidities for each population to our
table one. For the combined adult and pediatric table, we will use the
top 4 comorbidities of the adult population.
# define matrix for each comorbidity
comorb_matrix <- function(comorbidity_table, comorbidity, population) {
if(population=='Combined') {
comorbidity_table = comorbidity_table
} else if (population=="Adult") {
comorbidity_table = comorbidity_table %>%
filter(population == "Adult")
} else if (population=="Pediatric") {
comorbidity_table = comorbidity_table %>%
filter(population == "Pediatric")
} else {
print('incorrect population specification')
}
mat_comorb <- comorbidity_table %>%
filter(Comorbidity == paste(comorbidity)) %>%
group_by(Comorbidity) %>%
mutate(None_comorb = sum(None_Total, na.rm = TRUE),
CNS_comorb = sum(CNS_Total, na.rm = TRUE),
PNS_comorb = sum(PNS_Total, na.rm = TRUE)) %>%
ungroup() %>%
distinct(None_comorb, CNS_comorb, PNS_comorb) %>%
mutate(none_dif = neuro_pt_counts_sum$None_n - None_comorb,
pns_dif = neuro_pt_counts_sum$PNS_n - PNS_comorb,
cns_dif = neuro_pt_counts_sum$CNS_n - CNS_comorb) %>%
data.frame() %>%
#as.integer() %>%
matrix(nrow = 2, ncol = 3, byrow = TRUE)
comorb_results <- chisq.test(unlist(mat_comorb))
results_df <- data.frame(Variable = paste(comorbidity),
X2 = round(comorb_results$statistic, 4),
p_value = comorb_results$p.value,
df = comorb_results$parameter)
row.names(results_df) <- NULL
return(results_df)
}
## combined comorbidities
comorbidity_combined_chi_list <- list()
for(i in top_comorb_adults) {
comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Combined")
comorbidity_combined_chi_list[[i]] <- comorb_chi_results
}
comorbidity_combined_chi <- comorbidity_combined_chi_list %>%
bind_rows()
chisq_combine_comorb <- chisq_combine %>%
mutate(X2 = as.numeric(X2),
df = as.numeric(df)) %>%
bind_rows(comorbidity_combined_chi)
## adult comorbidities
comorbidity_adult_chi_list <- list()
for(i in top_comorb_adults) {
comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Adult")
comorbidity_adult_chi_list[[i]] <- comorb_chi_results
}
comorbidity_adult_chi <- comorbidity_adult_chi_list %>%
bind_rows()
chisq_adult_comorb <- chisq_adult %>%
mutate(X2 = as.numeric(X2),
df = as.numeric(df)) %>%
bind_rows(comorbidity_adult_chi)
## pediatric comorbidities
comorbidity_pediatric_chi_list <- list()
for(i in top_comorb_pediatrics) {
comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Pediatric")
comorbidity_pediatric_chi_list[[i]] <- comorb_chi_results
}
comorbidity_pediatric_chi <- comorbidity_pediatric_chi_list %>%
bind_rows()
chisq_pediatric_comorb <- chisq_pediatric %>%
mutate(X2 = as.numeric(X2),
df = as.numeric(df)) %>%
bind_rows(comorbidity_pediatric_chi)
Table One - Combined Analysis
combine_tableone_stats <- tableone_stats(chisq_results = chisq_combine_comorb,
anova_results = kw_combine,
population = "combined")
datatable(combine_tableone_stats)
Table One - Adult Analysis
adult_tableone_stats <- tableone_stats(chisq_results = chisq_adult_comorb,
anova_results = kw_adult,
population = "adult")
datatable(adult_tableone_stats)
Table One - Pediatric Analysis
pediatric_tableone_stats <- tableone_stats(chisq_results = chisq_pediatric_comorb,
anova_results = kw_pediatric,
population = "pediatric")
datatable(pediatric_tableone_stats)
Neurological Diagnosis Analysis
Next, we will evaluate the diagnoses captured with the CNS and PNS
groups. Specifically, we will examine the frequency of CNS and PNS
diagnoses by patient age group and by clinical outcomes (mortality and
COVID-19 severity).
1. Format neurological icd code data
# import list of ICD-9/ICD-10 codes
neuro_icds_10 <-
readxl::read_excel("public-data/2020-02-22_neuro-icd10_CNSvPNS.xlsx", sheet = 2) %>%
rename(
"icd" = `ICD-10`,
"pns_cns" = `Nervous system Involvement (1=central, 2=peripheral, 3=irrelevant)`,
"type" = `ICD-10_type (1=first three alphanumeric code only, 2=digits after decimal point)`) %>%
filter(type == 1) %>%
mutate(pns_cns = as.factor(pns_cns) %>% fct_recode(
Central = "1",
Peripheral = "2"
)) %>%
distinct(icd, `Neurological Disease Category`, pns_cns, `ICD-10 Description Revised`) %>%
rename("description" = `ICD-10 Description Revised`) %>%
mutate(concept_type = "icd-10")
neuro_icds_9 <- read.csv("public-data/icd9_tab_CNSvPNS.csv") %>%
rename(
"Neurological Disease Category" = "Neurological.Disease.Category",
"pns_cns" = `Nervous.system.Involvement..1.central..2.peripheral.`,
"icd_description" = `icd9_desc_revised`
) %>%
mutate(
pns_cns = as.factor(pns_cns) %>% fct_recode(
Central = "1",
Peripheral = "2"
),
concept_type = "DIAG-ICD9"
) %>%
select(icd, `Neurological Disease Category`, pns_cns, icd_description) %>%
rename("description" = icd_description) %>%
mutate(concept_type = "icd-9")
# bind list of ICD-9/ICD-10 codes
icds <- rbind(neuro_icds_10, neuro_icds_9)
2. Calculate total number of neurological diagnoses for each
adults & pediatrics
compute_neuro_counts <- function(sorted_sites, sample_size, is_pediatric = FALSE) {
# create empty list to save processed diagnostic data
diag_table_list <- list()
if(is_pediatric==FALSE) {
population = "adults"
} else {
population = "pediatrics"
}
# for each healthcare system, we will calculate the total number of patients with each ICD code
for (i in sorted_sites) {
tmp <- get(i)
tmp_diag <- tmp[[c(
"first_hosp_results",
"icd_tables",
paste0("icd_tables_", population),
"demo_table"
)]]
tmp_diag$site <- tmp[["site"]]
diag_table_list[[i]] <- tmp_diag %>%
filter(variable == "sex.female" | variable == "sex.male" | variable == "sex.other") %>%
select(site, contains("n_var"))
}
diag_table_wide <- bind_rows(diag_table_list)
# we will count the total number and percent across healthcare systems
diag_counts <- colSums(Filter(is.numeric, diag_table_wide), na.rm = TRUE) %>%
data.frame() %>%
mutate(perc = round(./sample_size * 100, 1))
# format diagnostic table
diag_counts <- tibble::rownames_to_column(diag_counts, "icd") %>%
mutate(icd = gsub("n_var_", "", icd)) %>%
rename(`Number of Patients` = ".") %>%
full_join(., icds, by = "icd") %>%
dplyr::arrange(desc(`Number of Patients`)) %>%
mutate(`Number of Patients (% of Cohort)` = paste(`Number of Patients`, "(", perc, ")", sep = " ")) %>%
select(pns_cns, `Neurological Disease Category`, concept_type,
icd, description, `Number of Patients (% of Cohort)`) %>%
rename(Code = "icd",
`Nervous System` = "pns_cns",
Description = "description",
`ICD Version` = "concept_type") %>%
mutate(`Number of Patients (% of Cohort)` = if_else(`Number of Patients (% of Cohort)` == "NA ( NA )", "0 ( 0 )", `Number of Patients (% of Cohort)`),
Code = if_else(Code == "NN", "No Neurological Disease (NNC)", Code))
return(diag_counts)
}
# create vectors containing patient sample sizes
n_adult <- as.numeric(tableone_adult$N)[1]
n_pediatric <- as.numeric(tableone_pediatric$N)[1]
# calculate the total neurological patient counts
neuro_counts_adult <- compute_neuro_counts(sorted_sites = adult_sites,
sample_size = n_adult,
is_pediatric = FALSE)
neuro_counts_pediatric <- compute_neuro_counts(sorted_sites = pediatric_sites,
sample_size = n_pediatric,
is_pediatric = TRUE)
neuro_counts_combined <- neuro_counts_adult %>%
rename(`Number of Adult Patients (% of Cohort)` = `Number of Patients (% of Cohort)`) %>%
left_join(., neuro_counts_pediatric %>%
select(`ICD Version`, Code, `Number of Patients (% of Cohort)`) %>%
rename("Number of Pediatric Patients (% of Cohort`)" = `Number of Patients (% of Cohort)`))
write.csv(neuro_counts_combined, paste0("tables/Supp.Table2_diagnoses", ".csv"), row.names = FALSE)
Number (%) of Adult Patients with each Neurological Diagnosis
datatable(neuro_counts_adult)
Number (%) of Pediatric Patients with each Neurological
Diagnosis
datatable(neuro_counts_pediatric)
Evaluate Diagnoses by Age & Outcome
compute_stratified_neuro_counts <- function(tableone, sorted_sites, is_pediatric = FALSE) {
# create empty list to save processed diagnostic data
diag_table_list <- list()
if(is_pediatric==FALSE) {
population = "adults"
} else {
population = "pediatrics"
}
# for each healthcare system, we will calculate the total number of patients with each ICD code
for (i in sorted_sites) {
#print(i)
tmp <- get(i)
tmp_diag <- tmp[[c(
"first_hosp_results",
"icd_tables",
paste0("icd_tables_", population),
"demo_table"
)]]
tmp_diag$site <- tmp[["site"]]
try(
diag_table_list[[i]] <- tmp_diag %>%
select(site, variable, starts_with("n_var")) %>%
pivot_longer(cols = starts_with("n_var"), names_to = "code", values_to = "freq") %>%
ungroup()
)
}
diag_table_wide_all <- bind_rows(diag_table_list) %>%
group_by(variable, code) %>%
# total number of patients who had each code for each variable across sites
mutate(total_n_var = sum(freq, na.rm = TRUE)) %>%
distinct(variable, code, total_n_var) %>%
mutate(code = gsub("n_var_", "", code),
variable = gsub("Severity.", "", variable),
variable = gsub("Survival.", "", variable),
variable = gsub("readmitted", "", variable),
variable = gsub("age_group", "", variable),
variable = gsub("covid_discharged", "", variable),
variable = if_else(variable == ".TRUE", "Readmitted", variable)) %>%
left_join(., icds %>% rename(code = "icd"), by = "code") %>%
ungroup() %>%
filter(!concept_type == "icd-9")
# refactor neuro status
diag_table_wide_all$pns_cns <- factor(diag_table_wide_all$pns_cns,
levels=c("Central", "Peripheral"),
labels=c("CNS", "PNS"))
return(diag_table_wide_all)
}
stratified_neuro_counts_adult <- compute_stratified_neuro_counts(sorted_sites = adult_sites,
is_pediatric = FALSE,
tableone = tableone_adult)
stratified_neuro_counts_pediatric <- compute_stratified_neuro_counts(sorted_sites = pediatric_sites,
is_pediatric = TRUE,
tableone = tableone_pediatric)
compute_stratified_neuro_counts_age <- function(stratified_neuro_counts, tableone) {
stratified_neuro_counts$variable <- factor(stratified_neuro_counts$variable,
levels=c(".00to02", ".03to05", ".06to11", ".12to17",
".18to25", ".26to49", ".50to69", ".70to79", ".80plus"),
labels=c("0-2 Years", "3-5 Years", "6-11 Years", "12-17 Years",
"18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
# total counts
n_0_2 <- get_table_n(tableone, var = "0-2")
n_3_5 = get_table_n(tableone, var = "3-5")
n_6_11 = get_table_n(tableone, var = "6-11")
n_12_17 = get_table_n(tableone, var = "12-17")
n_18_25 = get_table_n(tableone, var = "18-25")
n_26_49 = get_table_n(tableone, var = "26-49")
n_50_69 = get_table_n(tableone, var = "50-69")
n_70_79 = get_table_n(tableone, var = "70-79")
n_80 = get_table_n(tableone, var = "80+")
age_df <- data.frame("variable" = c("0-2 Years",
"3-5 Years",
"6-11 Years",
"12-17 Years",
"18-25 Years",
"26-49 Years",
"50-69 Years",
"70-79 Years",
"80+ Years"),
"n_var" = c(n_0_2,
n_3_5,
n_6_11,
n_12_17,
n_18_25,
n_26_49,
n_50_69,
n_70_79,
n_80))
stratified_neuro_counts <- stratified_neuro_counts %>%
filter(grepl("Years", variable),
!code == "NN",
!total_n_var == 0) %>%
# total_n would be calculating total codes across each age group which is not true reflection of patient counts b/c patients can have more than one code
#mutate(total_n = sum(total_n_var)) %>%
ungroup() %>%
left_join(., age_df, by = "variable") %>%
mutate(n_var = as.numeric(n_var),
perc = as.character(round(total_n_var/n_var*100,1)),
perc = if_else(perc < 0.1, '<0.1', perc),
perc = paste0(perc, "%"))
return(stratified_neuro_counts)
}
stratified_neuro_counts_age_adult <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_adult,
tableone = tableone_adult)
stratified_neuro_counts_age_pediatric <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_pediatric,
tableone = tableone_pediatric)
Diagnoses by Adult and Pediatric Populations
n_0_2 <- get_table_n(tableone_combine, var = "0-2")
#n_3_5 <- get_table_n(tableone_combine, var = "3-5")
n_6_11 <- get_table_n(tableone_combine, var = "6-11")
n_12_17 <- get_table_n(tableone_combine, var = "12-17")
total_ped_age_count = as.numeric(n_0_2) + as.numeric(n_6_11) + as.numeric(n_12_17)
stratified_neuro_counts_age_pediatric_reformat = stratified_neuro_counts_age_pediatric %>%
select(variable, pns_cns, description, n_var, total_n_var) %>%
mutate(variable = "0-17 Years") %>%
group_by(pns_cns, description) %>%
mutate(total_n_var = sum(total_n_var, na.rm = TRUE)) %>%
ungroup() %>%
# n_var is the total per `age_group` stratification
select(-n_var) %>%
distinct() %>%
mutate(perc = round(total_n_var/total_ped_age_count*100,1),
perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
perc = paste0(perc, "%"))
stratified_neuro_counts_age_combined <- stratified_neuro_counts_age_adult %>%
select(variable, pns_cns, description, total_n_var, perc) %>%
rbind(., stratified_neuro_counts_age_pediatric_reformat %>%
select(variable, pns_cns, description, total_n_var, perc))
stratified_neuro_counts_age_combined$variable <- factor(stratified_neuro_counts_age_combined$variable,
levels=c("0-17 Years", "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
group.colors <- c(NNC = "gray", PNS = "slateblue", CNS ="tomato")
adult_ped_combined_plot <- ggplot(stratified_neuro_counts_age_combined,
aes(x = total_n_var, y = reorder(description,
total_n_var),
fill = pns_cns)) +
geom_bar(stat = "identity") +
geom_text(
aes(x = total_n_var, y = description, label = perc),
hjust = -0.1,
#hjust="inward",
size = 6,
inherit.aes = TRUE) +
scale_x_continuous(expand = expansion(mult = c(0,0.2))) + # to ensure percent labels are within the figure
facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") +
scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +
scale_fill_manual(values = group.colors) +
xlab("Number of Patients") +
theme(legend.position = "none",
strip.text.y = element_text(size = 40),
strip.text.x = element_text(size = 35),
axis.text.y.left = element_text(size = 25, face = "bold"),
axis.text.x = element_text(size = 25),
axis.title.x = element_text(size = 35))
ggsave("figures/Fig3_diagnoses_by_age_adult_ped_18group.png", adult_ped_combined_plot, width = 40, height = 19, dpi = 300)
Evaluate Diagnoses by Outcome
stratified_neuro_counts_adult$population = "adult"
stratified_neuro_counts_pediatric$population = "pediatric"
stratified_neuro_counts_combine <- rbind(stratified_neuro_counts_adult, stratified_neuro_counts_pediatric)
## determine total number of patients who died or severe
n_death_adult = get_table_n(df = tableone_adult, var = "Deceased") %>% as.numeric()
n_severe_adult = get_table_n(df = tableone_adult, var = "Severe") %>% as.numeric()
n_death_pediatric = get_table_n(df = tableone_pediatric, var = "Deceased") %>% as.numeric()
n_severe_pediatric = get_table_n(df = tableone_pediatric, var = "Severe") %>% as.numeric()
stratified_neuro_counts_outcomes <- stratified_neuro_counts_combine %>%
filter(variable %in% c("Severe", "Deceased"),
!code == "NN",
!total_n_var == 0) %>%
# total_n would be calculating total codes across each outcome group which is not true reflection of patient counts b/c patients can have more than one code
#mutate(total_n = sum(total_n_var)) %>%
group_by(variable, description, population) %>%
mutate(total_n_var = sum(total_n_var)) %>%
ungroup() %>%
mutate(perc = case_when(variable == "Severe" & population == "adult" ~ round(total_n_var/n_severe_adult*100,1),
variable == "Deceased" & population == "adult" ~ round(total_n_var/n_death_adult*100,1),
variable == "Severe" & population == "pediatric" ~ round(total_n_var/n_severe_pediatric*100,1),
variable == "Deceased" & population == "pediatric" ~ round(total_n_var/n_death_pediatric*100,1)),
perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
perc = paste0(perc, "%")) %>%
select(-code, -concept_type) %>%
distinct()
adult_outcomes = ggplot(stratified_neuro_counts_outcomes %>%
filter(population == "adult") %>%
mutate(variable = as.factor(variable) %>%
fct_recode(
Mortality = "Deceased")),
aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
geom_bar(stat = "identity") +
geom_text(
aes(x = total_n_var, y = description, label = perc),
hjust = -0.1,
#hjust="inward",
size = 5,
inherit.aes = TRUE) +
scale_x_continuous(expand = expansion(mult = c(0,0.2))) +
facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") +
scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +
scale_fill_manual(values = group.colors) +
xlab("Number of Patients") +
ggtitle("B. Adult") +
theme(legend.position = "none",
plot.title = element_text(size = 44, face = "bold", hjust = -0.5),
strip.text.y = element_text(size = 35),
strip.text.x = element_text(size = 30),
axis.text.y.left = element_text(size = 20, face = "bold"),
axis.text.x = element_text(size = 15),
axis.title.x = element_text(size = 35))
ggsave("figures/SuppFig1_diagnoses_by_outcomes_adult.png", adult_outcomes, width = 27, height = 15, dpi = 300)
pediatric_outcomes = ggplot(stratified_neuro_counts_outcomes %>%
filter(population == "pediatric") %>%
mutate(variable = as.factor(variable) %>%
fct_recode(
Mortality = "Deceased")),
aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
geom_bar(stat = "identity") +
geom_text(
aes(x = total_n_var, y = description, label = perc),
hjust = -0.1,
#hjust="inward",
size = 5,
inherit.aes = TRUE) +
scale_x_continuous(expand = expansion(mult = c(0,0.2))) +
facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") +
scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +
scale_fill_manual(values = group.colors) +
xlab("") +
ggtitle("A. Pediatric") +
theme(legend.position = "none",
plot.title = element_text(size = 44, face = "bold", hjust = -0.5),
strip.text.y = element_text(size = 35),
strip.text.x = element_text(size = 30),
axis.text.y.left = element_text(size = 20, face = "bold"),
axis.text.x = element_text(size = 15),
axis.title.x = element_text(size = 35))
ggsave("figures/SuppFig1_diagnoses_by_outcomes_pediatric.png", pediatric_outcomes, width = 27, height = 9, dpi = 300)
Evaluate patients with “Both” CNS and PNS diseases
These patients were excluded from the primary analysis due to
potential confounding.
# create empty list to store the counts of "both" patients
both_counts_list <- list()
# for each healthcare system, retrieve the number of excluded "both" patients recorded
for (i in sorted_sites) {
n_both <- results[[paste(i)]][["first_hosp_results"]][["both_counts"]]
both_counts_list[[i]] <- n_both
}
both_counts <- bind_rows(both_counts_list) %>% t() %>% data.frame() %>% sum()
print(paste(both_counts, "(", round(both_counts/(as.numeric(tableone_combine$N[1])+both_counts)*100,2), "%)"))
## [1] "1990 ( 1.84 %)"
Comorbidity Analysis
Next, we will evaluate the frequency of comorbidities in our
population and the associated risk that comorbidities may incur for
acquiring a CNS or PNS diagnosis during COVID-19 hospitalization.
1. Create a table of counts for each comorbidity and
stratified by neurological status.
comorbidity_table_adult <- comorb_table_wide %>%
select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
filter(population == "Adult") %>%
rename(N = "Comorb_Total",
NNC = "NNC_N_%",
CNS = "CNS_N_%",
PNS = "PNS_N_%") %>%
mutate(N = paste0(N, " (", round(N/n_adult, 2)*100, "%)"))
comorbidity_table_pediatric <- comorb_table_wide %>%
select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
filter(population == "Pediatric") %>%
rename(N = "Comorb_Total",
NNC = "NNC_N_%",
CNS = "CNS_N_%",
PNS = "PNS_N_%") %>%
mutate(N = paste0(N, " (", round(N/n_pediatric, 2)*100, "%)"))
supp_comorbidity_table <- rbind(comorbidity_table_adult, comorbidity_table_pediatric) %>%
arrange(Comorbidity)
write.csv(supp_comorbidity_table, "tables/SuppTable_Comorbidity_Counts.csv", row.names = FALSE)
2. Create vector counts of patients with comorbidity
data
neuro_pt_counts_sum_adult <- neuro_pt_counts %>%
filter(population == "Adult") %>%
select(-population)
neuro_pt_counts_sum_pediatric <- neuro_pt_counts %>%
filter(population == "Pediatric") %>%
select(-population)
none_n_adult = neuro_pt_counts_sum_adult$None_n
cns_n_adult = neuro_pt_counts_sum_adult$CNS_n
pns_n_adult = neuro_pt_counts_sum_adult$PNS_n
none_n_pediatric = neuro_pt_counts_sum_pediatric$None_n
cns_n_pediatric = neuro_pt_counts_sum_pediatric$CNS_n
pns_n_pediatric = neuro_pt_counts_sum_pediatric$PNS_n
3. Calculate the relative risk (RR) of developing a CNS or
PNS diagnosis for patients with each comorbidity
calculate_rr <- function(is_pediatric = FALSE) {
if(is_pediatric == FALSE) {
cns_n = cns_n_adult
pns_n = pns_n_adult
neuro_pt_counts_sum = neuro_pt_counts_sum_adult
pop = "Adult"
# create a list of comorbidities
list_of_comorbs <- comorb_table_wide %>%
filter(population == paste(pop)) %>%
distinct(Comorbidity)
list_of_comorbs <- list_of_comorbs$Comorbidity
} else {
cns_n = cns_n_pediatric
pns_n = pns_n_pediatric
neuro_pt_counts_sum = neuro_pt_counts_sum_pediatric
pop = "Pediatric"
# create a list of comorbidities
list_of_comorbs <- comorb_table_wide %>%
filter(population == paste(pop)) %>%
distinct(Comorbidity) %>%
as.vector()
list_of_comorbs <- list_of_comorbs$Comorbidity
}
# create empty list to save relative risk calculations
rr_calcs <- list()
# for each comorbidity, we will calculate the relative risk of having a CNS and PNS diagnosis
for (i in list_of_comorbs) {
# https://www.cdc.gov/csels/dsepd/ss1978/lesson3/section5.html
# create matrix as:
# exposed group - with outcome, without outcome
# non exposed group - with outcome, without outcome
# where variables ending with `_Total` are the total number of CNS, PNS, or NNC patients with the respective comorbidity.
# variables ending with `_n` are the total numbers in the entire population
# to calculate confidence intervals
#https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/bs704_confidence_intervals8.html
rr_cns <- comorb_table_wide %>%
filter(population == paste(pop),
Comorbidity == paste(i)) %>%
mutate(a = CNS_Total, # number CNS w/comorb
b = Comorb_Total - CNS_Total, # number of comorb pts w/o CNS
c = cns_n - CNS_Total, # number of CNS pts w/o comorb
d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or CNS
risk_cns = a/(a+b),
risk_non_cns = c/(c+d),
rr_CNS = risk_cns/risk_non_cns,
# for confidence intervals
ci_1 = (b/a)/(a+b), # (n1-x1)/n1
ci_2 = (d/c)/(c+d), # (n2-x2)/n2
ci_root = sqrt(ci_1 + ci_2),
ci_lower = log(rr_CNS) - (1.96*ci_root),
ci_upper = log(rr_CNS) + (1.96*ci_root),
l_ci_cns = exp(ci_lower),
u_ci_cns = exp(ci_upper)) %>%
select(Comorbidity, Comorb_Total, None_Total, CNS_Total, PNS_Total, rr_CNS, l_ci_cns, u_ci_cns)
rr_pns <- comorb_table_wide %>%
filter(population == paste(pop),
Comorbidity == paste(i)) %>%
mutate(a = PNS_Total, # number PNS w/comorb
b = Comorb_Total - PNS_Total, # number of comorb pts w/o PNS
c = pns_n - PNS_Total, # number of PNS pts w/o comorb
d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or PNS
risk_pns = a/(a+b),
risk_non_pns = c/(c+d),
rr_PNS = risk_pns/risk_non_pns,
# for confidence intervals
ci_1 = (b/a)/(a+b),
ci_2 = (d/c)/(c+d),
ci_root = sqrt(ci_1 + ci_2),
ci_lower = log(rr_PNS) - (1.96*ci_root),
ci_upper = log(rr_PNS) + (1.96*ci_root),
l_ci_pns = exp(ci_lower),
u_ci_pns = exp(ci_upper)) %>%
select(Comorbidity, rr_PNS, l_ci_pns, u_ci_pns)
rr <- rr_cns %>%
left_join(., rr_pns, by = c("Comorbidity"))
rr_calcs[[i]] <- rr
}
rr_results <- bind_rows(rr_calcs) %>%
mutate(rr_CNS = round(rr_CNS, 2),
l_ci_cns = round(l_ci_cns, 2),
u_ci_cns = round(u_ci_cns, 2),
rr_PNS = round(rr_PNS, 2),
l_ci_pns = round(l_ci_pns, 2),
u_ci_pns = round(u_ci_pns, 2),
`RR CNS (95% CI)` = paste(rr_CNS, "(", l_ci_cns, ",", u_ci_cns, ")"),
`RR PNS (95% CI)` = paste(rr_PNS, "(", l_ci_pns, ",", u_ci_pns, ")")) %>%
select(Comorbidity, rr_CNS, l_ci_cns, u_ci_cns, `RR CNS (95% CI)`,
rr_PNS, l_ci_pns, u_ci_pns, `RR PNS (95% CI)`)
# tidy up data
write.csv(rr_results %>%
arrange(desc(rr_CNS)) %>%
select(Comorbidity, `RR CNS (95% CI)`, `RR PNS (95% CI)`),
paste0("tables/Supp.Table4_RR_", pop, ".csv"), row.names = FALSE)
# reformat data
rr_results_tidy_rr <- rr_results %>%
select(Comorbidity, rr_CNS, rr_PNS) %>%
pivot_longer(cols = rr_CNS:rr_PNS, names_to = "Neuro Status", values_to = "RR") %>%
mutate(`Neuro Status` = if_else(`Neuro Status` == "rr_CNS", "CNS", "PNS"))
rr_results_tidy_l_ci <- rr_results %>%
select(Comorbidity, l_ci_cns, l_ci_pns) %>%
pivot_longer(cols = l_ci_cns:l_ci_pns, names_to = "Neuro Status", values_to = "l_CI") %>%
mutate(`Neuro Status` = if_else(`Neuro Status` == "l_ci_cns", "CNS", "PNS"))
rr_results_tidy_u_ci <- rr_results %>%
select(Comorbidity, u_ci_cns, u_ci_pns) %>%
pivot_longer(cols = u_ci_cns:u_ci_pns, names_to = "Neuro Status", values_to = "u_CI") %>%
mutate(`Neuro Status` = if_else(`Neuro Status` == "u_ci_cns", "CNS", "PNS"))
# combine all tidy results
rr_results_tidy <- rr_results_tidy_rr %>%
left_join(., rr_results_tidy_l_ci,
by = c("Comorbidity", "Neuro Status")) %>%
left_join(., rr_results_tidy_u_ci,
by = c("Comorbidity", "Neuro Status"))
# order by CNS diagnoses with higher RR
cns_top_results <- rr_results_tidy %>%
filter(`Neuro Status` == "CNS") %>%
arrange(RR) %>%
select(Comorbidity)
rr_results_tidy$Comorbidity <- ordered(rr_results_tidy$Comorbidity, levels = cns_top_results$Comorbidity)
cns_rr_plot <- ggplot(rr_results_tidy %>%
filter(`Neuro Status` == "CNS"),
aes(x = RR, y = Comorbidity)) +
geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.7, height = 0.2, color = "gray50") +
geom_point(size = 1.5, color = "tomato") +
scale_x_continuous(breaks = seq(0, 4, 1), labels = seq(0, 4, 1),
limits = c(0,4)) +
ylab("") +
xlab("Relative Risk") +
ggtitle("CNS Patients") +
theme(axis.text.y = element_text(face = "bold"))
# arrange by top PNS results
pns_top_results <- rr_results_tidy %>%
filter(`Neuro Status` == "PNS") %>%
arrange(RR) %>%
select(Comorbidity)
rr_results_tidy$Comorbidity <- ordered(rr_results_tidy$Comorbidity, levels = pns_top_results$Comorbidity)
pns_rr_plot <- ggplot(rr_results_tidy %>%
filter(`Neuro Status` == "PNS"),
aes(x = RR, y = Comorbidity)) +
geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.70, height = 0.2, color = "gray50") +
geom_point(size = 1.5, color = "slateblue") +
scale_x_continuous(breaks = seq(0, 1.5, 0.5), labels = seq(0, 1.5, 0.5),
limits = c(0,1.5)) +
ylab("") +
xlab("Relative Risk") +
ggtitle("PNS Patients") +
theme(axis.text.y = element_text(face = "bold"))
pns_rr_plot <- set_panel_size(pns_rr_plot,
width = unit(7, "cm"),
height = unit(6.5, "in"))
cns_rr_plot <- set_panel_size(cns_rr_plot,
width = unit(7, "cm"),
height = unit(6.5, "in"))
#grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2)
ggsave(paste0("figures/Figure4.relative_risk_", pop, ".png"), grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2), width = 15, height = 8, dpi = 300)
}
Relative Risk (RR)
Here we plot the relative risk of acquiring a CNS or PNS diagnosis in
adult patients who have each comorbidity of the ECI compared to those
who do not.
We are not able to compute this analysis in pediatric patients, due
to the overall low incidence of comorbidities and neurological illness
during COVID-19 hospitalization.
LPCA deviance explained
During the local analysis for each participating healthcare system,
we computed the top 10 principal components using logistic principal
component analysis (LPCA). LPCA was performed on the matrix of npatients
x 29 ECI comorbidities. Across healthcare systems, 10 principal
components explained at least 75% of the model deviance.
This supplementary analysis was performed in order to determine the
best way to adjust for each patient’s comorbidity burden in our
downstream survival models.
compare_deviance <- function(sorted_sites, is_pediatric=FALSE) {
pca_dev <- list()
if(is_pediatric==FALSE) {
population = "adults"
sorted_sites = sorted_sites[!sorted_sites%in% c("GOSH_results", "BCH_results")]
} else {
population = "pediatrics"
}
# for each healthcare system, we will calculate the total number of patients with each ICD code
for (i in sorted_sites) {
print(i)
tmp <- get(i)
tmp_pca <- tmp[[c(
"comorbidities",
paste0("comorb_", population),
"deviance_expl"
)]] %>%
data.frame() %>%
rename(dev_expl = '.')
tmp_pca$site <- tmp[["site"]]
pca_dev[[i]] <- tmp_pca
}
pca_df <- bind_rows(pca_dev)
return(pca_df)
}
pca_adult <- compare_deviance(sorted_sites = adult_sites, is_pediatric = FALSE)
## [1] "APHP_results"
## [1] "FRBDX_results"
## [1] "UKFR_results"
## [1] "ICSM_results"
## [1] "HPG23_results"
## [1] "NUH_results"
## [1] "MGB_results"
## [1] "UPENN_results"
## [1] "UMICH_results"
## [1] "NWU_results"
## [1] "UPITT_results"
## [1] "H12O_results"
## [1] "UKY_results"
## [1] "VA1_results"
## [1] "VA2_results"
## [1] "VA3_results"
## [1] "VA4_results"
## [1] "VA5_results"
## [1] "UCLA_results"
pca_data <- bind_rows(pca_adult)
theme_set(theme_classic())
pca_plot <- ggplot(pca_data,
aes(x = fct_reorder(site, -dev_expl), y = dev_expl, group = 1)) +
geom_point(size = 2.5) +
geom_line() +
xlab("Healthcare System") +
ylab("Proportion of Deviance explained") +
ylim(0,1) +
theme(axis.text.x=element_text(angle=90, hjust=1, face = "bold", size = 12),
axis.text.y=element_text(face = "bold"),
axis.title.y = element_text(face = "bold", size = 15)); pca_plot
ggsave("figures/SuppFig_pca_deviance.png", pca_plot, height = 6, width = 6)
---
title: "**Patient Characteristics**"
author: "Meg Hutch, Ji Son, Trang Le, Chuan Hong, Xuan Wang, Zahra Shakeri"
date: "04/11/2023"
output:
  html_document:
    toc: true
    toc_float: true
    code_download: true
    theme: spacelab
---

This notebook describes the **106,229** adult and pediatric hospitalized COVID-19 positive patients that were studied as part of the [Consortium for Clinical Characterization of COVID-19 by EHR](https://covidclinical.net/) Neurology group. This analysis contains the results of patients from 21 different healthcare systems spanning 6 countries.

<center>![](docs/assets/neuro-stratification-diagram.png){width="373"}</center>

```{r message=FALSE, warning=FALSE}
library(tidyverse)
library(gridGraphics)
library(gridExtra)
library(ggpubr)
library(DT)
library(kableExtra)
library(cowplot)
library(RColorBrewer)
library(glue)
library(egg)
source("R/plot_theme.R")
source("R/demo_tables.R")
source("R/utils.R")
```

# **Import Data from each Healthcare System**

Each participating healthcare system ran the analysis locally on patient level data using our [customized R package](https://github.com/covidclinical/Phase2.1NeuroRPackage). The output of each local analysis consisted of aggregated counts and summary statistics (means, medians, hazard ratios, p-values, etc). Summary results from each healthcare system were then aggregated and are imported and then combined using the following code:

```{r message=FALSE, warning=FALSE}
# read in files from results folder 
# this folder contains all of the local healthcare system level analyses
rdas <- list.files(
  path = "results",
  pattern = ".rda",
  full.names = TRUE
)


for (rda in rdas) {
  load(rda)
}

rm(rdas, rda)

# create a list of participating healthcare systems from our study tracking spreadsheet
site_google_url <- "https://docs.google.com/spreadsheets/d/1epcYNd_0jCUMktOHf8mz5v651zy1JALD6PgzobrGWDY/edit?usp=sharing"

# load site parameters
site_params <- googlesheets4::read_sheet(site_google_url, sheet = 1)
site_avails <- googlesheets4::read_sheet(site_google_url, sheet = 2)

# filter the list of sites who ran the analysis
sorted_sites <- site_avails %>%
  filter(!is.na(date_v4_received)) %>%
  pull(siteid) %>%
  paste("results", sep = "_")

# list sites without race
sites_wo_race <- site_params %>%
  filter(!include_race) %>%
  pull(siteid)

# combine all rda files with 'results' in name
results <- mget(ls(pattern = "results"))

## load in the pre-processed comorbidity and meta-data
load("processed/comorbidity_table_cnps.rda")
load("processed/neuro_pt_counts.rda") 
```

# **Pre-processing**

The following code pre-processes the demographic and clinical course data in order to construct a Table 1.

**1. Format primary demographic characteristics**

```{r message=FALSE, warning=FALSE}
# create list of hospitals for adult and pediatric analyses
adult_sites = sorted_sites[!sorted_sites %in% c("BCH_results", "GOSH_results")]

pediatric_sites = sorted_sites[!sorted_sites %in% c("VA1_results", "VA2_results", "VA3_results", "VA4_results", "VA5_results")]

demo_table_adult <- create_demo_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

demo_table_pediatric <- create_demo_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

demo_table_combine <- rbind(demo_table_adult, demo_table_pediatric)
```

**2. Format clinical characteristics (i.e: continuous variables such as median time to discharge)**

```{r message=FALSE, warning=FALSE}
clinical_table_adult <- create_clinical_tableone(sorted_sites = adult_sites, is_pediatric = FALSE)

clinical_table_pediatric <- create_clinical_tableone(sorted_sites = pediatric_sites, is_pediatric = TRUE)

clinical_table_combine <- rbind(clinical_table_adult, clinical_table_pediatric)
```

**3. Conduct additional pre-processing of clinical variables**

```{r message=FALSE, warning=FALSE}
clinical_table_adult_clean <- clean_clinical_tables(clinical_table = clinical_table_adult)

clinical_table_pediatric_clean <- clean_clinical_tables(clinical_table = clinical_table_pediatric)

clinical_table_clean_combine <- rbind(clinical_table_adult_clean, clinical_table_pediatric_clean)
```

**4. Add Comorbidity data**

Here we create separate lists of the top 4 comorbidities in the adult and pediatric populations

```{r}
comorbidity_table_adults <- comorb_table_wide %>%
  filter(population == "Adult") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

comorbidity_table_pediatric <- comorb_table_wide %>%
  filter(population == "Pediatric") %>% 
  arrange(desc(Comorb_Total)) %>% 
  slice(1:4) # return top comorbidities

top_comorb_adults <- comorbidity_table_adults$Comorbidity
top_comorb_pediatrics <- comorbidity_table_pediatric$Comorbidity
```

# **Demographics**

## Table One - Combined

The below table contains the counts and summary stats of the entire cohort (adult & pediatric patients).

```{r}
neuro_pt_counts_sum <- colSums(neuro_pt_counts[,-1]) %>% 
  data.frame() %>% 
  t() %>% 
  data.frame()

# specify the order of Table 1
row_order_adult <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_adults, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )


tableone_combine <- create_tableone(demo_table = demo_table_combine,
                                    clinical_table = clinical_table_clean_combine) %>%
  rbind(., comorb_table_wide %>%
          group_by(Comorbidity) %>%
          mutate(Comorb_Total = sum(Comorb_Total, na.rm = TRUE),
          None_sum = sum(None_Total, na.rm = TRUE),
          CNS_sum = sum(CNS_Total, na.rm = TRUE),
          PNS_sum = sum(PNS_Total, na.rm = TRUE),
          None_perc = round(None_sum/neuro_pt_counts_sum$None_n*100,1),
          Central_perc = round(CNS_sum/neuro_pt_counts_sum$CNS_n*100,1),
          Peripheral_perc = round(PNS_sum/neuro_pt_counts_sum$PNS_n*100,1),
          None = paste(None_sum, "(", None_perc, ")"),
          Central = paste(CNS_sum, "(", Central_perc, ")"),
          Peripheral = paste(PNS_sum, "(", Peripheral_perc, ")")) %>%
          distinct(Comorbidity, Comorb_Total, None, Central, Peripheral) %>%
          rename(`Table 1` = "Comorbidity",
          N = "Comorb_Total")) %>%
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_combine %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
      add_header_above(c(
      " ",
      "Total Patients" = 1,
      "Neurological Disease" = 3
      )) %>%
      kable_paper("striped", full_width = F) %>%
      pack_rows("Sex", 2, 4) %>%
      pack_rows("Age", 5, 13) %>%
      pack_rows("Race & Ethnicity", 14, 20) %>%
      pack_rows("Past Medical History", 21, 27) %>%
      pack_rows("Severity", 28, 30) %>%
      pack_rows("Survival", 31, 33) %>%
      pack_rows("Discharge", 34, 36) %>%
      pack_rows("Readmission", 37, 40)

write.csv(tableone_combine, 'tables/table1_combined.csv', row.names = FALSE)
```

## Table One - Adults

```{r}
tableone_adult <- create_tableone(demo_table = demo_table_adult,
                                  clinical_table = clinical_table_adult_clean) %>%
                                  rbind(., comorbidity_table_adults %>%
                                          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
                                          rename(`Table 1` = "Comorbidity",
                                                 N = "Comorb_Total",
                                                 None = "NNC_N_%",
                                                 Central = "CNS_N_%",
                                                 Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_adult, `Table 1`))

kbl(tableone_adult %>%
    rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 9) %>%
  pack_rows("Race & Ethnicity", 10, 16) %>%
  pack_rows("Past Medical History", 17, 23) %>%
  pack_rows("Severity", 24, 26) %>%
  pack_rows("Survival", 27, 29) %>%
  pack_rows("Discharge", 30, 32) %>%
  pack_rows("Readmission", 33, 36)

write.csv(tableone_adult, 'tables/table1_adults.csv', row.names = FALSE)
```

## Table One - Pediatrics

```{r}
# specify the order of Table 1
row_order_pediatric <- c(
  "All Patients", "Female", "Male", "Unknown Sex",
  "0-2", "3-5", "6-11", "12-17", "18-25",
  "26-49", "50-69", "70-79", "80+", "Unknown Age",
  "American Indian", "Asian", "Black",
  "Hawaiian/Pacific Islander",
  "Hispanic/Latino", "White", "Other",
  top_comorb_pediatrics, 
  "Median Elixhauser score  (sd) ",
  "Median pre admission cns  (sd) ",
  "Median pre admission pns  (sd) ",
  "Non-Severe", "Severe",
  "Median time to severe  (sd) ",
  "Alive", "Deceased",
  "Median time to death  (sd) ",
  "Discharged", "Not Discharged",
  "Median time to first discharge  (sd) ",
  "Not Readmitted", "Readmitted",
  "Median time to first readmission  (sd) ",
  "Median number of readmissions  (sd) "
  )

tableone_pediatric <- create_tableone(demo_table = demo_table_pediatric,
                                      clinical_table = clinical_table_pediatric_clean)  %>%
  rbind(., comorbidity_table_pediatric %>%
          select(Comorbidity, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>% 
          rename(`Table 1` = "Comorbidity",
                  N = "Comorb_Total",
                  None = "NNC_N_%",
                  Central = "CNS_N_%",
                  Peripheral = "PNS_N_%")) %>% 
  slice(match(row_order_pediatric, `Table 1`))

kbl(tableone_pediatric %>%
      rename("No Neurological Condition (NNC)" = None)) %>%
  add_header_above(c(
  " ",
  "Total Patients" = 1,
  "Neurological Disease" = 3
  )) %>%
  kable_paper("striped", full_width = F) %>%
  pack_rows("Sex", 2, 4) %>%
  pack_rows("Age", 5, 8) %>%
  pack_rows("Race & Ethnicity", 9, 15) %>%
  pack_rows("Past Medical History", 16, 22) %>%
  pack_rows("Severity", 23, 25) %>%
  pack_rows("Survival", 26, 28) %>%
  pack_rows("Discharge", 29, 31) %>%
  pack_rows("Readmission", 32, 35)

write.csv(tableone_pediatric, 'tables/table1_pediatric.csv', row.names = FALSE)
```

# **Demographic & Clinical Course Analysis**

Next we will evaluate characteristics among patients stratified by neurological status: 1) patients with no neurological condition (NNC), 2) patients with a central nervous system condition (CNS), and 3) patients with a peripheral nervous system condition (PNS).

Categorical variables will be evaluated with chi-square tests, while continuous variables will be evaluated with a non-parametric anova (Kruskal-Wallis) test.

**1. Conduct chi-squared tests for categorical demographic variables**

```{r}
compute_chi_square <- function(tableone, demo_table) {
  
  N = tableone %>% select(N) %>% head(1) %>% as.numeric()

  # sum of all neuro condition groups
  cns_n = as.numeric(gsub( " .*$", "", tableone[1,"Central"]))
  pns_n = as.numeric(gsub( " .*$", "", tableone[1,"Peripheral"]))
  none_n = as.numeric(gsub( " .*$", "", tableone[1,"None"]))
  
  # sum up the total counts of each demographic variable across healthcare systems
  tableOne_sums <- demo_table %>%
    group_by(variable, Demo_var, Demo_var_i) %>%
    summarise(across(
      starts_with("n_var"),
      function(x) sum(x, na.rm = TRUE)
    ), .groups = "drop")
  
  # create list of demographic/clinical variables
  vars = tableOne_sums$variable
  
  # for binary variables, we should group by category 
  # Thus, only run the analysis for sex.male rather than both sex.female AND sex.male which would be redundant
  # we will also remove `age_group.unknown` since their is only 1 patient here
  exclude <- c("readmitted.false", "sex.female", "sex.other", "survival.deceased", "severity.non-severe",
               "covid_discharged.discharged", "age_group.Unknown")
  vars <- vars[!vars %in% exclude]
  
  # create empty list for chi.square test results
  chi_result_list = list()
  
  # for each variable, we will run the chi.square test
  for(i in vars) {
    test = calc_chisq(i, tableOne_sums, none_n, pns_n, cns_n)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    chi_results <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() %>% 
      mutate(p_value = as.numeric(p_value))
    
    rownames(chi_results) <- NULL
    
    chi_result_list[[i]] <- chi_results
  }
  
  # save list of chi.square test results
  chisq_results <- bind_rows(chi_result_list)
  
  return(chisq_results)
  
  
  }
```

**Combined Adult & Pediatric Chi-Square Results**

```{r message=FALSE, warning=FALSE}
chisq_combine <- compute_chi_square(tableone = tableone_combine %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_combine %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_combine %>% arrange(p_value))
```

**Adult Chi-Square Results**

```{r message=FALSE, warning=FALSE}
chisq_adult <- compute_chi_square(tableone = tableone_adult %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_adult %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_adult %>% arrange(p_value))
```

**Pediatric Chi-Square Results**

```{r message=FALSE, warning=FALSE}
chisq_pediatric <- compute_chi_square(tableone = tableone_pediatric %>%
                                      filter(!`Table 1` == 'Discharged',
                                             !`Table 1` == 'Not Discharged'),
                                    demo_table = demo_table_pediatric %>%
                                      filter(!variable == 'covid_discharged.discharged',
                                             !variable == 'covid_discharged.not discharged'))

datatable(chisq_pediatric %>% arrange(p_value))
```

**2. Conduct non-parametric anova (Kruskal-Wallis) tests for continuous clinical variables**

```{r}
compute_kruskal <- function(clinical_table) {

  # create empty list to save processed variables
  processed_list_figs <- list()
  
  # create a list of clinical variables to further pre-process
  vars_to_process <- unique(clinical_table$name)
  
  # for each variable, we will remove the [min, max] as before
  for (i in vars_to_process) {
    mod_table <- clinical_table %>%
      rename("var" = name) %>%
      filter(var == i) %>%
      mutate(
        site = toupper(site),
        None = sub("(\\(.*|\\[.*)", "", None),
        Peripheral = sub("(\\(.*|\\[.*)", "", Peripheral),
        Central = sub("(\\(.*|\\[.*)", "", Central)
      ) %>%
      mutate(across(None:Central, as.numeric))
  
    processed_list_figs[[i]] <- mod_table
  }
  
  clinical_table_anova <- bind_rows(processed_list_figs)
  
  # format the continuous variables
  cont_vars = clinical_table_anova %>%
    mutate(
      # create a variable without mean/median prefix
      grouped_var = gsub("Mean |Median | \\[Min, Max\\]| \\(SD\\)", "", var)
    ) %>%
    pivot_longer(
      cols = None:Central,
      names_to = "type"
    ) %>%
    mutate(type = factor(type, levels = c("None", "Central", "Peripheral"))) %>% 
    # select variables to analyze
    filter(grouped_var %in% c("Elixhauser score",
                              "pre admission cns", 
                              "pre admission pns", 
                              "time to death", 
                              "time to first discharge", 
                              "time to severe",
                              "number of readmissions",
                              "time to first readmission")) %>% 
    distinct()
  
  # create function to conduct anova
  cont_var_results <- function(variable) {
    
    df <- cont_vars %>% 
    filter(grouped_var == paste(variable))
    
    # kruskal.test calculates the kruskal-Wallis H-statistic 
    # does not assume normality between groups
    # also called the one-way ANOVA on ranks
    test = kruskal.test(df$value ~ df$type)
    
  }
  
  # create empty list for Kruskal-Wallis results
  cont_vars_list = list()
  
  # create list of unique variables
  outcome_cont_vars = unique(cont_vars$grouped_var)
  
  for(i in outcome_cont_vars) {
    
    test = cont_var_results(i)
    X2 <- round(test$statistic, 4)
    p_value <- test$p.value
    df <- test$parameter
    Variable = paste(i)
    
    kw <- cbind(Variable, X2, p_value, df) %>% 
      data.frame() 
    
    rownames(kw) <- NULL
    
    cont_vars_list[[i]] <- kw
  
    
  }
  
  anova_results <- bind_rows(cont_vars_list)
  
  }
```

**Combined Adult & Pediatric Kruskal-Wallis Results**

```{r message=FALSE, warning=FALSE}

kw_combine <- compute_kruskal(clinical_table = clinical_table_combine)
datatable(kw_combine %>% arrange(p_value))
```

**Adult Kruskal-Wallis Results**

*Note: median CNS is 0 for all sites (reason for the NA values)*

```{r message=FALSE, warning=FALSE}

kw_adult <- compute_kruskal(clinical_table = clinical_table_adult)
datatable(kw_adult %>% arrange(p_value))
```

**Pediatric Kruskal-Wallis Results**

```{r message=FALSE, warning=FALSE}

kw_pediatric <- compute_kruskal(clinical_table = clinical_table_pediatric)
datatable(kw_pediatric %>% arrange(p_value))
```

**3. Combine chi-square and Kruskal-Wallis results**

We will evaluate significance when controlling for False Discovery Rate (FDR)

```{r}
tableone_stats <- function(chisq_results, anova_results, population) {

  demo_stats <- rbind(chisq_results %>% select(Variable, p_value),
                      anova_results %>% select(Variable, p_value))
  
  # adjust p-values with FDR
  demo_stats$adj_p_value <- p.adjust(demo_stats$p_value, method = "fdr")
  
  # round & format p-value 
  demo_stats <- demo_stats %>% 
    mutate(p_value = as.numeric(p_value),
           pvalue = if_else(p_value < 0.001, "< 0.001", paste(round(p_value, 3))),
           adj.p_value = if_else(adj_p_value < 0.001, "< 0.001", paste(round(adj_p_value, 3))))
  
  # tidy up the demographics stat table
  demo_stats_tidy <- demo_stats %>% 
    rename(`Unadjusted P-value (raw)` = p_value,
           `FDR Adjusted P-value (raw)` = adj_p_value,
           `Unadjusted P-value (tidy)` = pvalue,
           `FDR Adjusted P-value (tidy)` = adj.p_value) %>% 
    select(Variable, `Unadjusted P-value (raw)`, `FDR Adjusted P-value (raw)`, `Unadjusted P-value (tidy)`, `FDR Adjusted P-value (tidy)`)

  write.csv(demo_stats_tidy, paste0("tables/Table1_pvals_", population, ".csv"), row.names = FALSE)
  
  return(demo_stats_tidy)
  
}
```

**4. Incorporate Comorbidities**

We will also add the top 4 comorbidities for each population to our table one. For the combined adult and pediatric table, we will use the top 4 comorbidities of the adult population.

```{r}
# define matrix for each comorbidity
comorb_matrix <- function(comorbidity_table, comorbidity, population) {

    if(population=='Combined') {
      comorbidity_table = comorbidity_table
    } else if (population=="Adult") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Adult")
    } else if (population=="Pediatric") {
      comorbidity_table = comorbidity_table %>% 
        filter(population == "Pediatric")
    } else {
      print('incorrect population specification')
    }
  
  mat_comorb <- comorbidity_table %>%
    filter(Comorbidity == paste(comorbidity)) %>%
    group_by(Comorbidity) %>% 
    mutate(None_comorb = sum(None_Total, na.rm = TRUE),
    CNS_comorb = sum(CNS_Total, na.rm = TRUE),
    PNS_comorb = sum(PNS_Total, na.rm = TRUE)) %>%
    ungroup() %>%
    distinct(None_comorb, CNS_comorb, PNS_comorb) %>%
    mutate(none_dif = neuro_pt_counts_sum$None_n - None_comorb,
    pns_dif = neuro_pt_counts_sum$PNS_n - PNS_comorb,
    cns_dif = neuro_pt_counts_sum$CNS_n - CNS_comorb) %>%
    data.frame() %>%
    #as.integer() %>%
    matrix(nrow = 2, ncol = 3, byrow = TRUE)

  comorb_results <- chisq.test(unlist(mat_comorb))
  
  results_df <- data.frame(Variable = paste(comorbidity),
                                X2 = round(comorb_results$statistic, 4),
                                p_value = comorb_results$p.value,
                                df = comorb_results$parameter)
  row.names(results_df) <- NULL
  
  return(results_df)
}

## combined comorbidities
comorbidity_combined_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Combined")
  comorbidity_combined_chi_list[[i]] <- comorb_chi_results
}

comorbidity_combined_chi <- comorbidity_combined_chi_list %>%
  bind_rows() 

chisq_combine_comorb <- chisq_combine %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_combined_chi) 



## adult comorbidities
comorbidity_adult_chi_list <- list()

for(i in top_comorb_adults) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Adult")
  comorbidity_adult_chi_list[[i]] <- comorb_chi_results
}

comorbidity_adult_chi <- comorbidity_adult_chi_list %>%
  bind_rows() 

chisq_adult_comorb <- chisq_adult %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_adult_chi) 

## pediatric comorbidities
comorbidity_pediatric_chi_list <- list()

for(i in top_comorb_pediatrics) {
  
  comorb_chi_results <- comorb_matrix(comorbidity_table = comorb_table_wide, i, population = "Pediatric")
  comorbidity_pediatric_chi_list[[i]] <- comorb_chi_results
}

comorbidity_pediatric_chi <- comorbidity_pediatric_chi_list %>%
  bind_rows() 

chisq_pediatric_comorb <- chisq_pediatric %>%
  mutate(X2 = as.numeric(X2),
  df = as.numeric(df)) %>%
  bind_rows(comorbidity_pediatric_chi) 
```

## Table One - Combined Analysis

```{r}
combine_tableone_stats <- tableone_stats(chisq_results = chisq_combine_comorb,
                                         anova_results = kw_combine,
                                         population = "combined") 
  

datatable(combine_tableone_stats)
```

## Table One - Adult Analysis

```{r}
adult_tableone_stats <- tableone_stats(chisq_results = chisq_adult_comorb,
                                         anova_results = kw_adult,
                                         population = "adult") 
  

datatable(adult_tableone_stats)
```

## Table One - Pediatric Analysis

```{r}
pediatric_tableone_stats <- tableone_stats(chisq_results = chisq_pediatric_comorb,
                                         anova_results = kw_pediatric,
                                         population = "pediatric") 
  

datatable(pediatric_tableone_stats)
```

# **Neurological Diagnosis Analysis**

Next, we will evaluate the diagnoses captured with the CNS and PNS groups. Specifically, we will examine the frequency of CNS and PNS diagnoses by patient age group and by clinical outcomes (mortality and COVID-19 severity).

**1. Format neurological icd code data**

```{r fig.width=12}
# import list of ICD-9/ICD-10 codes
neuro_icds_10 <-
  readxl::read_excel("public-data/2020-02-22_neuro-icd10_CNSvPNS.xlsx", sheet = 2) %>% 
  rename(
    "icd" = `ICD-10`,
    "pns_cns" = `Nervous system Involvement (1=central, 2=peripheral, 3=irrelevant)`,
    "type" = `ICD-10_type (1=first three alphanumeric code only, 2=digits after decimal point)`) %>%
  filter(type == 1) %>% 
  mutate(pns_cns = as.factor(pns_cns) %>% fct_recode(
    Central = "1",
    Peripheral = "2"
  )) %>%
  distinct(icd, `Neurological Disease Category`, pns_cns, `ICD-10 Description Revised`) %>%
  rename("description" = `ICD-10 Description Revised`) %>%
  mutate(concept_type = "icd-10")

neuro_icds_9 <- read.csv("public-data/icd9_tab_CNSvPNS.csv") %>%
  rename(
    "Neurological Disease Category" = "Neurological.Disease.Category",
    "pns_cns" = `Nervous.system.Involvement..1.central..2.peripheral.`,
    "icd_description" = `icd9_desc_revised`
  ) %>%
  mutate(
    pns_cns = as.factor(pns_cns) %>% fct_recode(
      Central = "1",
      Peripheral = "2"
    ),
    concept_type = "DIAG-ICD9"
  ) %>%
  select(icd, `Neurological Disease Category`, pns_cns, icd_description) %>%
  rename("description" = icd_description) %>%
  mutate(concept_type = "icd-9")

# bind list of ICD-9/ICD-10 codes
icds <- rbind(neuro_icds_10, neuro_icds_9)
```

**2. Calculate total number of neurological diagnoses for each adults & pediatrics**

```{r}
compute_neuro_counts <- function(sorted_sites, sample_size, is_pediatric = FALSE) {
  
    # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    diag_table_list[[i]] <- tmp_diag %>%
      filter(variable == "sex.female" | variable == "sex.male" | variable == "sex.other") %>%
      select(site, contains("n_var"))
  }
  
  diag_table_wide <- bind_rows(diag_table_list)
  
  # we will count the total number and percent across healthcare systems
  diag_counts <- colSums(Filter(is.numeric, diag_table_wide), na.rm = TRUE) %>% 
    data.frame() %>% 
    mutate(perc = round(./sample_size * 100, 1))
  
  # format diagnostic table
  diag_counts <- tibble::rownames_to_column(diag_counts, "icd") %>%
    mutate(icd = gsub("n_var_", "", icd)) %>%
    rename(`Number of Patients` = ".") %>%
    full_join(., icds, by = "icd") %>%
    dplyr::arrange(desc(`Number of Patients`)) %>% 
    mutate(`Number of Patients (% of Cohort)` = paste(`Number of Patients`, "(", perc, ")", sep = " ")) %>% 
    select(pns_cns, `Neurological Disease Category`, concept_type, 
           icd, description, `Number of Patients (% of Cohort)`) %>% 
      rename(Code = "icd",
           `Nervous System` = "pns_cns",
           Description = "description",
           `ICD Version` = "concept_type") %>% 
    mutate(`Number of Patients (% of Cohort)` = if_else(`Number of Patients (% of Cohort)` == "NA ( NA )", "0 ( 0 )", `Number of Patients (% of Cohort)`),
           Code = if_else(Code == "NN", "No Neurological Disease (NNC)", Code))
  
  return(diag_counts)
  
  }

```

```{r message=FALSE, warning=FALSE}
# create vectors containing patient sample sizes
n_adult <- as.numeric(tableone_adult$N)[1]
n_pediatric <- as.numeric(tableone_pediatric$N)[1]

# calculate the total neurological patient counts 
neuro_counts_adult <- compute_neuro_counts(sorted_sites = adult_sites,
                                           sample_size = n_adult, 
                                           is_pediatric = FALSE)

neuro_counts_pediatric <- compute_neuro_counts(sorted_sites = pediatric_sites,
                                           sample_size = n_pediatric, 
                                           is_pediatric = TRUE)

neuro_counts_combined <- neuro_counts_adult %>% 
  rename(`Number of Adult Patients (% of Cohort)` = `Number of Patients (% of Cohort)`) %>% 
  left_join(., neuro_counts_pediatric %>% 
              select(`ICD Version`, Code, `Number of Patients (% of Cohort)`) %>% 
              rename("Number of Pediatric Patients (% of Cohort`)" = `Number of Patients (% of Cohort)`))

write.csv(neuro_counts_combined, paste0("tables/Supp.Table2_diagnoses", ".csv"), row.names = FALSE)
  
```

## Number (%) of Adult Patients with each Neurological Diagnosis

```{r}
datatable(neuro_counts_adult) 
```

## Number (%) of Pediatric Patients with each Neurological Diagnosis

```{r}
datatable(neuro_counts_pediatric) 
```

## Evaluate Diagnoses by Age & Outcome

```{r}
compute_stratified_neuro_counts <- function(tableone, sorted_sites, is_pediatric = FALSE) {
  
  # create empty list to save processed diagnostic data
    diag_table_list <- list()
  
    if(is_pediatric==FALSE) {
      population = "adults"
      } else {
      population = "pediatrics"
      }
  
  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    #print(i)
    tmp <- get(i)
    tmp_diag <- tmp[[c(
      "first_hosp_results",
      "icd_tables",
      paste0("icd_tables_", population),
      "demo_table"
    )]]
    tmp_diag$site <- tmp[["site"]]
    try(
    diag_table_list[[i]] <- tmp_diag %>%
      select(site, variable, starts_with("n_var")) %>% 
      pivot_longer(cols = starts_with("n_var"), names_to = "code", values_to = "freq") %>% 
      ungroup()
    )
  }

  diag_table_wide_all <- bind_rows(diag_table_list) %>% 
      group_by(variable, code) %>% 
    # total number of patients who had each code for each variable across sites
      mutate(total_n_var  = sum(freq, na.rm = TRUE)) %>% 
    distinct(variable, code, total_n_var) %>% 
    mutate(code = gsub("n_var_", "", code),
           variable = gsub("Severity.", "", variable),
           variable = gsub("Survival.", "", variable),
           variable = gsub("readmitted", "", variable),
           variable = gsub("age_group", "", variable),
           variable = gsub("covid_discharged", "", variable),
           variable = if_else(variable == ".TRUE", "Readmitted", variable)) %>% 
    left_join(., icds %>% rename(code = "icd"), by = "code") %>% 
    ungroup() %>% 
    filter(!concept_type == "icd-9")


  # refactor neuro status
  diag_table_wide_all$pns_cns <- factor(diag_table_wide_all$pns_cns,
                                        levels=c("Central", "Peripheral"),
                                        labels=c("CNS", "PNS"))
  
  
  return(diag_table_wide_all)
  
  }
```

```{r}
stratified_neuro_counts_adult <- compute_stratified_neuro_counts(sorted_sites = adult_sites,
                                                                 is_pediatric = FALSE,
                                                                 tableone = tableone_adult)

stratified_neuro_counts_pediatric <- compute_stratified_neuro_counts(sorted_sites = pediatric_sites,
                                                                 is_pediatric = TRUE,
                                                                 tableone = tableone_pediatric)
```

```{r}
compute_stratified_neuro_counts_age <- function(stratified_neuro_counts, tableone) {
  
  stratified_neuro_counts$variable <- factor(stratified_neuro_counts$variable, 
                                     levels=c(".00to02", ".03to05", ".06to11", ".12to17",
                                              ".18to25", ".26to49", ".50to69", ".70to79", ".80plus"), 
                                     labels=c("0-2 Years", "3-5 Years", "6-11 Years", "12-17 Years",
                                              "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
  
  # total counts
  n_0_2 <- get_table_n(tableone, var = "0-2")
  n_3_5 = get_table_n(tableone, var = "3-5")
  n_6_11 = get_table_n(tableone, var = "6-11")
  n_12_17 = get_table_n(tableone, var = "12-17")
  n_18_25 = get_table_n(tableone, var = "18-25")
  n_26_49 = get_table_n(tableone, var = "26-49")
  n_50_69 = get_table_n(tableone, var = "50-69")
  n_70_79 = get_table_n(tableone, var = "70-79")
  n_80 = get_table_n(tableone, var = "80+")
  
  age_df <- data.frame("variable" = c("0-2 Years",
                                    "3-5 Years", 
                                    "6-11 Years", 
                                    "12-17 Years",
                                    "18-25 Years", 
                                    "26-49 Years", 
                                    "50-69 Years", 
                                    "70-79 Years", 
                                    "80+ Years"),
                     "n_var" = c(n_0_2,
                               n_3_5,
                               n_6_11,
                               n_12_17,
                               n_18_25,
                               n_26_49,
                               n_50_69,
                               n_70_79,
                               n_80))

  
  stratified_neuro_counts <- stratified_neuro_counts %>% 
    filter(grepl("Years", variable),
                !code == "NN", 
                !total_n_var == 0) %>% 
         # total_n would be calculating total codes across each age group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         ungroup() %>% 
         left_join(., age_df, by = "variable") %>% 
         mutate(n_var = as.numeric(n_var),
                perc = as.character(round(total_n_var/n_var*100,1)),
                perc = if_else(perc < 0.1, '<0.1', perc),
                perc = paste0(perc, "%")) 
  
  return(stratified_neuro_counts)
  
  
}


```

```{r}
stratified_neuro_counts_age_adult <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_adult,
                                                                         tableone = tableone_adult)

stratified_neuro_counts_age_pediatric <- compute_stratified_neuro_counts_age(stratified_neuro_counts = stratified_neuro_counts_pediatric,
                                                                 tableone = tableone_pediatric)
```

**Diagnoses by Adult and Pediatric Populations**

```{r fig.height=20, fig.width=15}
n_0_2 <- get_table_n(tableone_combine, var = "0-2")
#n_3_5 <- get_table_n(tableone_combine, var = "3-5")
n_6_11 <- get_table_n(tableone_combine, var = "6-11")
n_12_17 <- get_table_n(tableone_combine, var = "12-17")

total_ped_age_count = as.numeric(n_0_2) +  as.numeric(n_6_11) + as.numeric(n_12_17)

stratified_neuro_counts_age_pediatric_reformat = stratified_neuro_counts_age_pediatric %>%
  select(variable, pns_cns, description, n_var, total_n_var) %>%
  mutate(variable = "0-17 Years") %>%
  group_by(pns_cns, description) %>%
  mutate(total_n_var = sum(total_n_var, na.rm = TRUE)) %>%
  ungroup() %>%
  # n_var is the total per `age_group` stratification
  select(-n_var) %>%
  distinct() %>%
  mutate(perc = round(total_n_var/total_ped_age_count*100,1),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
         perc = paste0(perc, "%"))

stratified_neuro_counts_age_combined <- stratified_neuro_counts_age_adult %>%
  select(variable, pns_cns, description, total_n_var, perc) %>%
  rbind(., stratified_neuro_counts_age_pediatric_reformat %>%
          select(variable, pns_cns, description, total_n_var, perc))

stratified_neuro_counts_age_combined$variable <- factor(stratified_neuro_counts_age_combined$variable,
                                                                      levels=c("0-17 Years", "18-25 Years", "26-49 Years", "50-69 Years", "70-79 Years", "80+ Years"))
```

```{r fig.height=20, fig.width=12}
group.colors <- c(NNC = "gray", PNS = "slateblue", CNS ="tomato")

adult_ped_combined_plot <- ggplot(stratified_neuro_counts_age_combined,
                                                aes(x = total_n_var, y = reorder(description,
                                                                                 total_n_var), 
                                                    fill = pns_cns)) +
  geom_bar(stat = "identity") +
  geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 6,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + # to ensure percent labels are within the figure
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") +
  theme(legend.position = "none",
        strip.text.y = element_text(size = 40),
        strip.text.x = element_text(size = 35),
        axis.text.y.left = element_text(size = 25, face = "bold"),
        axis.text.x = element_text(size = 25),
        axis.title.x = element_text(size = 35))

ggsave("figures/Fig3_diagnoses_by_age_adult_ped_18group.png", adult_ped_combined_plot, width = 40, height = 19, dpi = 300)
```

![](figures/Fig3_diagnoses_by_age_adult_ped_18group.png){width="1031" height="337"}

## Evaluate Diagnoses by Outcome

```{r}

stratified_neuro_counts_adult$population = "adult"
stratified_neuro_counts_pediatric$population = "pediatric"

stratified_neuro_counts_combine <- rbind(stratified_neuro_counts_adult, stratified_neuro_counts_pediatric)

## determine total number of patients who died or severe
n_death_adult = get_table_n(df = tableone_adult, var = "Deceased") %>% as.numeric()
n_severe_adult = get_table_n(df = tableone_adult, var = "Severe") %>% as.numeric()

n_death_pediatric = get_table_n(df = tableone_pediatric, var = "Deceased") %>% as.numeric()
n_severe_pediatric = get_table_n(df = tableone_pediatric, var = "Severe") %>% as.numeric()

stratified_neuro_counts_outcomes <- stratified_neuro_counts_combine %>% 
  filter(variable %in% c("Severe", "Deceased"),
         !code == "NN", 
         !total_n_var == 0) %>%          
         # total_n would be calculating total codes across each outcome group which is not true reflection of patient counts b/c patients can have more than one code
         #mutate(total_n = sum(total_n_var)) %>% 
         group_by(variable, description, population) %>% 
  mutate(total_n_var = sum(total_n_var)) %>% 
  ungroup() %>% 
  mutate(perc = case_when(variable == "Severe" & population == "adult" ~ round(total_n_var/n_severe_adult*100,1),
                                 variable == "Deceased" & population == "adult" ~ round(total_n_var/n_death_adult*100,1),
                                 variable == "Severe" & population == "pediatric" ~ round(total_n_var/n_severe_pediatric*100,1),
                                 variable == "Deceased" & population == "pediatric" ~ round(total_n_var/n_death_pediatric*100,1)),
         perc = if_else(perc < 0.1, '<0.1', as.character(perc)),
       perc = paste0(perc, "%")) %>% 
  select(-code, -concept_type) %>% 
  distinct()
```

```{r}
adult_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "adult") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("Number of Patients") + 
  ggtitle("B. Adult") + 
  theme(legend.position = "none",
        plot.title = element_text(size = 44, face = "bold", hjust = -0.5), 
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_adult.png", adult_outcomes, width = 27, height = 15, dpi = 300)
```

```{r}
pediatric_outcomes = ggplot(stratified_neuro_counts_outcomes %>% 
         filter(population == "pediatric") %>% 
           mutate(variable = as.factor(variable) %>% 
                                          fct_recode(
                                            Mortality = "Deceased")),
       aes(x = total_n_var, y = reorder(description, total_n_var), fill = pns_cns)) +
  geom_bar(stat = "identity") +
 geom_text(
    aes(x = total_n_var, y = description, label = perc),
    hjust = -0.1,
    #hjust="inward",
    size = 5,
    inherit.aes = TRUE) +
  scale_x_continuous(expand = expansion(mult = c(0,0.2))) + 
  facet_grid(pns_cns ~ variable, scales = "free", space = "free_y") + 
  scale_y_discrete("", position="left", labels = function(x) str_wrap(x, width = 100)) +   
  scale_fill_manual(values = group.colors) +
  xlab("") + 
  ggtitle("A. Pediatric") + 
  theme(legend.position = "none",
        plot.title = element_text(size = 44, face = "bold", hjust = -0.5), 
        strip.text.y = element_text(size = 35),
        strip.text.x = element_text(size = 30),
        axis.text.y.left = element_text(size = 20, face = "bold"),
        axis.text.x = element_text(size = 15),
        axis.title.x = element_text(size = 35))

ggsave("figures/SuppFig1_diagnoses_by_outcomes_pediatric.png", pediatric_outcomes, width = 27, height = 9, dpi = 300)
```

```{r include=FALSE}
#grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1)

ggsave("figures/SuppFig1_diagnoses_by_outcome_combined.png", grid.arrange(pediatric_outcomes, adult_outcomes, ncol=1), width = 27, height = 25, dpi = 300)
```

![](figures/SuppFig1_diagnoses_by_outcome_combined.png){.class width="800" height="800px"}

## Evaluate patients with "Both" CNS and PNS diseases

These patients were excluded from the primary analysis due to potential confounding.

```{r}
# create empty list to store the counts of "both" patients
both_counts_list <- list()

# for each healthcare system, retrieve the number of excluded "both" patients recorded
for (i in sorted_sites) {
  n_both <- results[[paste(i)]][["first_hosp_results"]][["both_counts"]]
  
  both_counts_list[[i]] <- n_both
}

both_counts <- bind_rows(both_counts_list) %>% t() %>% data.frame() %>% sum()

print(paste(both_counts, "(", round(both_counts/(as.numeric(tableone_combine$N[1])+both_counts)*100,2), "%)")) 
```

# **Comorbidity Analysis**

Next, we will evaluate the frequency of comorbidities in our population and the associated risk that comorbidities may incur for acquiring a CNS or PNS diagnosis during COVID-19 hospitalization.

**1. Create a table of counts for each comorbidity and stratified by neurological status.**

```{r}
comorbidity_table_adult <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Adult") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_adult, 2)*100, "%)"))

comorbidity_table_pediatric <- comorb_table_wide %>%
  select(Comorbidity, population, Comorb_Total, `NNC_N_%`, `CNS_N_%`, `PNS_N_%`) %>%
  filter(population == "Pediatric") %>% 
  rename(N = "Comorb_Total",
         NNC = "NNC_N_%",
         CNS = "CNS_N_%",
         PNS = "PNS_N_%") %>% 
  mutate(N = paste0(N, " (", round(N/n_pediatric, 2)*100, "%)"))

supp_comorbidity_table <- rbind(comorbidity_table_adult, comorbidity_table_pediatric) %>% 
  arrange(Comorbidity)

write.csv(supp_comorbidity_table, "tables/SuppTable_Comorbidity_Counts.csv", row.names = FALSE)
```

**2. Create vector counts of patients with comorbidity data**

```{r}
neuro_pt_counts_sum_adult <- neuro_pt_counts %>% 
  filter(population == "Adult") %>% 
  select(-population)

neuro_pt_counts_sum_pediatric <- neuro_pt_counts %>% 
  filter(population == "Pediatric") %>% 
  select(-population)

none_n_adult = neuro_pt_counts_sum_adult$None_n
cns_n_adult = neuro_pt_counts_sum_adult$CNS_n
pns_n_adult = neuro_pt_counts_sum_adult$PNS_n

none_n_pediatric = neuro_pt_counts_sum_pediatric$None_n
cns_n_pediatric = neuro_pt_counts_sum_pediatric$CNS_n
pns_n_pediatric = neuro_pt_counts_sum_pediatric$PNS_n
```

**3. Calculate the relative risk (RR) of developing a CNS or PNS diagnosis for patients with each comorbidity**

```{r}
calculate_rr <- function(is_pediatric = FALSE) {
  
  if(is_pediatric == FALSE) {
    cns_n = cns_n_adult
    pns_n = pns_n_adult
    neuro_pt_counts_sum = neuro_pt_counts_sum_adult
    pop = "Adult"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) 
    list_of_comorbs <- list_of_comorbs$Comorbidity
    } else  {
    cns_n = cns_n_pediatric
    pns_n = pns_n_pediatric
    neuro_pt_counts_sum = neuro_pt_counts_sum_pediatric
    pop = "Pediatric"
    # create a list of comorbidities
    list_of_comorbs <- comorb_table_wide %>% 
      filter(population == paste(pop)) %>% 
      distinct(Comorbidity) %>% 
      as.vector()
    list_of_comorbs <- list_of_comorbs$Comorbidity
  }
  
  
  # create empty list to save relative risk calculations
  rr_calcs <- list()
  
  # for each comorbidity, we will calculate the relative risk of having a CNS and PNS diagnosis
  for (i in list_of_comorbs) {
  
    # https://www.cdc.gov/csels/dsepd/ss1978/lesson3/section5.html
    # create matrix as:
    # exposed group - with outcome, without outcome
    # non exposed group - with outcome, without outcome
  
    # where variables ending with `_Total` are the total number of CNS, PNS, or NNC patients with the respective comorbidity.
    # variables ending with `_n` are the total numbers in the entire population
  
    # to calculate confidence intervals
  #https://sphweb.bumc.bu.edu/otlt/mph-modules/bs/bs704_confidence_intervals/bs704_confidence_intervals8.html
  
    rr_cns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = CNS_Total, # number CNS w/comorb
             b = Comorb_Total - CNS_Total, # number of comorb pts w/o CNS
             c = cns_n - CNS_Total, # number of CNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or CNS
             risk_cns = a/(a+b),
             risk_non_cns = c/(c+d),
             rr_CNS = risk_cns/risk_non_cns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b), # (n1-x1)/n1 
             ci_2 = (d/c)/(c+d), # (n2-x2)/n2
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_CNS) - (1.96*ci_root),
             ci_upper = log(rr_CNS) + (1.96*ci_root),
             l_ci_cns = exp(ci_lower),
             u_ci_cns = exp(ci_upper)) %>%
      select(Comorbidity, Comorb_Total, None_Total, CNS_Total, PNS_Total, rr_CNS, l_ci_cns, u_ci_cns)
  
      rr_pns <- comorb_table_wide %>%
      filter(population == paste(pop),
             Comorbidity == paste(i)) %>%
      mutate(a = PNS_Total, # number PNS w/comorb
             b = Comorb_Total - PNS_Total, # number of comorb pts w/o PNS
             c = pns_n - PNS_Total, # number of PNS pts w/o comorb
             d = sum(neuro_pt_counts_sum) - Comorb_Total - c, # number of pts w/o comorb or PNS
             risk_pns = a/(a+b),
             risk_non_pns = c/(c+d),
             rr_PNS = risk_pns/risk_non_pns,
             # for confidence intervals
             ci_1 = (b/a)/(a+b),
             ci_2 = (d/c)/(c+d),
             ci_root = sqrt(ci_1 + ci_2),
             ci_lower = log(rr_PNS) - (1.96*ci_root),
             ci_upper = log(rr_PNS) + (1.96*ci_root),
             l_ci_pns = exp(ci_lower),
             u_ci_pns = exp(ci_upper)) %>%
      select(Comorbidity, rr_PNS, l_ci_pns, u_ci_pns)
  
      rr <- rr_cns %>%
        left_join(., rr_pns, by = c("Comorbidity"))
  
    rr_calcs[[i]] <- rr
  
  }
  
  rr_results <- bind_rows(rr_calcs) %>%
      mutate(rr_CNS = round(rr_CNS, 2),
           l_ci_cns = round(l_ci_cns, 2),
           u_ci_cns = round(u_ci_cns, 2),
           rr_PNS = round(rr_PNS, 2),
           l_ci_pns = round(l_ci_pns, 2),
           u_ci_pns = round(u_ci_pns, 2),
           `RR CNS (95% CI)` = paste(rr_CNS, "(", l_ci_cns, ",", u_ci_cns, ")"),
           `RR PNS (95% CI)` = paste(rr_PNS, "(", l_ci_pns, ",", u_ci_pns, ")")) %>%
    select(Comorbidity, rr_CNS, l_ci_cns, u_ci_cns, `RR CNS (95% CI)`,
           rr_PNS, l_ci_pns, u_ci_pns, `RR PNS (95% CI)`)
  
  # tidy up data
  write.csv(rr_results %>%
              arrange(desc(rr_CNS)) %>%
              select(Comorbidity, `RR CNS (95% CI)`, `RR PNS (95% CI)`), 
            paste0("tables/Supp.Table4_RR_", pop, ".csv"), row.names = FALSE)
  
    # reformat data
  rr_results_tidy_rr <- rr_results %>%
    select(Comorbidity, rr_CNS, rr_PNS) %>%
    pivot_longer(cols = rr_CNS:rr_PNS, names_to = "Neuro Status", values_to = "RR") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "rr_CNS", "CNS", "PNS"))
  
  rr_results_tidy_l_ci <- rr_results %>%
    select(Comorbidity, l_ci_cns, l_ci_pns) %>%
    pivot_longer(cols = l_ci_cns:l_ci_pns, names_to = "Neuro Status", values_to = "l_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "l_ci_cns", "CNS", "PNS"))
  
  rr_results_tidy_u_ci <- rr_results %>%
    select(Comorbidity, u_ci_cns, u_ci_pns) %>%
    pivot_longer(cols = u_ci_cns:u_ci_pns, names_to = "Neuro Status", values_to = "u_CI") %>%
    mutate(`Neuro Status` = if_else(`Neuro Status` == "u_ci_cns", "CNS", "PNS"))
  
  # combine all tidy results
  rr_results_tidy <- rr_results_tidy_rr %>%
    left_join(., rr_results_tidy_l_ci,
                               by = c("Comorbidity", "Neuro Status")) %>%
    left_join(., rr_results_tidy_u_ci,
                               by = c("Comorbidity", "Neuro Status"))
  
  # order by CNS diagnoses with higher RR
  cns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "CNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = cns_top_results$Comorbidity)
  
  cns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "CNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.7, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "tomato") +
      scale_x_continuous(breaks = seq(0, 4, 1), labels = seq(0, 4, 1),
                         limits = c(0,4)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("CNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  # arrange by top PNS results
  pns_top_results <- rr_results_tidy %>%
    filter(`Neuro Status` == "PNS") %>%
    arrange(RR) %>%
    select(Comorbidity)
  
  rr_results_tidy$Comorbidity <-  ordered(rr_results_tidy$Comorbidity, levels = pns_top_results$Comorbidity)
  
  
  pns_rr_plot <- ggplot(rr_results_tidy %>%
           filter(`Neuro Status` == "PNS"),
         aes(x = RR, y = Comorbidity)) +
    geom_vline(aes(xintercept = 1), size = .25, linetype = "dashed") +
      geom_errorbarh(aes(xmax = u_CI, xmin = l_CI), size = 0.70, height = 0.2, color = "gray50") +
      geom_point(size = 1.5, color = "slateblue") +
      scale_x_continuous(breaks = seq(0, 1.5, 0.5), labels = seq(0, 1.5, 0.5),
                         limits = c(0,1.5)) +
    ylab("") +
    xlab("Relative Risk") +
    ggtitle("PNS Patients") + 
    theme(axis.text.y = element_text(face = "bold"))
  
  pns_rr_plot <- set_panel_size(pns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  cns_rr_plot <- set_panel_size(cns_rr_plot,
                            width  = unit(7, "cm"),
                            height = unit(6.5, "in"))
  
  #grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2)
  
  ggsave(paste0("figures/Figure4.relative_risk_", pop, ".png"), grid.arrange(cns_rr_plot, pns_rr_plot, ncol=2), width = 15, height = 8, dpi = 300)
  
  }
```

### Relative Risk (RR)

Here we plot the relative risk of acquiring a CNS or PNS diagnosis in adult patients who have each comorbidity of the ECI compared to those who do not.

We are not able to compute this analysis in pediatric patients, due to the overall low incidence of comorbidities and neurological illness during COVID-19 hospitalization.

```{r eval=TRUE, include=FALSE}
rr_adult <- calculate_rr(is_pediatric = FALSE)
#rr_pediatric <- calculate_rr(is_pediatric = TRUE)
```

![](figures/Figure4.relative_risk_Adult.png){.class width="827" height="452"}

### LPCA deviance explained

During the local analysis for each participating healthcare system, we computed the top 10 principal components using logistic principal component analysis (LPCA). LPCA was performed on the matrix of npatients x 29 ECI comorbidities. Across healthcare systems, 10 principal components explained at least 75% of the model deviance. 

This supplementary analysis was performed in order to determine the best way to adjust for each patient's comorbidity burden in our downstream survival models.

```{r message=FALSE, warning=FALSE}
compare_deviance <- function(sorted_sites, is_pediatric=FALSE) {
  
  pca_dev <- list()
  
  if(is_pediatric==FALSE) {
    population = "adults"
    sorted_sites = sorted_sites[!sorted_sites%in% c("GOSH_results", "BCH_results")]
    } else {
    population = "pediatrics"
    }

  # for each healthcare system, we will calculate the total number of patients with each ICD code
  for (i in sorted_sites) {
    print(i)
    tmp <- get(i)
    tmp_pca <- tmp[[c(
      "comorbidities",
      paste0("comorb_", population),
      "deviance_expl"
    )]] %>% 
      data.frame() %>% 
      rename(dev_expl = '.')
    tmp_pca$site <- tmp[["site"]]
    pca_dev[[i]] <- tmp_pca
    
  }
  
  pca_df <- bind_rows(pca_dev)
  
  return(pca_df)
  
}

pca_adult <- compare_deviance(sorted_sites = adult_sites, is_pediatric = FALSE)


pca_data <- bind_rows(pca_adult)


theme_set(theme_classic())

pca_plot <- ggplot(pca_data, 
       aes(x = fct_reorder(site, -dev_expl), y = dev_expl, group = 1)) +
  geom_point(size = 2.5) +
  geom_line() +
  xlab("Healthcare System") + 
  ylab("Proportion of Deviance explained") +  
  ylim(0,1) + 
  theme(axis.text.x=element_text(angle=90, hjust=1, face = "bold", size = 12),
        axis.text.y=element_text(face = "bold"), 
        axis.title.y = element_text(face = "bold", size = 15)); pca_plot

ggsave("figures/SuppFig_pca_deviance.png", pca_plot, height = 6, width = 6)
```
