library(dplyr)
library(tidyr)
library(readr)
library(ggplot2)
library(rcartocolor)
library(forcats)
library(purrr)
library(stringr)
library(DT)
library(cowplot)
load('data/processed-data.Rdata')
theme_set(theme_bw() + theme(legend.title = element_blank()))
source('utils.R')
<- c("France", "Germany", "Spain", "Italy", "Singapore", "USA" ) countries
Calculate percentages:
<- diag_icd_10 %>%
code_prevalence group_by(siteid, Country, time, icd, `Neurological Disease Category`, full_icd) %>%
# summarise(pats_time_icd_site = sum(num_patients_icd), .groups = 'drop') %>%
# left_join(demo_ana, by = 'siteid') %>%
mutate(percent_pats_site = num_patients_icd/num_patients_all,
siteid = as.factor(siteid)) %>%
ungroup() %>%
group_by(Country, time, icd, `Neurological Disease Category`, full_icd) %>%
mutate(percent_pats_country = sum(num_patients_icd)/all_pats_country) %>%
ungroup()
<- code_prevalence %>%
diff_code select(location = siteid, icd, `Neurological Disease Category`, time, percent_pats_site) %>%
pivot_wider(names_from = time, values_from = percent_pats_site,
id_cols = c(location, icd, `Neurological Disease Category`),
values_fill = list(percent_pats_site = 0)) %>%
mutate(percent_diff = `After admission` - `Before admission`,
loc_type = 'Site', location = as.factor(tolower(location)))
<- code_prevalence %>%
diff_code_country select(location = Country, icd, `Neurological Disease Category`, time, percent_pats_country) %>%
distinct() %>%
pivot_wider(names_from = time, values_from = percent_pats_country,
id_cols = c(location, icd, `Neurological Disease Category`),
values_fill = list(percent_pats_country = 0)) %>%
mutate(percent_diff = `After admission` - `Before admission`,
location = fct_relevel(location, countries),
loc_type = 'Country')
## Warning: Problem with `mutate()` input `location`.
## ℹ Unknown levels in `f`: Italy
## ℹ Input `location` is `fct_relevel(location, countries)`.
## Warning: Unknown levels in `f`: Italy
<- diff_code %>%
diff_code_loc bind_rows(diff_code_country)
<- diff_code_loc %>%
diff_heat ggplot(aes(y = fct_rev(icd), x = location, fill = percent_diff)) +
geom_tile() +
labs(y = NULL, x = NULL, fill = 'After - Before') +
scale_x_discrete() +
scale_fill_gradient2(#009392,#39b185,#9ccb86,#e9e29c,#eeb479,#e88471,#cf597e
low = '#798234',
high = '#cf597e',
labels = scales::percent_format(accuracy = 1)) +
facet_grid(rows = vars(`Neurological Disease Category`),
cols = vars(fct_rev(loc_type)), space = 'free', scales = 'free') +
heat_theme_bottom() +
theme(plot.margin = unit(c(1,0.2,0.2,3), "lines")) +
NULL
diff_heat
# ggsave('figs/icd_diff_heat_sitemap.png', diff_heat_site, height = 4, width = 8)
<- diff_code_loc %>%
before_code_prev ggplot(aes(y = icd, x = location, fill = `Before admission`)) +
geom_tile() +
labs(y = NULL, x = NULL, fill = 'Patient % per
site/country') +
#f6d2a9,#f5b78e,#f19c7c,#ea8171,#dd686c,#ca5268,#b13f64
scale_fill_gradient(
low = '#d1eeea', high = '#2a5674',
labels = scales::percent_format(accuracy = 1),
limits = c(0, max(diff_code_loc$`After admission`)),
guide = guide_legend(override.aes = list(fill = "white"))) +
scale_x_discrete() +
facet_grid(rows = vars(`Neurological Disease Category`),
cols = vars(fct_rev(loc_type)), space = 'free', scale = 'free') +
heat_theme_top() +
NULL
<- diff_code_loc %>%
after_code_prev ggplot(aes(y = icd, x = location, fill = `After admission`)) +
geom_tile() +
labs(y = NULL, x = NULL, fill = 'Patient % per
site/country') +
scale_x_discrete() +
scale_fill_gradient(low = '#d1eeea', high = '#2a5674',
labels = scales::percent_format(accuracy = 1),
limits = c(0, max(diff_code_loc$`After admission`))) +
facet_grid(rows = vars(`Neurological Disease Category`),
cols = vars(fct_rev(loc_type)), space = 'free', scale = 'free') +
heat_theme_bottom() +
theme(legend.title = element_text(hjust = 0)) +
NULL
<- cowplot::plot_grid(before_code_prev, after_code_prev, ncol = 1,
heats rel_heights = c(1, 1.15),
labels = c('A. Before admission', 'B. After admission'))
heats
ggsave('figs/icd_heatmap.png', heats, height = 10, width = 9)
ggsave('figs/tiffs/efig_1.tiff', heats, height = 10, width = 9, dpi = 300)
<- function(icdi){
my_t <- diff_code %>% filter(icd == icdi)
diff_icd data.frame(
icd = icdi,
t.test(diff_icd$percent_diff) %>%
::tidy()
broom
)
}<- unique(diff_code$icd) %>%
prevalence_stats lapply(my_t) %>%
bind_rows()
$p_value_holm <- p.adjust(prevalence_stats$p.value, 'holm')
prevalence_stats$p_value_bh <- p.adjust(prevalence_stats$p.value, 'BH')
prevalence_stats%>%
prevalence_stats filter(p_value_bh < 0.05)
## icd estimate statistic p.value parameter conf.low conf.high
## 1 R41 0.05765947 5.652728 1.705131e-06 38 0.03701006 0.07830888
## 2 G93 0.08116207 6.897838 4.471201e-08 36 0.05729890 0.10502525
## method alternative p_value_holm p_value_bh
## 1 One Sample t-test two.sided 3.239748e-05 1.705131e-05
## 2 One Sample t-test two.sided 8.942402e-07 8.942402e-07
= diff_code %>%
check filter(loc_type == 'Site', icd == 'G93') %>%
left_join(diag_icd_10 %>% select(siteid, Country) %>% distinct() %>% mutate(siteid = tolower(siteid)),
by = c('location' = 'siteid'))
%>% group_by(Country) %>%
check summarise(country_diff = mean(percent_diff), .groups = 'drop')
## # A tibble: 4 x 2
## Country country_diff
## <chr> <dbl>
## 1 France 0.00850
## 2 Germany 0.0161
## 3 Spain -0.00422
## 4 USA 0.0924
%>%
prevalence_stats arrange(desc(estimate)) %>%
mutate(ci = paste0('(', round(conf.low*100, 2), '%, ',
round(conf.high*100, 2), '%)')) %>%
select(- c(parameter, method, alternative, conf.low, conf.high)) %>%
datatable(rownames = FALSE, filter = 'top') %>%
formatRound(c('statistic'), 1) %>%
formatPercentage('estimate', 1) %>%
formatSignif(c('p.value', 'p_value_holm', 'p_value_bh'), 3) %>%
{.}
%>%
prevalence_stats write_csv('results/icd10_prevalence_stats.csv')
of the mean proportion of patients diagnosed with each ICD
<- qnorm(0.975)
alpha_threshold
<- code_prevalence %>%
ci_prevalence group_by(time, full_icd) %>%
add_count() %>%
summarise(
mean_prop = mean(percent_pats_site, na.rm = T),
sd_prob = sd(percent_pats_site, na.rm = T),
n = mean(n),
me_prop = alpha_threshold * sd_prob / sqrt(n)
%>%
) ungroup()
## `summarise()` regrouping output by 'time' (override with `.groups` argument)
<- diag_icd_10 %>%
icd_time mutate(time = fct_relevel(time, c('After admission', 'Before admission'))) %>%
group_by(full_icd, time) %>%
summarise(pats_icd_time = sum(num_patients_icd, na.rm = T), .groups = 'drop')
# sorted_icds <- icd_time %>%
# filter(time == 'After admission') %>%
# arrange(pats_icd_time) %>%
# pull(full_icd)
# sorted_icds <- code_prevalence %>%
# ungroup() %>%
# select(time, percent_pats_site, full_icd, siteid) %>%
# pivot_wider(names_from = time, values_from = percent_pats_site,
# values_fill = list(percent_pats_site = 0)) %>%
# group_by(full_icd) %>%
# summarise(after = mean(`After admission`, na.rm = T),
# before = mean(`Before admission`, na.rm = T),
# diff_prev = after - before,
# .groups = 'drop') %>%
# arrange(diff_prev) %>%
# pull(full_icd)
<- code_prevalence %>%
sorted_icds distinct(full_icd, icd, `Neurological Disease Category`) %>%
arrange(desc(`Neurological Disease Category`), desc(icd)) %>%
pull(full_icd)
<- icd_time %>%
total_icd ggplot(aes(x = pats_icd_time, y = fct_relevel(full_icd, sorted_icds), fill = time)) +
scale_fill_carto_d(palette = 4, guide = guide_legend(reverse = TRUE)) +
geom_col(position = 'dodge') +
theme_minimal() +
scale_x_reverse(expand = expansion(add = c(0,0))) +
scale_y_discrete(labels = NULL) +
theme(legend.position = c(0.3, 0.15),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
legend.key.height = unit(4, 'mm'),
plot.margin = margin(t = 1, r = 0.5, l = 0.5, unit = 'lines')) +
labs(x = 'Total number of patients at all sites', y = NULL)
<-
percent_icd %>%
ci_prevalence ggplot(aes(group = time)) +
::geom_pointrangeh(
ggstanceaes(x = mean_prop,
y = fct_relevel(full_icd, sorted_icds),
xmin = mean_prop - me_prop,
xmax = mean_prop + me_prop,
color = time),
position = position_dodge(width = 0.3),
stroke = 0.1, fatten = 3, size = 0.7
+
) scale_color_carto_d(palette = 4, guide = NULL) +
theme_minimal() +
theme(legend.position = c(0.75, 0.1),
panel.grid.minor = element_blank(),
legend.title = element_blank(),
axis.text.y = element_text(hjust = 0.5),
plot.margin = margin(t = 1, unit = 'lines')) +
scale_x_continuous(expand = expansion(add = c(0, 0.03)),
labels = scales::percent_format(accuracy = 1)) +
labs(x = 'Proportion of patients each site', y = NULL)
<- cowplot::plot_grid(total_icd, percent_icd, ncol = 2,
icd_prevalence_plots rel_widths = c(1, 3))
icd_prevalence_plots
# ggsave('figs/icd_prevalence.png', icd_prevalence_plots, height = 5, width = 10)
plot_grid(diff_heat, icd_prevalence_plots, rel_heights = c(1, 0.85),
ncol = 1, labels = 'AUTO') %>%
ggsave('figs/icd_prevalence_AB.png', ., height = 8, width = 9.7)
plot_grid(diff_heat, icd_prevalence_plots, rel_heights = c(1, 0.85),
ncol = 1, labels = 'AUTO') %>%
ggsave('figs/tiffs/fig_3.tiff', ., height = 8, width = 9.7, dpi = 300)
%>%
diff_code filter(icd == 'R41') %>%
ungroup() %>%
count(percent_diff > 0)
## # A tibble: 2 x 2
## `percent_diff > 0` n
## <lgl> <int>
## 1 FALSE 9
## 2 TRUE 30
%>%
diff_code filter(icd == 'G93') %>%
ungroup() %>%
count(percent_diff > 0)
## # A tibble: 2 x 2
## `percent_diff > 0` n
## <lgl> <int>
## 1 FALSE 6
## 2 TRUE 31
%>%
diff_code filter(icd == 'R42') %>%
ungroup() %>%
count(percent_diff > 0)
## # A tibble: 2 x 2
## `percent_diff > 0` n
## <lgl> <int>
## 1 FALSE 28
## 2 TRUE 10
What ICD code has the most number severe patients? Sites with more severe patients?
<- diag_icd_10 %>%
severe_code # filter(siteid != 'SITE309') %>%
mutate(percent_severe_site = num_patients_ever_severe_icd1/num_patients_icd,
siteid = as.factor(siteid))
<- severe_code %>%
severe_bef_heat filter(time == 'Before admission') %>%
ggplot(aes(y = icd, x = siteid, fill = percent_severe_site)) +
geom_tile() +
labs(y = NULL, x = NULL, fill = 'Severe % per ICD') +
#3d5941,#778868,#b5b991,#f6edbd,#edbb8a,#de8a5a,#ca562c
scale_fill_gradient(
# low = '#f6d2a9', high = '#b13f64',
low = 'white', high = '#ca562c',
guide = guide_legend(override.aes = list(fill = "white")),
+
) scale_x_discrete() +
facet_grid(rows = vars(`Neurological Disease Category`), space = 'free', scale = 'free') +
heat_theme_top() +
NULL
<- severe_code %>%
severe_aft_heat filter(time == 'After admission') %>%
ggplot(aes(y = icd, x = siteid, fill = percent_severe_site)) +
geom_tile() +
labs(y = NULL, x = NULL, fill = 'Severe % per ICD') +
scale_x_discrete() +
#3d5941,#778868,#b5b991,#f6edbd,#edbb8a,#de8a5a,#ca562c
scale_fill_gradient(
low = 'white', high = '#ca562c',
labels = scales::percent_format(accuracy = 1)) +
facet_grid(rows = vars(`Neurological Disease Category`), space = 'free', scale = 'free') +
heat_theme_bottom() +
NULL
<- cowplot::plot_grid(severe_bef_heat, severe_aft_heat, ncol = 1,
severe_heats rel_heights = c(1, 1.15),
labels = c('A. Before admission', 'B. After admission'))
severe_heats
ggsave('figs/icd_severe_heatmap.png', severe_heats, height = 10, width = 7)
For each ICD code, the proportion of severe patients who were diagnosed with that ICD code is similar to the proportion of never-severe patients who were diagnosed with that same ICD code.
For the sake of simplicity in this notebook, we’re going to denote patients whose symptoms have been categorized as severe (based on respiratory status +/- requiring ICU) at least once as severe patients, and patients whose symptoms have NEVER been categorized as severe as non-severe patients.
For each ICD code, we computed the expected number of severe patients by multiplying the proportion of non-severe patients who were diagnosed with that code with the total number of severe patients. We performed an enrichment analysis to examine the difference of severe patients proportions across ICD codes. We calculated each ICD code’s enrichment by dividing the observed proportion of honorees by the expected proportion of honorees and reported a value of log2 enrichment (LOE) and its 95% confidence intervals. The 95% confidence interval of the LOE was estimated using the Poisson model method [@isbn:9780849394447].
<- diag_icd_10 %>%
contingency_continent mutate(continent = ifelse(Country == 'USA', 'US', 'NonUS')) %>%
group_by(full_icd, time, continent) %>%
summarise(across(contains('severe'), .fns = sum, na.rm = T), .groups = 'drop') %>%
mutate(Observed = num_patients_ever_severe_icd1,
num_non_severe = num_patients_never_severe_icd1 + num_patients_never_severe_icd0,
num_severe = num_patients_ever_severe_icd1 + num_patients_ever_severe_icd0,
Expected = num_patients_never_severe_icd1/num_non_severe*num_severe,
over_sev = Observed - Expected)
<- contingency_continent %>%
ind_fish filter(!is.na(Expected), num_patients_never_severe_icd0 >= 0) %>%
select(full_icd, time, continent,
num_patients_never_severe_icd0,
num_patients_never_severe_icd1,
num_patients_ever_severe_icd0,%>%
num_patients_ever_severe_icd1) group_by(full_icd, time, continent) %>%
nest() %>%
mutate(fish = map(data, my_fish)) %>%
::select(-data) %>%
dplyrunnest(cols = c(fish)) %>%
mutate(upper = ifelse(is.na(upper), Inf, upper)) %>%
mutate(lci = paste0('(', round(log2(lower), 1), ', ',
round(log2(upper), 1), ')'),
lestimate = log2(estimate))
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 8: full_icd = "Disturbances of smell and taste (R43)", time = "Before admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 15: full_icd = "Encephalitis, myelitis and encephalomyelitis (G04)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 16: full_icd = "Encephalitis, myelitis and encephalomyelitis (G04)", time = "Before admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 23: full_icd = "Inflammatory polyneuropathy (G61)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 24: full_icd = "Inflammatory polyneuropathy (G61)", time = "Before admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 25: full_icd = "Meningitis due to other and unspecified causes (G03)", time = "After admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 27: full_icd = "Meningitis due to other and unspecified causes (G03)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 28: full_icd = "Meningitis due to other and unspecified causes (G03)", time = "Before admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 29: full_icd = "Myositis (M60)", time = "After admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 31: full_icd = "Myositis (M60)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 47: full_icd = "Other and unspecified nontraumatic intracranial hemorrhage (I62)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ NaNs produced
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 59: full_icd = "Other headache syndromes (G44)", time = "Before admission", continent = "NonUS".
## Warning in sqrt(1/(a1 + 0.1) - 1/(n1) + 1/(a0 + 0.1) - 1/(n0)): NaNs produced
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 59: full_icd = "Other headache syndromes (G44)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 69: full_icd = "Sequelae of inflammatory and toxic polyneuropathies (G65)", time = "After admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 70: full_icd = "Sequelae of inflammatory and toxic polyneuropathies (G65)", time = "Before admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 77: full_icd = "Unspecified psychosis not due to a substance or known physiological condition (F29)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 79: full_icd = "Vascular syndromes of brain in cerebrovascular disease (G46)", time = "After admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 80: full_icd = "Vascular syndromes of brain in cerebrovascular disease (G46)", time = "After admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 81: full_icd = "Vascular syndromes of brain in cerebrovascular disease (G46)", time = "Before admission", continent = "NonUS".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 82: full_icd = "Vascular syndromes of brain in cerebrovascular disease (G46)", time = "Before admission", continent = "US".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
$`P value (Holm)` <- p.adjust(ind_fish$p_value, 'holm')
ind_fish$`P value (FDR)` <- p.adjust(ind_fish$p_value, 'BH')
ind_fish
<- contingency_continent %>%
madata left_join(ind_fish, by = c('full_icd', 'continent', 'time')) %>%
filter(time == 'After admission') %>%
mutate(
distance_to_null = case_when(
> 1 ~ lower - 1,
lower TRUE ~ upper - 2
),presentation = case_when(
> 1 & `P value (FDR)` < 0.05 ~ '#d46780',
lower < 1 & `P value (FDR)` < 0.05 ~ '#798234',
upper TRUE ~ 'grey20'
),upper = ifelse(is.na(upper), Inf, upper))
<- madata %>%
plot_obs_exp mutate(lestimate = log2(estimate),
llower = log2(lower),
lupper = log2(upper)) %>%
select(full_icd, continent, lestimate, llower, lupper, presentation, over_sev, Observed, Expected) %>%
pivot_longer(- c(full_icd, continent, presentation, over_sev), names_to = 'type') %>%
mutate(subtype = ifelse(type == 'Expected' | type == 'Observed',
'Sqrt(number of honorees)',
'Log2 enrichment, 95% CI')) %>%
pivot_wider(names_from = type, values_from = value) %>%
mutate(presentation = as.factor(presentation),
full_icd = fct_relevel(full_icd, sorted_icds))
<- madata %>%
sorted_icds filter(time == 'After admission') %>%
group_by(full_icd) %>%
summarise(total_expected = sum(Expected), .groups = 'drop') %>%
arrange(total_expected) %>%
pull(full_icd)
<- plot_obs_exp %>% filter(subtype == 'Sqrt(number of honorees)')
plot_obs_exp_right <- plot_obs_exp %>% filter(subtype != 'Sqrt(number of honorees)')
plot_obs_exp_left
<- plot_enrich(
enrichment_plot_us %>% filter(continent == 'US'),
plot_obs_exp_left %>% filter(continent == 'US') %>%
plot_obs_exp_right slice(match(sorted_icds, full_icd)),
nudge = 2.5)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
<- plot_enrich(
enrichment_plot_nonus %>% filter(continent == 'NonUS'),
plot_obs_exp_left %>% filter(continent == 'NonUS') %>% slice(match(sorted_icds, full_icd)),
plot_obs_exp_right nudge = 2.5)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
<- cowplot::plot_grid(
enrichment_plot_cont
enrichment_plot_us,
enrichment_plot_nonus,ncol = 1, hjust = -0.05,
labels = c('A. US sites', 'B. Non-US sites')
)
ggsave('figs/icd_severe_after_us_nonus.png', enrichment_plot_cont, width = 9.5, height = 7)
ggsave('figs/tiffs/efig_2.tiff', enrichment_plot_cont, width = 9.5, height = 7, dpi = 300)
%>%
madata write_csv('results/icd10_fish_tab_us_nonus.csv')
<- diag_icd_10 %>%
contingency_df # filter(time == 'Before admission') %>%
select(- c(num_patients_all, num_patients_ever_severe, num_patients_never_severe)) %>%
group_by(full_icd, time) %>%
summarise(across(contains('severe'), .fns = sum, na.rm = T), .groups = 'drop') %>%
mutate(Observed = num_patients_ever_severe_icd1,
num_non_severe = num_patients_never_severe_icd1 + num_patients_never_severe_icd0,
num_severe = num_patients_ever_severe_icd1 + num_patients_ever_severe_icd0,
Expected = num_patients_never_severe_icd1/num_non_severe*num_severe,
over_sev = Observed - Expected)
<- contingency_df %>%
nested_obs_exp select(full_icd,
time,
num_patients_never_severe_icd0,
num_patients_never_severe_icd1,
num_patients_ever_severe_icd0, %>%
num_patients_ever_severe_icd1) group_by(full_icd, time) %>%
nest()
<- nested_obs_exp %>%
fish_obs_exp mutate(fish = map(data, my_fish)) %>%
::select(-data) %>%
dplyrunnest(cols = c(fish)) %>%
mutate(upper = ifelse(is.na(upper), Inf, upper)) %>%
mutate(lci = paste0('(', round(log2(lower), 1), ', ',
round(log2(upper), 1), ')'),
lestimate = log2(estimate))
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 8: full_icd = "Encephalitis, myelitis and encephalomyelitis (G04)", time = "Before admission".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 14: full_icd = "Meningitis due to other and unspecified causes (G03)", time = "Before admission".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 35: full_icd = "Sequelae of inflammatory and toxic polyneuropathies (G65)", time = "After admission".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
## Warning: Problem with `mutate()` input `fish`.
## ℹ Chi-squared approximation may be incorrect
## ℹ Input `fish` is `map(data, my_fish)`.
## ℹ The error occurred in group 36: full_icd = "Sequelae of inflammatory and toxic polyneuropathies (G65)", time = "Before admission".
## Warning in chisq.test(xx, correct = correction): Chi-squared approximation may
## be incorrect
$`P value (Holm)` <- p.adjust(fish_obs_exp$p_value, 'holm')
fish_obs_exp$`P value (FDR)` <- p.adjust(fish_obs_exp$p_value, 'BH')
fish_obs_exp%>%
fish_obs_exp filter(`P value (FDR)` < 0.05,
== 'After admission') time
## # A tibble: 13 x 10
## # Groups: full_icd, time [13]
## full_icd time estimate lower upper p_value lci lestimate `P value (Holm)`
## <chr> <chr> <dbl> <dbl> <dbl> <dbl> <chr> <dbl> <dbl>
## 1 Blindne… Afte… 0.771 0.669 0.889 5.33e- 5 (-0.… -0.375 1.71e- 3
## 2 Dizzine… Afte… 0.724 0.669 0.785 6.01e-20 (-0.… -0.465 2.34e-18
## 3 Encepha… Afte… 1.37 1.17 1.60 3.09e- 3 (0.2… 0.455 8.33e- 2
## 4 Nontrau… Afte… 1.36 1.23 1.51 6.81e- 6 (0.3… 0.446 2.38e- 4
## 5 Nontrau… Afte… 1.28 1.10 1.48 7.53e- 3 (0.1… 0.355 1.96e- 1
## 6 Other a… Afte… 1.72 1.67 1.77 4.21e-46 (0.7… 0.780 1.72e-44
## 7 Other a… Afte… 1.34 1.20 1.50 8.67e- 5 (0.3… 0.425 2.69e- 3
## 8 Other c… Afte… 1.24 1.13 1.35 4.75e- 5 (0.2… 0.307 1.57e- 3
## 9 Other d… Afte… 1.36 1.32 1.40 1.34e-74 (0.4… 0.442 5.63e-73
## 10 Other h… Afte… 0.531 0.409 0.690 1.34e- 9 (-1.… -0.913 4.97e- 8
## 11 Other s… Afte… 1.22 1.19 1.25 1.52e-40 (0.2… 0.283 6.08e-39
## 12 Transie… Afte… 0.554 0.444 0.693 6.01e-11 (-1.… -0.851 2.28e- 9
## 13 Unspeci… Afte… 0.559 0.434 0.720 1.29e- 8 (-1.… -0.839 4.64e- 7
## # … with 1 more variable: `P value (FDR)` <dbl>
While the Warning messages mentioned Chi-squared, the p-values were actually calculated using Fisher’s Exact test (see more in epitools::tab2by2.test()
).
Note: small number of observations for ICD-10 code G04, G03, G65:
%>%
contingency_df filter(grepl('G04|G03|G65', full_icd)) %>%
datatable()
The full table with all ICD codes and their corresponding enrichment can be browsed interactively below:
library(DT)
<- fish_obs_exp %>%
fish_tab left_join(contingency_df, by = c('full_icd', 'time')) %>%
select(full_icd, time, Observed, Expected, over_sev,
estimate, lestimate, lci, p_value,`P value (Holm)`, `P value (FDR)`) %>%
arrange(desc(over_sev)) %>%
rename('ICD' = 'full_icd',
'Observed - Expected' = 'over_sev',
'Enrichment' = 'estimate',
'Log2(enrichment)' = 'lestimate',
'95% Confidence interval' = 'lci',
'P value (raw)' = 'p_value')
%>%
fish_tab datatable(rownames = FALSE, filter = 'top') %>%
formatRound(c('Observed', 'Expected', 'Observed - Expected',
'Enrichment', 'Log2(enrichment)'), 1) %>%
formatSignif(c('P value (raw)', 'P value (Holm)', 'P value (FDR)'), 3) %>%
{.}
%>%
fish_tab write_csv('results/icd10_fish_tab.csv')
%>%
fish_obs_exp filter(`P value (FDR)` < 0.05) %>%
arrange(time) %>%
mutate(lestimate = round(lestimate, 2),
drr = round((estimate - 1)*100, 0),
lower_drr = (lower - 1)*100,
upper_drr = (upper - 1)*100,
ci_drr = paste0('(', round(lower_drr, 0), ', ',
round(upper_drr, 0), ')'),
p_fdr = format(`P value (FDR)`, digits = 2)) %>%
select(time, full_icd, lestimate, drr, ci_drr, p_fdr) %>%
write_csv('results/icd10_signi.csv')
A positive value of LOE indicates a higher proportion of severe patients with that ICD code compared to non-severe patients. A LOE value of 1 represents a one-fold enrichment (i.e., observed number of severe patients is twice as much as expected). We found an excess of severe patients with the following ICD codes:
<- contingency_df %>%
filtered_obs_exp left_join(fish_obs_exp, by = c('full_icd', 'time')) %>%
mutate(
distance_to_null = case_when(
> 1 ~ lower - 1,
lower TRUE ~ upper - 2
),presentation = case_when(
> 1 & `P value (FDR)` < 0.05 ~ '#d46780',
lower < 1 & `P value (FDR)` < 0.05 ~ '#798234',
upper TRUE ~ 'grey20'
%>%
)) {.}
# plot_obs_exp_before <- plot_obs_exp_left
# sorted_icds <- rev(as.character(filtered_obs_exp$full_icd))
# sorted_icds <- filtered_obs_exp %>%
# select(full_icd, time, estimate) %>%
# pivot_wider(names_from = time, values_from = estimate) %>%
# mutate(diff_est = log2(`After admission`) - log2(`Before admission`),
# diff_est = ifelse(is.na(diff_est), -Inf, diff_est),
# full_icd = as.character(full_icd)) %>%
# arrange(diff_est) %>%
# pull(full_icd)
<- filtered_obs_exp %>%
sorted_icds filter(time == 'After admission') %>%
arrange(Expected) %>%
pull(full_icd)
<- filtered_obs_exp %>%
plot_obs_exp mutate(lestimate = log2(estimate),
llower = log2(lower),
lupper = log2(upper)) %>%
select(full_icd, time, lestimate, llower, lupper, presentation, over_sev, Observed, Expected) %>%
pivot_longer(- c(full_icd, time, presentation, over_sev), names_to = 'type') %>%
mutate(subtype = ifelse(type == 'Expected' | type == 'Observed',
'Sqrt(number of honorees)',
'Log2 enrichment, 95% CI')) %>%
pivot_wider(names_from = type) %>%
mutate(presentation = as.factor(presentation),
full_icd = fct_relevel(full_icd, sorted_icds))
<- plot_obs_exp %>% filter(subtype == 'Sqrt(number of honorees)')
plot_obs_exp_right <- plot_obs_exp %>% filter(subtype != 'Sqrt(number of honorees)') plot_obs_exp_left
<- plot_enrich(
enrichment_plot_before %>% filter(time == 'Before admission'),
plot_obs_exp_left %>% filter(time == 'Before admission') %>%
plot_obs_exp_right slice(match(sorted_icds, full_icd)),
nudge = 2.5)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
<- plot_enrich(
enrichment_plot_after %>% filter(time == 'After admission'),
plot_obs_exp_left %>% filter(time == 'After admission') %>% slice(match(sorted_icds, full_icd)),
plot_obs_exp_right nudge = 4)
## Warning: Vectorized input to `element_text()` is not officially supported.
## Results may be unexpected or may change in future versions of ggplot2.
<- cowplot::plot_grid(
enrichment_plot
enrichment_plot_before,
enrichment_plot_after,ncol = 1, hjust = -0.05,
labels = c('A. Before admission', 'B. After admission')
) enrichment_plot
ggsave('figs/icd_severe_after.png', enrichment_plot_after, width = 9.5, height = 3.5)
ggsave('figs/tiffs/fig_4.tiff', enrichment_plot_after, width = 9.5, height = 3.5, dpi = 300)
Figure caption: Each ICD code’s log2 enrichment (LOE) and its 95% confidence interval (left), and the absolute difference between observed (triangle) and expected (circle) number of severe patients (right) before admission. Positive value of LOE indicates a higher proportion of severe patients with that ICD code compared to non-severe patients. Neurological ICD codes are ordered based on the number of severe patients. Difference has been rounded.
<- plot_obs_exp %>%
plot_enrich_both ggplot(aes(y = fct_relevel(full_icd, sorted_icds))) +
geom_vline(aes(xintercept = 0), linetype = 2) +
::geom_pointrangeh(
ggstanceaes(x = lestimate,
xmin = llower,
xmax = lupper,
color = time),
position = position_dodge(width = 0.3),
stroke = 0.1, fatten = 3, size = 0.7
+
) labs(y = NULL, x = bquote(Log[2] ~ 'enrichment, 95% CI')) +
theme(
legend.position = 'bottom',
legend.background = element_blank(),
axis.title = element_text(size = 9),
plot.margin = margin(5.5, 2, 5.5, 5.5, unit = 'pt')
+
) scale_color_carto_d(palette = 4, direction = -1) +
NULL
plot_enrich_both
## Warning: Removed 42 rows containing missing values (geom_pointrangeh).
ggsave('figs/icd_severe.png', plot_enrich_both, width = 9, height = 5)
## Warning: Removed 42 rows containing missing values (geom_pointrangeh).