Mentions légales du service

Skip to content
Snippets Groups Projects
Commit 8f61373f authored by AndreaBnf's avatar AndreaBnf
Browse files

update: explo_ratios_trend

parent fc09c94a
No related tags found
No related merge requests found
# Explore data trend
## Methodology
In these plots we continue to explore the significant variables. To make the boxplots easier to read, we add the following to each plot:
- A line representing the average increase in the values of patients in the non_resistant group.
The points at Visit S and Visit 2 are, respectively, the means of the values at Visit S and Visit 2 (only non-resistant patients with a positive ratio).
- A line showing the average decrease in the values of patients in the non_resistant group.
The points at Visit S and Visit 2 are, respectively, the means of the values at Visit S and Visit 2 (only non-resistant patients with a negative ratio).
- A line showing the average increase in the values of patients in the primary_resistant group.
The points at Visit S and Visit 2 are, respectively, the means of the values at Visit S and Visit 2 (only primary_resistant patients with a positive ratio).
- A line showing the average decrease in the values of patients in the primary_resistant group.
The points at Visit S and Visit 2 are, respectively, the means of the values at Visit S and Visit 2 (only primary_resistant patients with a negative ratio).
## Connected boxplots
### Very significant (t-test, $p < 0.01$):
```{r connected_boxplots_trends very signif, cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Long-format table of biomarker values: the `CODE_from` columns and the
# `CODE_to` columns of `.df_bm_original` are each renamed to the ratio codes
# and stacked, with VISIT set to `from` / `to` respectively.
# NOTE(review): the renaming is positional. It assumes every CODE_from /
# CODE_to column exists in `.df_bm_original`; any_of() silently skips missing
# ones, which would make the rename fail. Confirm upstream.
df_boxplots <- rbind(
  .df_bm_original %>%
    select(SUBJID, any_of(as.character(dictionary_ratios$CODE_from))) %>%
    rename_with(~ c("SUBJID", as.character(dictionary_ratios$CODE))) %>%
    mutate(VISIT = from),
  .df_bm_original %>%
    select(SUBJID, any_of(as.character(dictionary_ratios$CODE_to))) %>%
    rename_with(~ c("SUBJID", as.character(dictionary_ratios$CODE))) %>%
    mutate(VISIT = to)
)

# Values of the very significant variables, suffixed with "_values" so they do
# not clash with the ratio columns (named by the bare code) after merging.
# as.character() guards against CODE being a factor: c() would otherwise turn
# it into integer codes and any_of() would silently select nothing.
df_boxplots_trend_very_signif <- df_boxplots %>%
  select(any_of(
    c("SUBJID", as.character(dictionary_very_signif$CODE), "VISIT")
  )) %>%
  rename_with(~ paste0(.x, "_values"), c(-VISIT, -SUBJID))

# Ratios of the very significant variables.
df_ratio_trend_very_signif <- ratios$df %>%
  select(any_of(c("SUBJID", as.character(dictionary_very_signif$CODE))))

# Merge on SUBJID: values per visit + resistance status, then + ratios.
.df_resistance <- subset(.df_outcome, select = c(SUBJID, PRIM_RES))
df_boxplots_trend_very_signif2 <- merge(
  df_boxplots_trend_very_signif, .df_resistance,
  by = "SUBJID"
)
df_boxplots_trend_very_signif3 <- merge(
  df_boxplots_trend_very_signif2, df_ratio_trend_very_signif,
  by = "SUBJID"
)
```
```{r create data very signif}
# Build the two parallel lists of column symbols handed to pmap() below.
# Each ratio column `<code>` is paired BY NAME with its `<code>_values`
# column, so the lists stay aligned even if a code is present in only one of
# the two tables (the previous positional pairing relied on both tables
# holding exactly the same codes in the same order).
variables_ratio <- setdiff(
  colnames(df_ratio_trend_very_signif),
  c("SUBJID", "VISIT", "PRIM_RES")
)
variables_ratio <- variables_ratio[
  paste0(variables_ratio, "_values") %in%
    colnames(df_boxplots_trend_very_signif2)
]
variables_values <- lapply(paste0(variables_ratio, "_values"), sym)
variables_ratio <- lapply(variables_ratio, sym)
```
```{r Prepare trend data median, results = "asis", cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Median of a value column per resistance group, ratio sign and visit.
#
# df    : data frame with columns PRIM_RES and VISIT plus the two columns
#         designated by `x_var` and `y_var`.
# x_var : ratio column (embraced with {{ }}); rows are split on `x_var > 0`.
# y_var : value column (embraced with {{ }}) whose median is computed.
# na.rm : drop missing values before taking the median. Defaults to TRUE:
#         otherwise a single NA makes the whole group NA, the plotting
#         function drops that row with na.omit(), and the table no longer
#         lines up with the counts returned by ponderation().
#
# Returns the summarised table (PRIM_RES, `<x_var> > 0`, VISIT, median) with
# PRIM_RES recoded from 0/1 to "non resistant" / "primary resistant".
compute_median_data <- function(df, x_var, y_var, na.rm = TRUE) {
  median_data <- df %>%
    group_by(PRIM_RES, {{ x_var }} > 0, VISIT) %>%
    summarise(median = median({{ y_var }}, na.rm = na.rm))
  median_data$PRIM_RES <- factor(
    median_data$PRIM_RES,
    levels = c(0, 1),
    labels = c("non resistant", "primary resistant")
  )
  median_data
}
# Number of rows per resistance group, ratio sign (`x_var > 0`) and visit.
# The result column is called `patient_count`.
# NOTE(review): n() counts rows, so this equals a patient count only if each
# patient has one row per visit. Confirm against the input table.
ponderation <- function(df, x_var) {
  by_group <- group_by(df, PRIM_RES, {{ x_var }} > 0, VISIT)
  summarise(by_group, patient_count = n())
}
# Trend plot of group medians between visits "VS" and "V2".
#
# tab_median      : output of compute_median_data() (columns PRIM_RES,
#                   `<ratio> > 0`, VISIT, median).
# tab_ponderation : output of ponderation() for the same ratio; its
#                   `patient_count` sets the line width and the "nb:" labels.
# ratio_label     : ratio code (symbol or string) used to look up the plot
#                   title in `dictionary_ratios`.
#
# Returns a ggplot object.
plot_median_data2 <- function(tab_median, tab_ponderation, ratio_label) {
  # Title: dictionary label of the ratio, falling back to the code itself
  # when the dictionary has no entry for it.
  ratio_code <- as.character(ratio_label)
  label_legend <- as.character(
    dictionary_ratios$LABEL[dictionary_ratios$CODE %in% ratio_code]
  )
  if (length(label_legend) == 0) {
    label_legend <- ratio_code
  }

  tab_median <- na.omit(tab_median)
  tab_ponderation <- na.omit(tab_ponderation)

  # The counts are attached by position, so both tables must describe the
  # same rows. Fail early with a clear message instead of at render time.
  if (nrow(tab_ponderation) != nrow(tab_median)) {
    stop(
      "plot_median_data2: tab_median and tab_ponderation have different ",
      "numbers of rows after removing missing values (",
      nrow(tab_median), " vs ", nrow(tab_ponderation), ").",
      call. = FALSE
    )
  }
  tab_median$patient_count <- tab_ponderation$patient_count

  # One line per (resistance group, ratio sign) pair. Ids are numbered in
  # order of appearance: 1, 1, 2, 2, ... for the usual two rows per pair,
  # but this also works for empty tables or pairs missing one visit.
  sign_col <- names(tab_median)[2]
  pair_key <- paste(tab_median[[1]], tab_median[[2]])
  tab_median$index <- match(pair_key, unique(pair_key))

  ggplot(
    tab_median,
    aes(
      x = factor(VISIT, levels = c("VS", "V2")),
      y = median,
      color = PRIM_RES,
      group = .data[[sign_col]]
    )
  ) +
    geom_point() +
    geom_line(aes(group = index), size = tab_median$patient_count / 4) +
    geom_text(
      aes(label = paste("nb:", patient_count), y = median),
      hjust = -0.2,
      vjust = 0
    ) +
    labs(
      title = label_legend,
      x = "Visite",
      color = "Resistance"
    ) +
    theme_minimal()
}
```
```{r Prepare trend data mean, results = "asis", cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Mean of a value column per resistance group, ratio sign and visit.
#
# df    : data frame with columns PRIM_RES and VISIT plus the two columns
#         designated by `x_var` and `y_var`.
# x_var : ratio column (embraced with {{ }}); rows are split on `x_var > 0`.
# y_var : value column (embraced with {{ }}) whose mean is computed.
# na.rm : drop missing values before averaging. Defaults to TRUE: otherwise
#         a single NA makes the whole group NA, the plotting function drops
#         that row with na.omit(), and the table no longer lines up with the
#         counts returned by ponderation().
#
# Returns the summarised table (PRIM_RES, `<x_var> > 0`, VISIT, mean) with
# PRIM_RES recoded from 0/1 to "non resistant" / "primary resistant".
compute_mean_data <- function(df, x_var, y_var, na.rm = TRUE) {
  mean_data <- df %>%
    group_by(PRIM_RES, {{ x_var }} > 0, VISIT) %>%
    summarise(mean = mean({{ y_var }}, na.rm = na.rm))
  mean_data$PRIM_RES <- factor(
    mean_data$PRIM_RES,
    levels = c(0, 1),
    labels = c("non resistant", "primary resistant")
  )
  mean_data
}
# Row count (`patient_count`) for every combination of PRIM_RES, sign of the
# ratio (`x_var > 0`) and VISIT. This repeats the definition given in the
# median chunk so that this chunk can be run on its own.
ponderation <- function(df, x_var) {
  summarise(
    group_by(df, PRIM_RES, {{ x_var }} > 0, VISIT),
    patient_count = n()
  )
}
# Trend plot of group means between visits "VS" and "V2".
#
# tab_mean        : output of compute_mean_data() (columns PRIM_RES,
#                   `<ratio> > 0`, VISIT, mean).
# tab_ponderation : output of ponderation() for the same ratio; its
#                   `patient_count` sets the line width and the "nb:" labels.
# ratio_label     : ratio code (symbol or string) used to look up the plot
#                   title in `dictionary_ratios`.
#
# Returns a ggplot object.
plot_mean_data2 <- function(tab_mean, tab_ponderation, ratio_label) {
  # Title: dictionary label of the ratio, falling back to the code itself
  # when the dictionary has no entry for it.
  ratio_code <- as.character(ratio_label)
  label_legend <- as.character(
    dictionary_ratios$LABEL[dictionary_ratios$CODE %in% ratio_code]
  )
  if (length(label_legend) == 0) {
    label_legend <- ratio_code
  }

  tab_mean <- na.omit(tab_mean)
  tab_ponderation <- na.omit(tab_ponderation)

  # The counts are attached by position, so both tables must describe the
  # same rows. Fail early with a clear message instead of at render time.
  if (nrow(tab_ponderation) != nrow(tab_mean)) {
    stop(
      "plot_mean_data2: tab_mean and tab_ponderation have different ",
      "numbers of rows after removing missing values (",
      nrow(tab_mean), " vs ", nrow(tab_ponderation), ").",
      call. = FALSE
    )
  }
  tab_mean$patient_count <- tab_ponderation$patient_count

  # One line per (resistance group, ratio sign) pair. Ids are numbered in
  # order of appearance: 1, 1, 2, 2, ... for the usual two rows per pair,
  # but this also works for empty tables or pairs missing one visit.
  sign_col <- names(tab_mean)[2]
  pair_key <- paste(tab_mean[[1]], tab_mean[[2]])
  tab_mean$index <- match(pair_key, unique(pair_key))

  ggplot(
    tab_mean,
    aes(
      x = factor(VISIT, levels = c("VS", "V2")),
      y = mean,
      color = PRIM_RES,
      group = .data[[sign_col]]
    )
  ) +
    geom_point() +
    geom_line(aes(group = index), size = tab_mean$patient_count / 4) +
    geom_text(
      aes(label = paste("nb:", patient_count), y = mean),
      hjust = -0.2,
      vjust = 0
    ) +
    labs(
      title = label_legend,
      x = "Visite",
      color = "Resistance"
    ) +
    theme_minimal()
}
```
#### Median trend
```{r plot trend data median very signif, results = "asis", cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Median trend plots for the very significant variables.
# NOTE(review): the column symbols are handed straight to the functions by
# pmap() (no lambda wrapper). This presumably matters for the `{{ }}`
# embracing inside them; confirm before refactoring the call form.

# One median table per (ratio, value) column pair.
tab_median <- pmap(
  list(
    rep_len(list(df_boxplots_trend_very_signif3), length(variables_values)),
    variables_ratio,
    variables_values
  ),
  compute_median_data
)

# Matching row counts, used for line width and "nb:" labels.
tab_ponderation <- pmap(
  list(
    rep_len(list(df_boxplots_trend_very_signif3), length(variables_values)),
    variables_ratio
  ),
  ponderation
)

# One ggplot per ratio, then laid out on a 4-column grid.
plot_median <- pmap(
  list(tab_median, tab_ponderation, variables_ratio),
  plot_median_data2
)
compoEDA::eda_multiplot(plot_median, ncol = 4, text_size = 4)
```
#### Mean trend
```{r plot trend data mean very signif, results = "asis", cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Mean trend plots for the very significant variables.
# NOTE(review): the column symbols are handed straight to the functions by
# pmap() (no lambda wrapper). This presumably matters for the `{{ }}`
# embracing inside them; confirm before refactoring the call form.

# One mean table per (ratio, value) column pair.
tab_mean <- pmap(
  list(
    rep_len(list(df_boxplots_trend_very_signif3), length(variables_values)),
    variables_ratio,
    variables_values
  ),
  compute_mean_data
)

# Matching row counts, used for line width and "nb:" labels.
tab_ponderation <- pmap(
  list(
    rep_len(list(df_boxplots_trend_very_signif3), length(variables_values)),
    variables_ratio
  ),
  ponderation
)

# One ggplot per ratio, then laid out on a 4-column grid.
plot_mean <- pmap(
  list(tab_mean, tab_ponderation, variables_ratio),
  plot_mean_data2
)
compoEDA::eda_multiplot(plot_mean, ncol = 4, text_size = 4)
```
### Significant (t-test, $0.01 \leq p < 0.05$):
```{r connected_boxplots_trends, cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Values of the significant variables, suffixed with "_values" so they do not
# clash with the ratio columns (named by the bare code) after merging.
# as.character() guards against CODE being a factor: c() would otherwise turn
# it into integer codes and any_of() would silently select nothing.
df_boxplots_trend <- df_boxplots %>%
  select(any_of(
    c("SUBJID", as.character(dictionary_signif$CODE), "VISIT")
  )) %>%
  rename_with(~ paste0(.x, "_values"), c(-VISIT, -SUBJID))

# Ratios of the significant variables.
df_ratio_trend <- ratios$df %>%
  select(any_of(c("SUBJID", as.character(dictionary_signif$CODE))))

# Merge on SUBJID: values per visit + resistance status, then + ratios.
.df_resistance <- subset(.df_outcome, select = c(SUBJID, PRIM_RES))
df_boxplots_trend2 <- merge(df_boxplots_trend, .df_resistance, by = "SUBJID")
df_boxplots_trend3 <- merge(df_boxplots_trend2, df_ratio_trend, by = "SUBJID")
```
```{r create data}
# Build the two parallel lists of column symbols handed to pmap() below.
# Each ratio column `<code>` is paired BY NAME with its `<code>_values`
# column, so the lists stay aligned even if a code is present in only one of
# the two tables (the previous positional pairing relied on both tables
# holding exactly the same codes in the same order).
variables_ratio <- setdiff(
  colnames(df_ratio_trend),
  c("SUBJID", "VISIT", "PRIM_RES")
)
variables_ratio <- variables_ratio[
  paste0(variables_ratio, "_values") %in% colnames(df_boxplots_trend2)
]
variables_values <- lapply(paste0(variables_ratio, "_values"), sym)
variables_ratio <- lapply(variables_ratio, sym)
```
#### Median trend
```{r plot trend data median, results = "asis", cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Median trend plots for the significant variables.
# NOTE(review): the column symbols are handed straight to the functions by
# pmap() (no lambda wrapper). This presumably matters for the `{{ }}`
# embracing inside them; confirm before refactoring the call form.

# One median table per (ratio, value) column pair.
tab_median <- pmap(
  list(
    rep_len(list(df_boxplots_trend3), length(variables_values)),
    variables_ratio,
    variables_values
  ),
  compute_median_data
)

# Matching row counts, used for line width and "nb:" labels.
tab_ponderation <- pmap(
  list(
    rep_len(list(df_boxplots_trend3), length(variables_values)),
    variables_ratio
  ),
  ponderation
)

# One ggplot per ratio, then laid out on a 4-column grid.
plot_median <- pmap(
  list(tab_median, tab_ponderation, variables_ratio),
  plot_median_data2
)
compoEDA::eda_multiplot(plot_median, ncol = 4, text_size = 4)
```
#### Mean trend
```{r plot trend data mean, results = "asis", cache=FALSE, echo=FALSE, message=FALSE, warning=FALSE}
# Mean trend plots for the significant variables.
# NOTE(review): the column symbols are handed straight to the functions by
# pmap() (no lambda wrapper). This presumably matters for the `{{ }}`
# embracing inside them; confirm before refactoring the call form.

# One mean table per (ratio, value) column pair.
tab_mean <- pmap(
  list(
    rep_len(list(df_boxplots_trend3), length(variables_values)),
    variables_ratio,
    variables_values
  ),
  compute_mean_data
)

# Matching row counts, used for line width and "nb:" labels.
tab_ponderation <- pmap(
  list(
    rep_len(list(df_boxplots_trend3), length(variables_values)),
    variables_ratio
  ),
  ponderation
)

# One ggplot per ratio, then laid out on a 4-column grid.
plot_mean <- pmap(
  list(tab_mean, tab_ponderation, variables_ratio),
  plot_mean_data2
)
compoEDA::eda_multiplot(plot_mean, ncol = 4, text_size = 4)
```
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment