::p_load(ggdist, ggridges, ggthemes,
pacman
colorspace, tidyverse, ggstatsplot,
tidyverse, plotly, crosstalk, DT, ggdist, gganimate, FunnelPlotR, knitr)
Hands-on_Ex04
Getting Start
Installing and loading the packages
Importing data
<- read_csv("data04/Exam_data_03.csv")
exam
<- read_csv("data04/COVID-19_DKI_Jakarta.csv") %>%
covid19 mutate_if(is.character, as.factor)
Visualising Distribution
Visualising Distribution with Ridgeline Plot
Plotting ridgeline graph: ggridges method
ggplot(exam,
aes(x = ENGLISH,
y = CLASS)) +
geom_density_ridges(
scale = 3,
rel_min_height = 0.01,
bandwidth = 3.4,
fill = lighten("#7097BB", .3),
color = "white"
+
) scale_x_continuous(
name = "English grades",
expand = c(0, 0)
+
) scale_y_discrete(name = NULL, expand = expansion(add = c(0.2, 2.6))) +
theme_ridges()
Varying fill colors along the x axis
ggplot(exam,
aes(x = ENGLISH,
y = CLASS,
fill = stat(x))) +
geom_density_ridges_gradient(
scale = 3,
rel_min_height = 0.01) +
scale_fill_viridis_c(name = "Temp. [F]",
option = "C") +
scale_x_continuous(
name = "English grades",
expand = c(0, 0)
+
) scale_y_discrete(name = NULL, expand = expansion(add = c(0.2, 2.6))) +
theme_ridges()
Mapping the probabilities directly onto colour
ggplot(exam,
aes(x = ENGLISH,
y = CLASS,
fill = 0.5 - abs(0.5-stat(ecdf)))) +
stat_density_ridges(geom = "density_ridges_gradient",
calc_ecdf = TRUE) +
scale_fill_viridis_c(name = "Tail probability",
direction = -1) +
theme_ridges()
Ridgeline plots with quantile lines
ggplot(exam,
aes(x = ENGLISH,
y = CLASS,
fill = factor(stat(quantile))
+
)) stat_density_ridges(
geom = "density_ridges_gradient",
calc_ecdf = TRUE,
quantiles = 4,
quantile_lines = TRUE) +
scale_fill_viridis_d(name = "Quartiles") +
theme_ridges()
ggplot(exam,
aes(x = ENGLISH,
y = CLASS,
fill = factor(stat(quantile))
+
)) stat_density_ridges(
geom = "density_ridges_gradient",
calc_ecdf = TRUE,
quantiles = c(0.025, 0.975)
+
) scale_fill_manual(
name = "Probability",
values = c("#FF0000A0", "#A0A0A0A0", "#0000FFA0"),
labels = c("(0, 0.025]", "(0.025, 0.975]", "(0.975, 1]")
+
) theme_ridges()
Visualising Distribution with Raincloud Plot
Plotting a Half Eye graph
ggplot(exam,
aes(x = RACE,
y = ENGLISH)) +
stat_halfeye(adjust = 0.5,
justification = -0.2,
.width = 0,
point_colour = NA)
Adding the boxplot with geom_boxplot()
ggplot(exam,
aes(x = RACE,
y = ENGLISH)) +
stat_halfeye(adjust = 0.5,
justification = -0.2,
.width = 0,
point_colour = NA) +
geom_boxplot(width = .20,
outlier.shape = NA)
Adding the Dot Plots with stat_dots()
ggplot(exam,
aes(x = RACE,
y = ENGLISH)) +
stat_halfeye(adjust = 0.5,
justification = -0.2,
.width = 0,
point_colour = NA) +
geom_boxplot(width = .20,
outlier.shape = NA) +
stat_dots(side = "left",
justification = 1.2,
binwidth = .5,
dotsize = 2)
Finishing touch
ggplot(exam,
aes(x = RACE,
y = ENGLISH)) +
stat_halfeye(adjust = 0.5,
justification = -0.2,
.width = 0,
point_colour = NA) +
geom_boxplot(width = .20,
outlier.shape = NA) +
stat_dots(side = "left",
justification = 1.2,
binwidth = .5,
dotsize = 1.5) +
coord_flip() +
theme_economist()
Visual Statistical Analysis
Statistical Tests
One-sample test: gghistostats() method
set.seed(1234)
gghistostats(
data = exam,
x = ENGLISH,
type = "bayes",
test.value = 60,
xlab = "English scores"
)
Two-sample mean test: ggbetweenstats()
ggbetweenstats(
data = exam,
x = GENDER,
y = MATHS,
type = "np",
messages = FALSE
)
Oneway ANOVA Test: ggbetweenstats() method
ggbetweenstats(
data = exam,
x = RACE,
y = ENGLISH,
type = "p",
mean.ci = TRUE,
pairwise.comparisons = TRUE,
pairwise.display = "s",
p.adjust.method = "fdr",
messages = FALSE
)
Significant Test of Association (Depedence) : ggbarstats() methods
<- exam %>%
exam1 mutate(MATHS_bins =
cut(MATHS,
breaks = c(0,60,75,85,100))
)
ggbarstats(exam1,
x = MATHS_bins,
y = GENDER)
Visualising Uncertainty
Visualizing the uncertainty of point estimates: ggplot2 methods
Preparation
<- exam %>%
my_sum group_by(RACE) %>%
summarise(
n=n(),
mean=mean(MATHS),
sd=sd(MATHS)
%>%
) mutate(se=sd/sqrt(n-1))
::kable(head(my_sum), format = 'html') knitr
RACE | n | mean | sd | se |
---|---|---|---|---|
Chinese | 193 | 76.50777 | 15.69040 | 1.132357 |
Indian | 12 | 60.66667 | 23.35237 | 7.041005 |
Malay | 108 | 57.44444 | 21.13478 | 2.043177 |
Others | 9 | 69.66667 | 10.72381 | 3.791438 |
Plotting standard error bars of point estimates
ggplot(my_sum) +
geom_errorbar(
aes(x=RACE,
ymin=mean-se,
ymax=mean+se),
width=0.2,
colour="black",
alpha=0.9,
linewidth=0.5) +
geom_point(aes
x=RACE,
(y=mean),
stat="identity",
color="red",
size = 1.5,
alpha=1) +
ggtitle("Standard error of mean maths score by rac")
Plotting confidence interval of point estimates
ggplot(my_sum) +
geom_errorbar(
aes(x=reorder(RACE, -mean),
ymin=mean-1.96*se,
ymax=mean+1.96*se),
width=0.2,
colour="black",
alpha=0.9,
linewidth=0.5) +
geom_point(aes
x=RACE,
(y=mean),
stat="identity",
color="red",
size = 1.5,
alpha=1) +
labs(x = "Maths score",
title = "95% confidence interval of mean maths score by race")
Visualizing the uncertainty of point estimates with interactive error bars
= SharedData$new(my_sum)
shared_df
bscols(widths = c(5,7),
ggplotly(
ggplot(shared_df) +
geom_errorbar(aes(
x = reorder(RACE, -mean),
ymin = mean - 2.58 * se,
ymax = mean + 2.58 * se),
linewidth = 0.5, colour = "black", alpha = 0.9) +
geom_point(aes(
x = RACE,
y = mean,
text = paste("Race:", RACE,
"<br>N:", n,
"<br>Avg. Scores:", round(mean, 2),
"<br>99% CI:[", round((mean - 2.58 * se), 2), ",",
round((mean + 2.58 * se), 2), "]")),
stat = "identity",
color = "red", size = 1.5, alpha = 1) +
xlab("Race") +
ylab("Average Scores") +
theme_minimal() +
theme(axis.text.x = element_text(angle = 45, vjust = 0.5, hjust = 1)) +
ggtitle("99% Confidence interval of average\nmaths scores by race"),
tooltip = "text"
),::datatable(shared_df,
DTrownames = FALSE,
class = "compact",
width = "100%",
extensions = 'Buttons',
options = list(
dom = 'Blfrtip',
pageLength = 10,
scrollX = TRUE
),colnames = c("No. of pupils", "Avg Scores", "Std Dev", "Std Error")) %>%
formatRound(columns = c('mean', 'sd', 'se'), digits = 2)
)
Visualising Uncertainty: ggdist package
Visualizing the uncertainty of point estimates: ggdist methods
%>%
exam ggplot(aes(x = RACE,
y = MATHS)) +
stat_pointinterval() +
labs(
title = "Visualising confidence intervals of mean math score",
subtitle = "Mean Point + Multiple-interval plot")
%>%
exam ggplot(aes(x = RACE, y = MATHS)) +
stat_pointinterval(.width = 0.95,
.point = median,
.interval = qi) +
labs(
title = "Visualising confidence intervals of median math score",
subtitle = "Median Point + Multiple-interval plot")
Visualizing the uncertainty of point estimates: ggdist methods
%>%
exam ggplot(aes(x = RACE,
y = MATHS)) +
stat_pointinterval(
show.legend = FALSE) +
labs(
title = "Visualising confidence intervals of mean math score",
subtitle = "Mean Point + Multiple-interval plot")
Visualizing the uncertainty of point estimates: ggdist methods
%>%
exam ggplot(aes(x = RACE,
y = MATHS)) +
stat_gradientinterval(
fill = "skyblue",
show.legend = TRUE
+
) labs(
title = "Visualising confidence intervals of mean math score",
subtitle = "Gradient + interval plot")
Visualising Uncertainty with Hypothetical Outcome Plots (HOPs)
Installing ungeviz package
::install_github("wilkelab/ungeviz") devtools
library(ungeviz)
ggplot(data = exam,
aes(x = factor(RACE),
(y = MATHS))) +
geom_point(position = position_jitter(
height = 0.3,
width = 0.05),
size = 0.4,
color = "#0072B2",
alpha = 1/2) +
geom_hpline(data = sampler(25,
group = RACE),
height = 0.6,
color = "#D55E00") +
theme_bw() +
transition_states(.draw, 1, 3)
Funnel Plots for Fair Comparisons
FunnelPlotR methods
FunnelPlotR methods: The basic plot
funnel_plot(
.data = covid19,
numerator = Positive,
denominator = Death,
group = `Sub-district`
)
A funnel plot object with 267 points of which 0 are outliers.
Plot is adjusted for overdispersion.
FunnelPlotR methods: Makeover 1
funnel_plot(
.data = covid19,
numerator = Death,
denominator = Positive,
group = `Sub-district`,
data_type = "PR", #<<
xrange = c(0, 6500), #<<
yrange = c(0, 0.05) #<<
)
A funnel plot object with 267 points of which 7 are outliers.
Plot is adjusted for overdispersion.
FunnelPlotR methods: Makeover 2
funnel_plot(
.data = covid19,
numerator = Death,
denominator = Positive,
group = `Sub-district`,
data_type = "PR",
xrange = c(0, 6500),
yrange = c(0, 0.05),
label = NA,
title = "Cumulative COVID-19 Fatality Rate by Cumulative Total Number of COVID-19 Positive Cases", #<<
x_label = "Cumulative COVID-19 Positive Cases", #<<
y_label = "Cumulative Fatality Rate" #<<
)
A funnel plot object with 267 points of which 7 are outliers.
Plot is adjusted for overdispersion.
Funnel Plot for Fair Visual Comparison: ggplot2 methods
Computing the basic derived fields
<- covid19 %>%
df mutate(rate = Death / Positive) %>%
mutate(rate.se = sqrt((rate*(1-rate)) / (Positive))) %>%
filter(rate > 0)
<- weighted.mean(df$rate, 1/df$rate.se^2) fit.mean
Calculate lower and upper limits for 95% and 99.9% CI
<- seq(1, max(df$Positive), 1)
number.seq <- fit.mean - 1.96 * sqrt((fit.mean*(1-fit.mean)) / (number.seq))
number.ll95 <- fit.mean + 1.96 * sqrt((fit.mean*(1-fit.mean)) / (number.seq))
number.ul95 <- fit.mean - 3.29 * sqrt((fit.mean*(1-fit.mean)) / (number.seq))
number.ll999 <- fit.mean + 3.29 * sqrt((fit.mean*(1-fit.mean)) / (number.seq))
number.ul999 <- data.frame(number.ll95, number.ul95, number.ll999,
dfCI number.ul999, number.seq, fit.mean)
Plotting a static funnel plot
<- ggplot(df, aes(x = Positive, y = rate)) +
p geom_point(aes(label=`Sub-district`),
alpha=0.4) +
geom_line(data = dfCI,
aes(x = number.seq,
y = number.ll95),
size = 0.4,
colour = "grey40",
linetype = "dashed") +
geom_line(data = dfCI,
aes(x = number.seq,
y = number.ul95),
size = 0.4,
colour = "grey40",
linetype = "dashed") +
geom_line(data = dfCI,
aes(x = number.seq,
y = number.ll999),
size = 0.4,
colour = "grey40") +
geom_line(data = dfCI,
aes(x = number.seq,
y = number.ul999),
size = 0.4,
colour = "grey40") +
geom_hline(data = dfCI,
aes(yintercept = fit.mean),
size = 0.4,
colour = "grey40") +
coord_cartesian(ylim=c(0,0.05)) +
annotate("text", x = 1, y = -0.13, label = "95%", size = 3, colour = "grey40") +
annotate("text", x = 4.5, y = -0.18, label = "99%", size = 3, colour = "grey40") +
ggtitle("Cumulative Fatality Rate by Cumulative Number of COVID-19 Cases") +
xlab("Cumulative Number of COVID-19 Cases") +
ylab("Cumulative Fatality Rate") +
theme_light() +
theme(plot.title = element_text(size=12),
legend.position = c(0.91,0.85),
legend.title = element_text(size=7),
legend.text = element_text(size=7),
legend.background = element_rect(colour = "grey60", linetype = "dotted"),
legend.key.height = unit(0.3, "cm"))
p
Interactive Funnel Plot: plotly + ggplot2
<- ggplotly(p,
fp_ggplotly tooltip = c("label",
"x",
"y"))
fp_ggplotly