library(ggplot2) # beautiful graphs
library(ggthemes) # nice themes for ggplot2
library(ggbeeswarm) # helpful 'beeswarm' geometry
library(ggdist) # 'distribution' geometries
library(cowplot) # arrange graphs
library(pander) # nice tables
Choosing the right chart to represent your data can be a daunting process. I believe that a starting point for this thinking is some basic statistical thinking about the type of variables that you have. At the broadest level, variables may be conceptualized as categorical variables, or continuous variables.
Once we have discerned the type of variable that we have, there are two followup questions we may ask before deciding upon a chart strategy:
The principles of graphing discussed in this document transcend any particular software package, and could be implemented in many different software packages, such as SPSS, SAS, Stata, or R.
The graphs in these particular examples use ggplot2, a graphing library in R. ggplot2
graph syntax can be formidably complex, with a steep learning curve. More information about ggplot can be found here.
Note that ggplot2 can be MUCH simpler than these examples make it look.
For example,
ggplot(mydata, aes(x = x)) + geom_histogram()
will produce a perfectly serviceable histogram.
Much of the complication of the code in this document is simply the result of formatting tweaks to get the graphs EXACTLY the way I wanted them.
Observe also that, for layout purposes, I am assigning each ggplot call to an object, e.g.
p1 <- ggplot(mydata, aes(x = x)) + geom_histogram()
so that I can later use plot_grid
to lay out the graphs.
In your own work, you do not need to do this, and it may be easier to simply write:
ggplot(...) + ...
This document uses colors based upon official University of Michigan colors. Using colors that match the design scheme of your organization may be helpful.
# michigan colors
# Character vector of eight hex colour codes; the trailing comment on each
# entry gives the colour's name.
michigan_colors <- c(
  "#00274c", # blue
  "#ffcb05", # maize
  "#a4270b", # tappan red
  "#e96300", # ross school orange
  "#beb300", # wave field green
  "#21c1bc", # taubman teal
  "#2878ba", # arboretum blue
  "#7207a5"  # ann arbor amethyst
)
# name individual colors
# One variable per colour so that plots can refer to a colour by name
# instead of by hex code.
michigan_blue <- "#00274c"
michigan_maize <- "#ffcb05"
tappan_red <- "#a4270b"
ross_school_orange <- "#e96300"
wave_field_green <- "#beb300"
taubman_teal <- "#21c1bc"
arboretum_blue <- "#2878ba"
ann_arbor_amethyst <- "#7207a5"
A few randomly selected observations…
 | x | y | z | u | v | w | s | q |
---|---|---|---|---|---|---|---|---|
983 | 96.46 | 152 | 106 | Group B | Group B | Group A | Group 2 | 116.5 |
557 | 240.7 | 181.2 | 77.56 | Group A | Group A | Group B | Group 2 | 260.7 |
43 | 158.5 | 411.7 | 125 | Group A | Group A | Group B | Group 2 | 178.5 |
828 | 235.8 | 169.5 | 65.58 | Group B | Group B | Group B | Group 3 | 265.8 |
217 | 112.8 | 127.8 | 103 | Group B | Group B | Group A | Group 3 | 142.8 |
979 | 153.5 | 193.5 | 96.83 | Group B | Group B | Group B | Group 2 | 173.5 |
594 | 82.44 | 32.71 | 75.58 | Group A | Group A | Group A | Group 2 | 102.4 |
225 | 110.4 | 131.6 | 117.1 | Group B | Group B | Group A | Group 2 | 130.4 |
750 | 81.92 | 42.23 | 140.6 | Group B | Group A | Group A | Group 4 | 121.9 |
635 | 95.33 | 103.8 | 100.1 | Group A | Group A | Group A | Group 2 | 115.3 |
# Histogram of `x`: the whole sample, and one panel per level of `w`.
my_histogram <- ggplot(mydata, aes(x = x)) +
  geom_histogram(fill = arboretum_blue) +
  ggtitle("histogram") +
  xlab("continuous") +
  ylab("count") +
  theme_minimal()

my_facet_histogram <- ggplot(mydata, aes(x = x)) +
  geom_histogram(fill = arboretum_blue) +
  facet_wrap(~w, nrow = 2) +
  ggtitle("histogram by group") +
  xlab("continuous") +
  ylab("count") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # small font size for axis

# Lay the two graphs out side by side.
plot_grid(my_histogram, my_facet_histogram, ncol = 2)
# Kernel density of `y`: the whole sample, and one panel per level of `w`.
my_density <- ggplot(mydata, aes(x = y)) +
  geom_density(fill = michigan_maize) +
  ggtitle("density") +
  xlab("continuous") +
  ylab("density") +
  theme_minimal()

my_facet_density <- ggplot(mydata, aes(x = y)) +
  geom_density(fill = michigan_maize) +
  facet_wrap(~w, nrow = 2) +
  ggtitle("density by group") +
  xlab("continuous") +
  ylab("density") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # small font size for axis

# Lay the two graphs out side by side.
plot_grid(my_density, my_facet_density, ncol = 2)
# Bar chart of the mean of `q`: a single bar for the whole sample, and one
# bar per level of `s`. stat_summary() computes the mean, so the raw data
# can be passed in directly.
my_m_barchart <- ggplot(
  mydata,
  aes(x = 1, y = q, fill = factor(1))
) +
  stat_summary(fun = mean, geom = "bar") +
  scale_fill_manual(values = c(arboretum_blue)) +
  ggtitle("barchart of mean") +
  guides(fill = "none") + # "none" replaces the deprecated fill = FALSE
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.ticks.x = element_blank())

my_facet_m_barchart <- ggplot(
  mydata,
  aes(x = factor(s), y = q, fill = s)
) +
  stat_summary(fun = mean, geom = "bar") +
  scale_fill_manual(
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("barchart of mean \nby group") +
  guides(fill = "none") +
  xlab("categorical") +
  ylab("mean of continuous") +
  theme_minimal()

# Lay the two graphs out side by side.
plot_grid(my_m_barchart, my_facet_m_barchart, ncol = 2)
# Horizontal bar chart of the mean of `q`: a single bar for the whole
# sample, and one bar per level of `s`. coord_flip() turns the vertical
# bars on their side, so the hidden "y" axis text is the category axis.
my_horiz_m_barchart <- ggplot(
  mydata,
  aes(x = 1, y = q, fill = factor(1))
) +
  stat_summary(fun = mean, geom = "bar") +
  coord_flip() +
  scale_fill_manual(values = c(arboretum_blue)) +
  ggtitle("horizontal barchart of mean") +
  guides(fill = "none") + # "none" replaces the deprecated fill = FALSE
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks.y = element_blank())

my_facet_horiz_m_barchart <- ggplot(
  mydata,
  aes(x = factor(s), y = q, fill = s)
) +
  stat_summary(fun = mean, geom = "bar") +
  coord_flip() +
  scale_fill_manual(
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("horizontal barchart of mean \nby group") +
  guides(fill = "none") +
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks.y = element_blank())

plot_grid(my_horiz_m_barchart, my_facet_horiz_m_barchart)
# Horizontal dot chart of the mean of `q`: a single dot for the whole
# sample, and one dot per level of `s`.
#
# The single-dot plot maps `color` (not `fill`): the point geom is coloured
# through the colour aesthetic, so scale_color_manual() only takes effect
# when `color` is the mapped aesthetic.
my_horiz_m_dotchart <- ggplot(
  mydata,
  aes(x = 1, y = q, color = factor(1))
) +
  stat_summary(fun = mean, geom = "point", size = 5) +
  coord_flip() +
  scale_color_manual(values = c(arboretum_blue)) +
  ggtitle("horizontal dotchart of mean") +
  guides(color = "none") + # a one-entry legend carries no information
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# The colour legend is kept here (titled "group") to identify the dots.
my_facet_horiz_m_dotchart <- ggplot(
  mydata,
  aes(x = factor(s), y = q, color = s)
) +
  stat_summary(fun = mean, geom = "point", size = 5) +
  coord_flip() +
  scale_color_manual(
    name = "group",
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("horizontal dotchart of mean \nby group") +
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    axis.ticks = element_blank()
  )

plot_grid(my_horiz_m_dotchart, my_facet_horiz_m_dotchart)
# Horizontal lollipop chart of the mean of `q`: a single lollipop for the
# whole sample, and one lollipop per level of `s`.
#
# Both the head (point) and the stick (linerange from 0 to the mean) are
# computed by stat_summary(), so each is drawn once per group and the stick
# always ends at that group's own mean. Writing mean(q) inside aes() would
# instead evaluate the mean over all rows and draw one segment per row.
my_horiz_m_lollipop_chart <- ggplot(
  mydata,
  aes(x = 1, y = q, color = factor(1))
) +
  stat_summary(fun = mean, geom = "point", size = 5) +
  stat_summary(
    fun = mean,
    fun.min = function(z) 0, # stick starts at zero
    fun.max = mean,          # and ends at the mean
    geom = "linerange"
  ) +
  coord_flip() +
  scale_color_manual(values = c(arboretum_blue)) +
  ggtitle("horizontal lollipop chart of mean") +
  guides(color = "none") + # a one-entry legend carries no information
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank()
  )

# The colour legend is kept here (titled "group") to identify the lollipops.
my_facet_horiz_m_lollipop_chart <- ggplot(
  mydata,
  aes(x = factor(s), y = q, color = s)
) +
  stat_summary(fun = mean, geom = "point", size = 5) +
  stat_summary(
    fun = mean,
    fun.min = function(z) 0, # stick starts at zero
    fun.max = mean,          # and ends at the group mean
    geom = "linerange"
  ) +
  coord_flip() +
  scale_color_manual(
    name = "group",
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("horizontal lollipop chart of mean \nby group") +
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(
    axis.title.y = element_blank(),
    axis.ticks = element_blank()
  )

plot_grid(my_horiz_m_lollipop_chart, my_facet_horiz_m_lollipop_chart)
# Line chart of the mean of `q` across the levels of `s`.
#
# In the first plot y is mean(q), evaluated over all rows, so every level of
# `s` gets the same value and the line is flat at the overall mean. In the
# second plot y is `q`, so stat_summary() yields one mean per level of `s`.
# `group = 1` joins the levels into a single line.
my_m_linechart <- ggplot(
  mydata,
  aes(x = factor(s), y = mean(q), group = 1)
) +
  stat_summary(
    fun = mean,
    geom = "line",
    size = 2,
    color = arboretum_blue
  ) +
  geom_blank() +
  ggtitle("linechart of mean") +
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.ticks.x = element_blank())

my_facet_m_linechart <- ggplot(
  mydata,
  aes(x = factor(s), y = q, group = 1)
) +
  stat_summary(
    fun = mean,
    geom = "line",
    size = 2,
    color = arboretum_blue
  ) +
  ggtitle("linechart of mean \nby group") +
  xlab(" ") +
  ylab("mean of continuous") +
  theme_minimal()

plot_grid(my_m_linechart, my_facet_m_linechart)
# Violin plot of `y`: the whole sample, and one panel per level of `w`.
# x is the constant 1 because a single violin needs no category axis.
my_violin <- ggplot(mydata, aes(x = 1, y = y)) +
  geom_violin(fill = ross_school_orange) +
  ggtitle("violin plot") +
  xlab(" ") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.ticks.x = element_blank())

my_facet_violin <- ggplot(mydata, aes(x = 1, y = y)) +
  geom_violin(fill = ross_school_orange) +
  facet_wrap(~w, ncol = 2) +
  ggtitle("violin plot \nby group") +
  xlab("categorical") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.ticks.x = element_blank())

# Lay the two graphs out side by side.
plot_grid(my_violin, my_facet_violin, ncol = 2)
# Boxplot of `y`: the whole sample, and one box per level of `w`.
my_boxplot <- ggplot(mydata, aes(x = 2, y = y)) +
  geom_boxplot(colour = tappan_red) +
  # `limits` is spelled out in full rather than relying on partial
  # argument matching of `limit`.
  scale_x_discrete(limits = c(0, 1, 2)) +
  ggtitle("boxplot") +
  xlab(" ") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.ticks.x = element_blank())

my_conditional_boxplot <- ggplot(mydata, aes(x = w, y = y)) +
  geom_boxplot(colour = tappan_red, width = .5) +
  ggtitle("boxplot \nby group") +
  xlab("categorical") +
  ylab("continuous") +
  theme_minimal()

# Lay the two graphs out side by side.
plot_grid(my_boxplot, my_conditional_boxplot, ncol = 2)
library(ggbeeswarm) # beeswarm geometry

# Beeswarm plot of `y` along the horizontal axis: the whole sample, and one
# panel per level of `w`. The vertical position is the constant 1, so its
# axis text is hidden.
my_beeswarm <- ggplot(mydata, aes(x = y, y = 1)) +
  geom_beeswarm(
    colour = ann_arbor_amethyst,
    groupOnX = FALSE
  ) +
  ggtitle("beeswarm plot") +
  xlab("continuous") +
  ylab("") +
  theme_minimal() +
  theme(
    axis.text = element_text(size = 5),
    axis.text.y = element_blank()
  )

my_facet_beeswarm <- ggplot(mydata, aes(x = y, y = 1)) +
  geom_beeswarm(
    colour = ann_arbor_amethyst,
    groupOnX = FALSE
  ) +
  facet_wrap(~w, nrow = 2) +
  ggtitle("beeswarm plot \nby group") +
  xlab("continuous") +
  ylab("") +
  theme_minimal() +
  theme(
    axis.text.x = element_text(size = 5),
    axis.text.y = element_blank()
  )

plot_grid(my_beeswarm, my_facet_beeswarm)
library(ggdist) # 'distribution' geometries

# Dot plot of `y` drawn with ggdist's stat_dots(): the whole sample, and one
# panel per level of `w`.
my_dotplot <- ggplot(mydata, aes(x = y)) +
  stat_dots(colour = wave_field_green) +
  ggtitle("dotplot") +
  xlab("continuous") +
  ylab("density") +
  theme_minimal()

my_facet_dotplot <- ggplot(mydata, aes(x = y)) +
  stat_dots(colour = wave_field_green) +
  facet_wrap(~w, nrow = 2) +
  ggtitle("dotplot \nby group") +
  xlab("continuous") +
  ylab("density") +
  theme_minimal() +
  theme(axis.text = element_text(size = 5)) # small font size for axis

# Lay the two graphs out side by side.
plot_grid(my_dotplot, my_facet_dotplot, ncol = 2)
# Bar chart of counts per level of `s`: the whole sample, and one panel per
# level of `u`. The bars are coloured by `s`; the legend is hidden.
my_barchart <- ggplot(mydata, aes(s, fill = s)) +
  geom_bar(width = 1.0) +
  scale_fill_manual(
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("bar chart") +
  guides(fill = "none") + # "none" replaces the deprecated fill = FALSE
  xlab("categorical") +
  ylab("count") +
  theme_minimal()

my_facet_barchart <- ggplot(mydata, aes(s, fill = s)) +
  geom_bar(width = 1.0) +
  facet_wrap(~u, ncol = 2) +
  scale_fill_manual(
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("bar chart \nby group") +
  guides(fill = "none") +
  xlab("categorical") +
  ylab("count") +
  theme_minimal() +
  theme(axis.text.x = element_text(size = rel(.6))) # shrink crowded labels

# Lay the two graphs out side by side.
plot_grid(my_barchart, my_facet_barchart, ncol = 2)
# Horizontal bar chart of counts per level of `s`: the whole sample, and one
# panel per level of `u`. coord_flip() turns the vertical bars on their side.
# NOTE(review): the object name `my_horiz_barchat` (sic) is kept unchanged in
# case it is referenced elsewhere.
my_horiz_barchat <- ggplot(mydata, aes(s, fill = s)) +
  geom_bar(width = 1.0) +
  coord_flip() +
  scale_fill_manual(
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("horizontal bar chart") +
  guides(fill = "none") + # "none" replaces the deprecated fill = FALSE
  xlab("categorical") +
  ylab("count") +
  theme_minimal()

my_horiz_facet_barchart <- ggplot(mydata, aes(s, fill = s)) +
  geom_bar(width = 1.0) +
  facet_wrap(~u, ncol = 1) +
  coord_flip() +
  scale_fill_manual(
    values = c(
      arboretum_blue,
      taubman_teal,
      michigan_blue,
      michigan_maize
    )
  ) +
  ggtitle("horizontal bar chart \nby group") +
  guides(fill = "none") +
  xlab("categorical") +
  ylab("count") +
  theme_minimal() +
  theme(axis.text.y = element_text(size = rel(.5))) # shrink crowded labels

# Lay the two graphs out side by side.
plot_grid(my_horiz_barchat, my_horiz_facet_barchart, ncol = 2)
# Pie chart of `v`: the whole sample, and one panel per level of `u`.
# A pie is a single stacked bar (x fixed at 1) drawn in polar coordinates
# with the bar height mapped to the angle (theta = "y").
my_pie <- ggplot(mydata, aes(x = 1, fill = v)) +
  geom_bar(width = 1) +
  coord_polar(theta = "y") +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("pie chart") +
  guides(fill = "none") + # "none" replaces the deprecated fill = FALSE
  xlab(" ") +
  ylab("categorical") +
  theme_void() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

# position = "fill" rescales each panel's bar to proportions.
my_facet_pie <- ggplot(mydata, aes(x = 1, fill = v)) +
  geom_bar(width = 1, position = "fill") +
  coord_polar(theta = "y") +
  facet_wrap(~u, ncol = 2) +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("pie chart \nby group") +
  guides(fill = "none") +
  xlab(" ") +
  ylab("categorical") +
  theme_void() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

# Lay the two graphs out side by side.
plot_grid(my_pie, my_facet_pie, ncol = 2)
# Doughnut chart of `v`: the whole sample, and one panel per level of `u`.
# Same construction as a pie chart (a stacked bar in polar coordinates), but
# the bar sits at x = 2 and xlim() starts at .5, leaving an empty band
# between the centre and the bar that forms the hole.
my_doughnut <- ggplot(mydata, aes(x = 2, fill = v)) +
  geom_bar() +
  coord_polar(theta = "y") +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("doughnut chart") +
  guides(fill = "none") + # "none" replaces the deprecated fill = FALSE
  xlab(" ") +
  ylab("categorical") +
  xlim(.5, 2.5) +
  theme_void() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

# position = "fill" rescales each panel's bar to proportions.
my_facet_doughnut <- ggplot(mydata, aes(x = 2, fill = v)) +
  geom_bar(position = "fill") +
  coord_polar(theta = "y") +
  facet_wrap(~u, ncol = 2) +
  scale_fill_manual(values = michigan_colors) +
  ggtitle("doughnut chart \nby group") +
  guides(fill = "none") +
  xlab(" ") +
  ylab("categorical") +
  xlim(.5, 2.5) +
  theme_void() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

# Lay the two graphs out side by side.
plot_grid(my_doughnut, my_facet_doughnut, ncol = 2)
# Scatterplot of `y` against `x`: points only, and points with a fitted
# straight line (linear model) drawn over them. Axis text and ticks are
# hidden in both.
my_scatterplot <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  ggtitle("scatterplot") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

my_scatterplot_smoother <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  geom_smooth(method = lm, color = michigan_maize, size = 2) +
  ggtitle("scatterplot with fit line") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

plot_grid(my_scatterplot, my_scatterplot_smoother)
# Two further views of `y` against `x`: a hexagon-binned plot whose fill
# runs from teal to blue, and a scatterplot with geom_smooth()'s default
# smoother and its confidence band (se = TRUE).
my_hexagon <- ggplot(mydata, aes(x = x, y = y)) +
  geom_hex() +
  ggtitle("hexagon plot") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(
    legend.text = element_text(size = 4), # keep the legend compact
    legend.key.size = unit(.25, "cm")
  ) +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank()) +
  scale_fill_gradient(
    low = taubman_teal,
    high = arboretum_blue
  )

my_smoother <- ggplot(mydata, aes(x = x, y = y)) +
  geom_point(colour = ann_arbor_amethyst) +
  geom_smooth(se = TRUE, color = michigan_maize, size = 2) +
  ggtitle("scatterplot with smoother") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

plot_grid(my_hexagon, my_smoother)
# Two further views of `y` against `x`: an area plot, and a filled contour
# plot of the estimated 2-d density.
my_area <- ggplot(mydata, aes(x = x, y = y)) +
  geom_area(position = "stack", fill = ross_school_orange) +
  ggtitle("area plot") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank())

my_contour <- ggplot(mydata, aes(x = x, y = y)) +
  # after_stat(level) replaces the deprecated ..level.. notation for
  # referring to a variable computed by the stat.
  stat_density_2d(
    aes(fill = after_stat(level)),
    geom = "polygon"
  ) +
  ggtitle("contour plot") +
  xlab("continuous") +
  ylab("continuous") +
  theme_minimal() +
  theme(axis.text.x = element_blank()) +
  theme(axis.text.y = element_blank()) +
  theme(axis.ticks = element_blank()) +
  scale_fill_gradient(
    low = taubman_teal,
    high = arboretum_blue
  )

plot_grid(my_area, my_contour)
Graphics made with the ggplot2 graphing library created by Hadley Wickham.
Available online at https://agrogan1.github.io/
How to Choose a Chart by Andrew Grogan-Kaylor is licensed under a Creative Commons Attribution-ShareAlike 4.0 International License. You are welcome to download and use this handout in your own classes, or work, as long as the handout remains properly attributed.
Last updated: August 23 2021
at 09:36