March 27, 2019
Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License
Heavily borrowed from here: R for Data Science
Data visualization by Kieran Healy
The World Wide Web
What is the grammar of graphics?
ggplot(data = <DATA>) +
<GEOM_FUNCTION>(
mapping = aes(<MAPPINGS>),
stat = <STAT>,
position = <POSITION>) +
<COORDINATE_FUNCTION> +
<FACET_FUNCTION>
trends over time
Built iteratively!
gapminder dataset from gapminder package
gdpPercap vs lifeExp
How does gapminder look?
require(ggplot2)
## Loading required package: ggplot2
library(gapminder)
gapminder_sub <- gapminder[gapminder$year == 2007, ]
head(gapminder_sub)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       2007    43.8 31889923      975.
## 2 Albania     Europe     2007    76.4  3600523     5937.
## 3 Algeria     Africa     2007    72.3 33333216     6223.
## 4 Angola      Africa     2007    42.7 12420476     4797.
## 5 Argentina   Americas   2007    75.3 40301927    12779.
## 6 Australia   Oceania    2007    81.2 20434176    34435.
This should be straightforward
# Declare the data and the x/y mappings only; no layer is added here.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
)
We can add points by geom_point()
# Same data and mappings, now with a point layer.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()
We smooth the plot with geom_smooth()
# Smoothed trend only (no point layer); the smoothing method is left at its
# default, which the message below reports.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
geom_smooth() and geom_point() together
# Points first, then the smoother as a second layer.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
geom_smooth() with a linear fit
# Replace the default smoother with a linear model fit.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth(method = "lm")
The function scale_x_log10() to scale axis to logs
# GAM smoother, with the x axis put on a log10 scale.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth(method = "gam") +
  scale_x_log10()
Using xlim() and ylim()
# Keep the scatter plot in a variable so further scales can be added to it.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()

plot_example
Let's clip their wings
# Axis limits set through xlim()/ylim(); the warning below shows that rows
# outside the limits are removed from the point layer.
plot_example +
  xlim(0, 20000) +
  ylim(50, 70)
## Warning: Removed 102 rows containing missing values (geom_point).
The second method is to zoom in with coord_cartesian()
# Rebuild the base scatter plot used by the zoom example.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()

plot_example
Let's zoom in:
# Set the visible window through the coordinate system.
plot_example +
  coord_cartesian(
    xlim = c(0, 20000),
    ylim = c(50, 70)
  )
One method is to use ggtitle(), xlab(), and ylab()
# Scatter plot with the title, subtitle and axis labels each set by a
# dedicated helper.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  ggtitle(
    label = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset"
  ) +
  xlab("GDP per capita") +
  ylab("Life expectancy")

plot_example
The other method is to set all the labels of the plot at once with labs()
# A single labs() call sets the title, subtitle, axis labels and caption.
plot_example +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )
Hardcoding col and size arguments within the geom_point()
# Colour and size are given outside aes(), so the same fixed values apply to
# every point.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(col = "steelblue", size = 3) +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )

plot_example
Setting color based on categories by mapping the col aesthetic inside aes()
# Colour is mapped to `continent` inside aes(); size stays a fixed value.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(mapping = aes(col = continent), size = 3) +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )

plot_example
You can change colors by scale_color_brewer(). RColorBrewer package.
# Use the ColorBrewer "Set1" palette for the colour scale.
plot_example +
  scale_colour_brewer(palette = "Set1")
library(RColorBrewer)

# Show the first ten rows of the palette table shipped with RColorBrewer.
# Each statement needs its own line: fused onto one line the code does not
# parse.
head(brewer.pal.info, 10)
##          maxcolors category colorblind
## BrBG            11      div       TRUE
## PiYG            11      div       TRUE
## PRGn            11      div       TRUE
## PuOr            11      div       TRUE
## RdBu            11      div       TRUE
## RdGy            11      div      FALSE
## RdYlBu          11      div       TRUE
## RdYlGn          11      div      FALSE
## Spectral        11      div      FALSE
## Accent           8     qual      FALSE
Change breaks by scale_x_continuous()
# Large scipen penalty so fixed notation is preferred over scientific notation.
# This must be its own statement: on the same line as the plot expression the
# code is a syntax error.
options(scipen = 999)

# Place x-axis breaks at 0, 10000, 20000 and 30000.
plot_example +
  scale_x_continuous(breaks = seq(0, 30000, 10000))
Change labels by labels in scale_x_continuous()
# Prefer fixed over scientific notation.
options(scipen = 999)

# Four breaks, relabelled with the first four lower-case letters.
plot_example +
  scale_x_continuous(
    breaks = seq(0, 30000, 10000),
    labels = letters[1:4]
  )
sprintf() and custom functions
# Prefer fixed over scientific notation.
options(scipen = 999)

# x labels are pre-formatted strings built with sprintf(); y labels are
# computed from the break values by a function.
plot_example +
  scale_x_continuous(
    breaks = seq(0, 30000, 10000),
    labels = sprintf("%1.2f%%", seq(0, 30000, 10000))
  ) +
  scale_y_continuous(
    labels = function(x) {
      paste0(x / 1000, "K")
    }
  )
# Bubble chart: colour is mapped to continent and point size to population.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(mapping = aes(col = continent, size = pop)) +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )

plot_example
Adding text and labels around the points
library(ggrepel)

# Label only the countries whose GDP per capita exceeds 30000.
# (The library() call and this assignment must be separate statements; fused
# on one line they do not parse.)
gapminder_highgdp <- gapminder_sub[gapminder_sub$gdpPercap > 30000, ]

plot_example +
  geom_label_repel(
    mapping = aes(label = country),
    data = gapminder_highgdp,
    size = 2
  ) +
  labs(subtitle = "With ggrepel::geom_label_repel") +
  # The value is lower-case "none"; "None" is not a recognised legend position.
  theme(legend.position = "none")
# One panel per continent, laid out on two rows.
plot_example +
  facet_wrap(~continent, nrow = 2) +
  labs(
    title = "GDP per capita vs Life expectancy",
    subtitle = "Ggplot2 - Faceting - Multiple plots in one figure",
    caption = "Source: Gapminder"
  )

# Same plot drawn with the built-in minimal theme.
plot_example +
  theme_minimal()
Economist!
library(ggthemes)

# Apply the Economist theme from ggthemes. The library() call has to be a
# separate statement; on the same line as the plot expression the code does
# not parse.
plot_example +
  theme_economist()
# Horizontal bar chart of life expectancy in India, one bar per survey year.
# geom_col() draws bars whose heights are the y values themselves.
plot_example <- ggplot(
  data = subset(gapminder, country == "India"),
  mapping = aes(x = as.factor(year), y = lifeExp)
) +
  geom_col(fill = "#0D82BB", width = 0.5) +
  labs(
    title = "Change in life expectancy in India",
    x = "",
    y = "Life Expectancy (in years)"
  ) +
  theme_bw() +
  coord_flip()

plot_example
Industrial-level Line Graph using only R
# read_csv(), %>% and str_to_lower() are tidyverse functions, so the tidyverse
# has to be attached before this snippet runs.
library(tidyverse)

stock_amzn <- read_csv("http://sharpsightlabs.com/wp-content/uploads/2017/09/AMZN_stock.csv")

# Lower-case the column names so the plot can refer to `date` and `close`.
colnames(stock_amzn) <- colnames(stock_amzn) %>% str_to_lower()

# Line plus a translucent area under it, on a dark panel.
ggplot(stock_amzn, aes(x = date, y = close)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = .1) +
  labs(
    x = "Date",
    y = "Closing\nPrice",
    title = "Amazon's stock price has increased dramatically\nover the last 20 years"
  ) +
  theme(
    text = element_text(family = "Gill Sans", color = "#444444"),
    panel.background = element_rect(fill = "#444B5A"),
    panel.grid.minor = element_line(color = "#4d5566"),
    panel.grid.major = element_line(color = "#586174"),
    plot.title = element_text(size = 18),
    axis.title = element_text(size = 18, color = "#555555"),
    axis.title.y = element_text(vjust = 1, angle = 0),
    axis.title.x = element_text(hjust = 0)
  )

# With no `plot` argument, ggsave() writes the last plot displayed.
ggsave("amazon.png")
# Install urbnmapr from GitHub only when it is missing, so re-running the
# script does not reinstall the package every time.
if (!requireNamespace("urbnmapr", quietly = TRUE)) {
  devtools::install_github("UrbanInstitute/urbnmapr")
}

library(tidyverse)
library(urbnmapr)

# Attach the county polygons to the county-level data via the FIPS code.
household_data <- left_join(countydata, counties, by = "county_fips")

# Choropleth: one polygon per `group`, filled by median household income.
plot_map <- household_data %>%
  ggplot(aes(long, lat, group = group, fill = medhhincome)) +
  geom_polygon(color = NA) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  labs(fill = "Median Household Income")

plot_map
# Install only the packages that are not available yet, so re-running the
# script does not reinstall them every time.
animation_pkgs <- c("gganimate", "png", "gifski")
missing_pkgs <- animation_pkgs[
  !vapply(animation_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}

library(gapminder)
library(ggplot2)
library(gganimate)

# Animated bubble chart: one panel per continent, one frame sequence over
# `year`; the title shows the time of the current frame.
plot <- ggplot(
  gapminder,
  aes(gdpPercap, lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  theme(legend.position = "none") +
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  transition_time(year) +
  ease_aes("linear")

# 100 frames rendered at 10 frames per second.
animate(plot, nframes = 100, fps = 10)
library(ggplot2)
# Dodged bar chart of `value` per district, faceted by state.
# `data_graph`, `colours_hex`, `format_bars` and `format_facet` are not defined
# in this snippet and are expected to exist already.
plot_base <- ggplot(data = data_graph, mapping = aes(x = district, y = value)) +
  geom_bar(
    mapping = aes(fill = state, alpha = round),
    stat = "identity",
    position = "dodge"
  ) +
  facet_grid(
    state ~ .,
    scales = "free_y",
    space = "free_y",
    switch = "y"
  ) +
  # The title is taken from the first value of the indicator.name column.
  ggtitle(data_graph$indicator.name[1]) +
  scale_fill_manual(values = colours_hex) +
  scale_alpha_manual(values = c(1, 0.6)) +
  format_bars +
  format_facet
E.g. Define the parameters of the error bars:
# Reusable error-bar layer, wrapped in a list so it can be added to a plot
# with `+`. `dodge_width` is not defined in this snippet and is expected to
# exist already.
plot_ci <- list(
  geom_errorbar(
    mapping = aes(ymin = lower.ci, ymax = upper.ci, group = round),
    position = position_dodge(width = dodge_width),
    width = .4,
    color = "#3f444c"
  )
)
Add them to the base plot:
# Add the error-bar layer list to the base bar chart.
plot_base +
  plot_ci
plot.margin()
plot.background()
coord_flip()
scale_x_reverse() and scale_y_reverse()
labs()
guides()
scale_aesthetic_vartype()
element_type() and extensions for everything else
library("ggsci")
library("ggplot2")
library("gridExtra")
data("diamonds")
# Scatter plot of price against table for the large diamonds, with one loess
# smoother per cut.
p1 <- ggplot(
  data = subset(diamonds, carat >= 2.2),
  mapping = aes(x = table, y = price, colour = cut)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", alpha = 0.05, size = 1, span = 1) +
  theme_bw()

# Dodged histogram of depth, filled by cut.
p2 <- ggplot(
  data = subset(diamonds, carat > 2.2 & depth > 55 & depth < 70),
  mapping = aes(x = depth, fill = cut)
) +
  geom_histogram(colour = "black", binwidth = 1, position = "dodge") +
  theme_bw()

# Apply the ggsci D3 palettes to the colour and fill scales.
p1_d3 <- p1 + scale_color_d3()
p2_d3 <- p2 + scale_fill_d3()

# Show both plots side by side.
grid.arrange(p1_d3, p2_d3, ncol = 2)