March 27, 2019
Creative Commons Attribution-NonCommercial-NoDerivs 3.0 License
Heavily borrowed from here: R for Data Science
Data visualization by Kieran Healy
The World Wide Web
What is the grammar of graphics?
ggplot(data = <DATA>) + <GEOM_FUNCTION>( mapping = aes(<MAPPINGS>), stat = <STAT>, position = <POSITION>) + <COORDINATE_FUNCTION> + <FACET_FUNCTION>
trends over time
Built iteratively!
gapminder
dataset from gapminder
package
gdpPercap
vs lifeExp
gapminder
look?
# Attach the plotting and data packages. library() stops with an error when a
# package is missing, whereas require() would only return FALSE and let the
# script fail later.
library(ggplot2)
library(gapminder)

# Keep only the rows for the year 2007 and preview the first six.
gapminder_sub <- gapminder[gapminder$year == 2007, ]
head(gapminder_sub)
## # A tibble: 6 x 6
##   country     continent  year lifeExp      pop gdpPercap
##   <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Afghanistan Asia       2007    43.8 31889923      975.
## 2 Albania     Europe     2007    76.4  3600523     5937.
## 3 Algeria     Africa     2007    72.3 33333216     6223.
## 4 Angola      Africa     2007    42.7 12420476     4797.
## 5 Argentina   Americas   2007    75.3 40301927    12779.
## 6 Australia   Oceania    2007    81.2 20434176    34435.
This should be straightforward
# Data and x/y mapping only; no geom layer has been added yet.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
)
We can add points by geom_point()
# Same data and mapping, now drawn as points.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()
We smooth the plot with geom_smooth()
# Smoothed trend only, using geom_smooth() defaults.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
geom_smooth()
and geom_point()
together
# Points with the default smoother drawn on top.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula 'y ~ x'
geom_smooth()
with a linear fit
# Points plus a smoother fitted with method = "lm".
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth(method = "lm")
The function scale_x_log10()
to scale axis to logs
# Points plus a "gam" smoother, with the x axis on a log10 scale.
ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth(method = "gam") +
  scale_x_log10()
Using xlim()
and ylim()
# Store the scatterplot so the following examples can add to it, then show it.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()
plot_example
Let's clip their wings
# Restrict both axes with xlim()/ylim(); the warning below reports the rows
# that were removed as a result.
plot_example +
  xlim(0, 20000) +
  ylim(50, 70)
## Warning: Removed 102 rows containing missing values (geom_point).
The second method is to zoom in with coord_cartesian()
# Rebuild the plain scatterplot as the starting point for zooming.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point()
plot_example
Let's zoom in:
# Set the visible window through the coordinate system.
plot_example +
  coord_cartesian(
    xlim = c(0, 20000),
    ylim = c(50, 70)
  )
One method is to use ggtitle()
, xlab()
, and ylab()
# Scatterplot with the title, subtitle and axis labels set one function at a
# time.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  ggtitle(
    label = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset"
  ) +
  xlab("GDP per capita") +
  ylab("Life expectancy")
plot_example
The other method is to set all the labels of the plot at once with labs()
# Title, subtitle, axis labels and caption supplied in a single labs() call.
plot_example +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )
Hardcoding col
and size
arguments within the geom_point()
# Fixed colour and size are passed straight to geom_point(), outside aes(),
# so they apply to every point.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(col = "steelblue", size = 3) +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )
plot_example
Setting color based on categories by using aes()
with col
argument
# Colour is mapped to continent inside aes(); size stays fixed at 3.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(mapping = aes(col = continent), size = 3) +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )
plot_example
You can change colors by scale_color_brewer()
, which uses palettes from the RColorBrewer
package.
# Switch the colour scale to the "Set1" brewer palette.
plot_example +
  scale_colour_brewer(palette = "Set1")

# Show the first ten rows of the palette information table.
library(RColorBrewer)
head(brewer.pal.info, n = 10)
##          maxcolors category colorblind
## BrBG            11      div       TRUE
## PiYG            11      div       TRUE
## PRGn            11      div       TRUE
## PuOr            11      div       TRUE
## RdBu            11      div       TRUE
## RdGy            11      div      FALSE
## RdYlBu          11      div       TRUE
## RdYlGn          11      div      FALSE
## Spectral        11      div      FALSE
## Accent           8     qual      FALSE
Change breaks by scale_x_continuous()
# A large scipen makes R prefer fixed over scientific notation when printing.
options(scipen = 999)
# Place x-axis breaks at 0, 10000, 20000 and 30000.
plot_example +
  scale_x_continuous(breaks = seq(from = 0, to = 30000, by = 10000))
Change labels by labels
in scale_x_continuous()
options(scipen = 999)
# Four breaks, labelled with the first four lower-case letters.
plot_example +
  scale_x_continuous(
    breaks = seq(from = 0, to = 30000, by = 10000),
    labels = letters[1:4]
  )
sprintf()
and custom functions
options(scipen = 999)
# x labels are pre-formatted with sprintf(); y labels are produced by a
# function applied to the break values.
plot_example +
  scale_x_continuous(
    breaks = seq(from = 0, to = 30000, by = 10000),
    labels = sprintf("%1.2f%%", seq(from = 0, to = 30000, by = 10000))
  ) +
  scale_y_continuous(
    labels = function(x) {
      paste0(x / 1000, "K")
    }
  )
# Both colour (continent) and point size (pop) are now mapped from the data.
plot_example <- ggplot(
  data = gapminder_sub,
  mapping = aes(x = gdpPercap, y = lifeExp)
) +
  geom_point(mapping = aes(col = continent, size = pop)) +
  labs(
    title = "GDP per capita Vs Life expectancy",
    subtitle = "From Gapminder dataset",
    x = "GDP per capita",
    y = "Life expectancy",
    caption = "Demographics"
  )
plot_example
Adding text and labels around the points
library(ggrepel)

# Only countries with GDP per capita above 30000 get a label.
gapminder_highgdp <- gapminder_sub[gapminder_sub$gdpPercap > 30000, ]

plot_example +
  geom_label_repel(
    mapping = aes(label = country),
    data = gapminder_highgdp,
    size = 2
  ) +
  labs(subtitle = "With ggrepel::geom_label_repel") +
  # The documented value for hiding the legend is lower-case "none";
  # "None" is not a documented legend position.
  theme(legend.position = "none")
# One panel per continent, arranged in two rows.
plot_example +
  facet_wrap(~continent, nrow = 2) +
  labs(
    title = "GDP per capita vs Life expectancy",
    subtitle = "Ggplot2 - Faceting - Multiple plots in one figure",
    caption = "Source: Gapminder"
  )
# Apply the built-in minimal theme to the stored plot.
plot_example +
  theme_minimal()
Economist!
# theme_economist() is provided by the ggthemes package.
library(ggthemes)
plot_example +
  theme_economist()
# Horizontal bar chart of life expectancy in India, one bar per survey year.
plot_example <- ggplot(
  data = subset(gapminder, country == "India"),
  mapping = aes(x = as.factor(year), y = lifeExp)
) +
  geom_bar(stat = "identity", fill = "#0D82BB", width = 0.5) +
  labs(
    title = "Change in life expectancy in India",
    x = "",
    y = "Life Expectancy (in years)"
  ) +
  theme_bw() +
  # Flip the axes so the years run along the vertical axis.
  coord_flip()
plot_example
Industrial-level Line Graph using only R
# read_csv(), %>% and str_to_lower() come from tidyverse packages, so attach
# the tidyverse before they are used.
library(tidyverse)

stock_amzn <- read_csv("http://sharpsightlabs.com/wp-content/uploads/2017/09/AMZN_stock.csv")

# Lower-case the column names; the plot below refers to `date` and `close`.
colnames(stock_amzn) <- colnames(stock_amzn) %>% str_to_lower()

# Line chart of the closing price with a translucent area fill underneath.
ggplot(stock_amzn, aes(x = date, y = close)) +
  geom_line(color = "cyan") +
  geom_area(fill = "cyan", alpha = .1) +
  labs(
    x = "Date",
    y = "Closing\nPrice",
    title = "Amazon's stock price has increased dramatically\nover the last 20 years"
  ) +
  theme(
    text = element_text(family = "Gill Sans", color = "#444444"),
    panel.background = element_rect(fill = "#444B5A"),
    panel.grid.minor = element_line(color = "#4d5566"),
    panel.grid.major = element_line(color = "#586174"),
    plot.title = element_text(size = 18),
    axis.title = element_text(size = 18, color = "#555555"),
    axis.title.y = element_text(vjust = 1, angle = 0),
    axis.title.x = element_text(hjust = 0)
  )

# With no plot argument, ggsave() writes the last plot to the given file.
ggsave("amazon.png")
# Install urbnmapr from GitHub only when it is not already available, so that
# re-running the script does not reinstall it every time.
if (!requireNamespace("urbnmapr", quietly = TRUE)) {
  devtools::install_github("UrbanInstitute/urbnmapr")
}
library(tidyverse)
library(urbnmapr)

# Join the county-level data to the county polygons on the FIPS code.
household_data <- left_join(countydata, counties, by = "county_fips")

# Filled county map coloured by median household income.
plot_map <- household_data %>%
  ggplot(aes(long, lat, group = group, fill = medhhincome)) +
  geom_polygon(color = NA) +
  coord_map(projection = "albers", lat0 = 39, lat1 = 45) +
  labs(fill = "Median Household Income")
plot_map
# Install only the packages that are not already available, instead of
# reinstalling all three on every run.
needed_pkgs <- c("gganimate", "png", "gifski")
missing_pkgs <- needed_pkgs[
  !vapply(needed_pkgs, requireNamespace, logical(1), quietly = TRUE)
]
if (length(missing_pkgs) > 0) {
  install.packages(missing_pkgs)
}
library(gapminder)
library(ggplot2)
library(gganimate)

# Faceted bubble chart animated over `year`; the title shows the frame time.
plot <- ggplot(gapminder, aes(gdpPercap, lifeExp, size = pop, colour = country)) +
  geom_point(alpha = 0.7) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  theme(legend.position = "none") +
  labs(title = "Year: {frame_time}", x = "GDP per capita", y = "life expectancy") +
  transition_time(year) +
  ease_aes("linear")

# Name the arguments so the meaning of the two numbers is explicit.
animate(plot, nframes = 100, fps = 10)
library(ggplot2)

# Dodged bar chart of `value` by `district`, one facet row per state.
# data_graph, colours_hex, format_bars and format_facet are not defined in
# this snippet and must already exist.
plot_base <- ggplot(data_graph, aes(x = district, y = value)) +
  geom_bar(
    mapping = aes(fill = state, alpha = round),
    stat = "identity",
    position = "dodge"
  ) +
  facet_grid(state ~ ., scales = "free_y", space = "free_y", switch = "y") +
  # The title is taken from the first value of the indicator.name column.
  ggtitle(data_graph$indicator.name[1]) +
  scale_fill_manual(values = colours_hex) +
  scale_alpha_manual(values = c(1, 0.6)) +
  format_bars +
  format_facet
E.g. Define the parameters of the error bars:
# Reusable error-bar layer kept in a list so it can be added to a plot later.
# dodge_width is not defined in this snippet and must already exist.
plot_ci <- list(
  geom_errorbar(
    mapping = aes(ymin = lower.ci, ymax = upper.ci, group = round),
    width = 0.4,
    position = position_dodge(width = dodge_width),
    color = "#3f444c"
  )
)
Add them to the base plot:
# Add the error-bar layer list stored in plot_ci to the base plot.
plot_base + plot_ci
plot.margin()
plot.background()
coord_flip()
scale_x_reverse()
and scale_y_reverse()
labs()
guides()
scale_aesthetic_vartype()
element_type()
and extensions for everything else
library(ggsci)
library(ggplot2)
library(gridExtra)

data("diamonds")

# Scatterplot of price against table for the larger diamonds, with a loess
# smoother per cut.
p1 <- ggplot(
  subset(diamonds, carat >= 2.2),
  aes(x = table, y = price, colour = cut)
) +
  geom_point(alpha = 0.7) +
  geom_smooth(method = "loess", alpha = 0.05, size = 1, span = 1) +
  theme_bw()

# Dodged histogram of depth by cut for a filtered subset.
p2 <- ggplot(
  subset(diamonds, carat > 2.2 & depth > 55 & depth < 70),
  aes(x = depth, fill = cut)
) +
  geom_histogram(colour = "black", binwidth = 1, position = "dodge") +
  theme_bw()

# Apply the ggsci D3 colour and fill scales, then place the plots side by side.
p1_d3 <- p1 + scale_color_d3()
p2_d3 <- p2 + scale_fill_d3()
grid.arrange(p1_d3, p2_d3, ncol = 2)