# Subset gapminder down to the rows whose country is the United States.
us_data <- filter(gapminder, country == "United States")

# Scatterplot of life expectancy against GDP per capita for the US subset.
ggplot(us_data, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()
# Same US scatterplot, supplied through the pipe and given axis labels and a
# title. The piped data frame lands in ggplot()'s first argument (`data`).
us_data %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  labs(
    title = "GDP Per Capita vs Life Exp. in US",
    x = "GDP per Capita",
    y = "Life Expectancy"
  )
# US scatterplot with two more layers stacked on the points: a connecting
# line and a smoothed trend (default smoothing settings).
us_data %>%
  ggplot(mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_line() +
  geom_smooth()
## `geom_smooth()` using method = 'loess' and formula = 'y ~ x'
# Full gapminder scatterplot, split into one panel per year.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  facet_wrap(~ year)

# Point color mapped to continent.
ggplot(
  gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent)
) +
  geom_point()

# Outline color mapped to continent and interior fill mapped to year;
# shape 21 is used so the points have both an outline and a fill.
ggplot(
  gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, color = continent, fill = year)
) +
  geom_point(shape = 21)

# Point shape mapped to continent, set on the layer rather than globally.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(shape = continent))
The aesthetics used so far are ‘color’, ‘fill’, and ‘shape’. The last aesthetic we’ll talk about is ‘size’, which adjusts point size. Add two aesthetics to this plot: map ‘continent’ to ‘color’, and map ‘pop’ to ‘size’. How many legends are there now?
# Layer-level aesthetics: continent drives point color, pop drives point size.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point(mapping = aes(size = pop, color = continent))
# Histogram of lifeExp, outlined and filled by continent, with default bins.
ggplot(gapminder, aes(x = lifeExp, color = continent, fill = continent)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same histogram with an explicit bin width of 5.
ggplot(gapminder, aes(x = lifeExp, color = continent, fill = continent)) +
  geom_histogram(binwidth = 5)
Again, experiment with binwidth.
# Frequency polygon of lifeExp, one colored line per continent, bin width 10.
ggplot(gapminder, mapping = aes(x = lifeExp, color = continent)) +
  geom_freqpoly(binwidth = 10)
In the blank, experiment with setting ‘color’ versus ‘fill’ as ‘continent’.
# Binned area chart of lifeExp with continent mapped to the outline color.
ggplot(gapminder, mapping = aes(x = lifeExp, color = continent)) +
  geom_area(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

# Same chart with continent mapped to the fill instead.
ggplot(gapminder, mapping = aes(x = lifeExp, fill = continent)) +
  geom_area(stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
To change that behavior, set position = “dodge”.
# Binned area chart filled by continent, with the position adjustment
# switched to "dodge".
ggplot(gapminder, mapping = aes(x = lifeExp, fill = continent)) +
  geom_area(position = "dodge", stat = "bin")
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.
# Density curves of lifeExp, filled by continent.
ggplot(gapminder, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density()

# Density curves distinguished by outline color only.
ggplot(gapminder, mapping = aes(x = lifeExp, color = continent)) +
  geom_density()

# Filled density curves made partly transparent so overlaps stay visible.
ggplot(gapminder, mapping = aes(x = lifeExp, fill = continent)) +
  geom_density(alpha = 0.6)
Hint: for a vertical line, you’ll need to specify an x-intercept (and for a horizontal line, you’d need to specify a y-intercept). Do this with ‘xintercept = 70’.
# Semi-transparent density curves by continent with a vertical reference
# line at lifeExp = 70.
ggplot(gapminder, aes(x = lifeExp, color = continent, fill = continent)) +
  geom_density(alpha = 0.3) +
  geom_vline(xintercept = 70)

# Same plot, plus a text label rotated 90 degrees and placed on the line.
ggplot(gapminder, aes(x = lifeExp, color = continent, fill = continent)) +
  geom_density(alpha = 0.3) +
  geom_vline(xintercept = 70) +
  annotate(
    geom = "text",
    label = "70 years",
    x = 70,
    y = 0.075,
    angle = 90
  )
# Plain scatterplot of lifeExp against gdpPercap for all of gapminder.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Same scatterplot with a default smoother layered on top.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth()
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
# Scatterplot with a smoother drawn without its confidence band.
# `se` is a geom_smooth() argument; geom_point() does not know it and only
# warns that the parameter is ignored, so the smoother layer has to be added
# explicitly and `se = FALSE` set there.
ggplot(
  data = gapminder,
  aes(x = gdpPercap, y = lifeExp)
) +
  geom_point() +
  geom_smooth(se = FALSE)
## `geom_smooth()` using method = 'gam' and formula = 'y ~ s(x, bs = "cs")'
“lm” stands for “linear model”.
# Scatterplot with a straight-line fit (method "lm") in place of the
# default smoother.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
You should find that a linear model seems to fit this transformation much better.
# Linear fit after log-transforming gdpPercap inside the aesthetic mapping,
# so the x-axis is in natural-log units.
ggplot(gapminder, mapping = aes(x = log(gdpPercap), y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm")
## `geom_smooth()` using formula = 'y ~ x'
Notice that the units on the x-axis of the previous plot are in log terms, which is hard to interpret. I prefer this method: Do a log transformation of the x-axis using ‘scale_x_log10()’, and use ‘labels = scales::comma’ to suppress scientific notation on the labels for the x-axis.
# Linear fit with the log transformation applied by the x scale rather than
# in the mapping; scales::comma formats the axis labels.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_point() +
  geom_smooth(method = "lm") +
  scale_x_log10(labels = scales::comma)
## `geom_smooth()` using formula = 'y ~ x'
I’d like a way to visualize how dense the points are getting. For this, we can replace ‘geom_point()’ with ‘geom_hex()’.
# Hexagonal binning replaces the individual points; the linear fit and the
# log10 x scale with comma-formatted labels are kept.
ggplot(gapminder, mapping = aes(x = gdpPercap, y = lifeExp)) +
  geom_hex() +
  geom_smooth(method = "lm") +
  scale_x_log10(labels = scales::comma)
## `geom_smooth()` using formula = 'y ~ x'
# GDP per capita over time for three countries, one colored line each.
gapminder %>%
  filter(country %in% c("United States", "Costa Rica", "Mexico")) %>%
  ggplot(mapping = aes(x = year, y = gdpPercap, color = country)) +
  geom_line()
Experiment with applying a log transformation to gdpPercap (now on the y axis).
# Boxplots of gdpPercap for each continent, outlined in the continent color.
ggplot(
  gapminder,
  mapping = aes(x = continent, y = gdpPercap, color = continent)
) +
  geom_boxplot()

# Same boxplots with gdpPercap log-transformed in the mapping.
ggplot(
  gapminder,
  mapping = aes(x = continent, y = log(gdpPercap), color = continent)
) +
  geom_boxplot()
Also include ‘fill’. Make sure you use ‘scale_y_log10()’ to transform the y-axis into log terms.
# Violin plots of gdpPercap by continent, outlined and filled by continent.
# The raw gdpPercap is mapped to y and scale_y_log10() does the log
# transformation. Wrapping the variable in log() as well would transform the
# data twice and put the axis on a log-of-log scale.
ggplot(
  data = gapminder,
  aes(x = continent, y = gdpPercap, color = continent, fill = continent)
) +
  scale_y_log10() +
  geom_violin()
The ‘facet_wrap’ version:
# One panel per year: points colored by continent with grey, semi-transparent
# 2D density contours drawn over them. gdpPercap is log-transformed in the
# mapping.
ggplot(gapminder, mapping = aes(x = log(gdpPercap), y = lifeExp)) +
  geom_point(mapping = aes(color = continent)) +
  geom_density2d(alpha = 0.5, color = "grey") +
  facet_wrap(~ year)