Solutions

filter()

Q01: Filter gapminder for all the observations from Europe in 2007

# Keep only rows that are both in Europe and from the 2007 wave.
# Comma-separated conditions inside filter() are combined with AND.
gapminder %>%
  filter(
    continent == "Europe",
    year == 2007
  )
## # A tibble: 30 × 6
##    country                continent  year lifeExp      pop gdpPercap
##    <fct>                  <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Albania                Europe     2007    76.4  3600523     5937.
##  2 Austria                Europe     2007    79.8  8199783    36126.
##  3 Belgium                Europe     2007    79.4 10392226    33693.
##  4 Bosnia and Herzegovina Europe     2007    74.9  4552198     7446.
##  5 Bulgaria               Europe     2007    73.0  7322858    10681.
##  6 Croatia                Europe     2007    75.7  4493312    14619.
##  7 Czech Republic         Europe     2007    76.5 10228744    22833.
##  8 Denmark                Europe     2007    78.3  5468120    35278.
##  9 Finland                Europe     2007    79.3  5238460    33207.
## 10 France                 Europe     2007    80.7 61083916    30470.
## # ℹ 20 more rows

Q02: Filter gapminder for all the observations where lifeExp was exactly equal to 68 years old.

# `==` is an exact comparison; lifeExp is a double column, so only values
# stored as precisely 68 match. (dplyr::near() is the tolerant alternative
# if "approximately 68" is ever wanted.)
gapminder %>%
  filter(lifeExp == 68)
## # A tibble: 2 × 6
##   country  continent  year lifeExp      pop gdpPercap
##   <fct>    <fct>     <int>   <dbl>    <int>     <dbl>
## 1 Belgium  Europe     1952      68  8730405     8343.
## 2 Malaysia Asia       1982      68 14441916     4920.

Logical Operators

Q03: Filter gapminder for a short list of the richest countries in Asia.

# "Richest" is taken here as GDP per capita above 30,000; both conditions
# must hold, written with an explicit logical AND.
gapminder %>%
  filter(continent == "Asia" & gdpPercap > 30000)
## # A tibble: 19 × 6
##    country          continent  year lifeExp       pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Hong Kong, China Asia       2002    81.5   6762476    30209.
##  2 Hong Kong, China Asia       2007    82.2   6980412    39725.
##  3 Japan            Asia       2007    82.6 127467972    31656.
##  4 Kuwait           Asia       1952    55.6    160000   108382.
##  5 Kuwait           Asia       1957    58.0    212846   113523.
##  6 Kuwait           Asia       1962    60.5    358266    95458.
##  7 Kuwait           Asia       1967    64.6    575003    80895.
##  8 Kuwait           Asia       1972    67.7    841934   109348.
##  9 Kuwait           Asia       1977    69.3   1140357    59265.
## 10 Kuwait           Asia       1982    71.3   1497494    31354.
## 11 Kuwait           Asia       1992    75.2   1418095    34933.
## 12 Kuwait           Asia       1997    76.2   1765345    40301.
## 13 Kuwait           Asia       2002    76.9   2111561    35110.
## 14 Kuwait           Asia       2007    77.6   2505559    47307.
## 15 Saudi Arabia     Asia       1977    58.7   8128505    34168.
## 16 Saudi Arabia     Asia       1982    63.0  11254672    33693.
## 17 Singapore        Asia       1997    77.2   3802309    33519.
## 18 Singapore        Asia       2002    78.8   4197776    36023.
## 19 Singapore        Asia       2007    80.0   4553009    47143.

Q04: Filter gapminder for observations IN the United States, Germany, and Brazil.

# %in% keeps a row when its country matches any name in the vector.
gapminder %>%
  filter(
    country %in% c("United States", "Germany", "Brazil")
  )
## # A tibble: 36 × 6
##    country continent  year lifeExp       pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Brazil  Americas   1952    50.9  56602560     2109.
##  2 Brazil  Americas   1957    53.3  65551171     2487.
##  3 Brazil  Americas   1962    55.7  76039390     3337.
##  4 Brazil  Americas   1967    57.6  88049823     3430.
##  5 Brazil  Americas   1972    59.5 100840058     4986.
##  6 Brazil  Americas   1977    61.5 114313951     6660.
##  7 Brazil  Americas   1982    63.3 128962939     7031.
##  8 Brazil  Americas   1987    65.2 142938076     7807.
##  9 Brazil  Americas   1992    67.1 155975974     6950.
## 10 Brazil  Americas   1997    69.4 168546719     7958.
## # ℹ 26 more rows

select()

Q05: Select the last 3 variables of ‘gapminder’ by name

# Name the three trailing columns explicitly; select() returns them in
# the order listed.
gapminder %>%
  select(
    lifeExp,
    pop,
    gdpPercap
  )
## # A tibble: 1,704 × 3
##    lifeExp      pop gdpPercap
##      <dbl>    <int>     <dbl>
##  1    28.8  8425333      779.
##  2    30.3  9240934      821.
##  3    32.0 10267083      853.
##  4    34.0 11537966      836.
##  5    36.1 13079460      740.
##  6    38.4 14880372      786.
##  7    39.9 12881816      978.
##  8    40.8 13867957      852.
##  9    41.7 16317921      649.
## 10    41.8 22227415      635.
## # ℹ 1,694 more rows

mutate()

Q06: Use ‘mutate()’ to create a new variable ‘pop_in_thousands’. So if the observation has ‘pop’ = 97,462, then ‘pop_in_thousands’ will be 97.462.

# Append a column expressing population in thousands of people.
gapminder %>%
  mutate(
    pop_in_thousands = pop / 1000
  )
## # A tibble: 1,704 × 7
##    country     continent  year lifeExp      pop gdpPercap pop_in_thousands
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>            <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.            8425.
##  2 Afghanistan Asia       1957    30.3  9240934      821.            9241.
##  3 Afghanistan Asia       1962    32.0 10267083      853.           10267.
##  4 Afghanistan Asia       1967    34.0 11537966      836.           11538.
##  5 Afghanistan Asia       1972    36.1 13079460      740.           13079.
##  6 Afghanistan Asia       1977    38.4 14880372      786.           14880.
##  7 Afghanistan Asia       1982    39.9 12881816      978.           12882.
##  8 Afghanistan Asia       1987    40.8 13867957      852.           13868.
##  9 Afghanistan Asia       1992    41.7 16317921      649.           16318.
## 10 Afghanistan Asia       1997    41.8 22227415      635.           22227.
## # ℹ 1,694 more rows

summarize()

Q07. Take ‘gapminder’, filter for only observations in Africa, and summarize to find the:

  • median life expectancy,
  • median population, and
  • median GDP per capita.
# Restrict to African observations, then collapse them into a single row
# holding the median of each measure.
gapminder %>%
  filter(continent == "Africa") %>%
  summarize(
    median_life = median(lifeExp),
    median_pop  = median(pop),
    median_gdp  = median(gdpPercap)
  )
## # A tibble: 1 × 3
##   median_life median_pop median_gdp
##         <dbl>      <dbl>      <dbl>
## 1        47.8    4579311      1192.

Q08. Take ‘gapminder’, add a new column (mutate) for the total GDP (population × GDP per capita), and summarize to find the mean and median total GDP.

# Total GDP of an observation is its population times its GDP per capita.
# The earlier sum(gdpPercap) filled the column with one constant (the sum of
# the whole gdpPercap column), which is why mean and median printed as the
# same number.
# NOTE(review): the rendered output that follows was produced by the old
# expression and must be regenerated by re-knitting.
gapminder %>%
  mutate(total_gdp = pop * gdpPercap) %>%
  summarise(
    median_gdp = median(total_gdp),
    mean_gdp = mean(total_gdp)
  )
## # A tibble: 1 × 2
##   median_gdp  mean_gdp
##        <dbl>     <dbl>
## 1  12294917. 12294917.

group_by()

Q08b. Take ‘gapminder’, filter for only observations in Africa, and summarize to find the median life expectancy, population, and GDP per capita for each country.

# Same medians as the continent-wide summary, but computed per country:
# group_by() makes summarize() return one row per country.
gapminder %>%
  filter(continent == "Africa") %>%
  group_by(country) %>%
  summarize(
    median_life = median(lifeExp),
    median_pop  = median(pop),
    median_gdp  = median(gdpPercap)
  )
## # A tibble: 52 × 4
##    country                  median_life median_pop median_gdp
##    <fct>                          <dbl>      <dbl>      <dbl>
##  1 Algeria                         59.7  18593278.      4854.
##  2 Angola                          39.7   6589530.      3265.
##  3 Benin                           50.0   3404935       1139.
##  4 Botswana                        52.9    875910.      3883.
##  5 Burkina Faso                    47.1   6262085        831.
##  6 Burundi                         45.0   4207412.       455.
##  7 Cameroon                        49.6   8605348       1739.
##  8 Central African Republic        44.1   2322252       1013.
##  9 Chad                            48.4   4631689       1145.
## 10 Comoros                         51.9    326691       1229.
## # ℹ 42 more rows

Q09. Summarize ‘gapminder’ to find the mean GDP per capita for each continent, for each year (use 2 variables inside ‘group_by’).

# One mean per (continent, year) pair. summarise() peels off only the last
# grouping variable, so the result is still grouped by continent.
gapminder %>%
  group_by(continent, year) %>%
  summarise(mean_gdp = mean(gdpPercap))
## `summarise()` has grouped output by 'continent'. You can override using the
## `.groups` argument.
## # A tibble: 60 × 3
## # Groups:   continent [5]
##    continent  year mean_gdp
##    <fct>     <int>    <dbl>
##  1 Africa     1952    1253.
##  2 Africa     1957    1385.
##  3 Africa     1962    1598.
##  4 Africa     1967    2050.
##  5 Africa     1972    2340.
##  6 Africa     1977    2586.
##  7 Africa     1982    2482.
##  8 Africa     1987    2283.
##  9 Africa     1992    2282.
## 10 Africa     1997    2379.
## # ℹ 50 more rows

count()

Q10. How many observations are there from each country?

# Tally the number of rows for each distinct country; the tally is
# returned in a column named n.
gapminder %>%
  count(country)
## # A tibble: 142 × 2
##    country         n
##    <fct>       <int>
##  1 Afghanistan    12
##  2 Albania        12
##  3 Algeria        12
##  4 Angola         12
##  5 Argentina      12
##  6 Australia      12
##  7 Austria        12
##  8 Bahrain        12
##  9 Bangladesh     12
## 10 Belgium        12
## # ℹ 132 more rows

arrange() and slice()

Q11. Take all the observations in Asia and sort them from the lowest life expectancies to the highest.

# Keep Asian observations, then order them by life expectancy.
# arrange() sorts ascending by default, so the lowest values come first.
gapminder %>%
  filter(continent == "Asia") %>%
  arrange(lifeExp)
## # A tibble: 396 × 6
##    country     continent  year lifeExp      pop gdpPercap
##    <fct>       <fct>     <int>   <dbl>    <int>     <dbl>
##  1 Afghanistan Asia       1952    28.8  8425333      779.
##  2 Afghanistan Asia       1957    30.3  9240934      821.
##  3 Cambodia    Asia       1977    31.2  6978607      525.
##  4 Afghanistan Asia       1962    32.0 10267083      853.
##  5 Yemen, Rep. Asia       1952    32.5  4963829      782.
##  6 Yemen, Rep. Asia       1957    34.0  5498090      805.
##  7 Afghanistan Asia       1967    34.0 11537966      836.
##  8 Yemen, Rep. Asia       1962    35.2  6120081      826.
##  9 Afghanistan Asia       1972    36.1 13079460      740.
## 10 Nepal       Asia       1952    36.2  9182536      546.
## # ℹ 386 more rows

Q12. Take all the observations in Asia and sort them from the highest life expectancies to the lowest.

# Keep Asian observations, then order them by life expectancy.
# desc() flips the sort so the highest values come first.
gapminder %>%
  filter(continent == "Asia") %>%
  arrange(desc(lifeExp))
## # A tibble: 396 × 6
##    country          continent  year lifeExp       pop gdpPercap
##    <fct>            <fct>     <int>   <dbl>     <int>     <dbl>
##  1 Japan            Asia       2007    82.6 127467972    31656.
##  2 Hong Kong, China Asia       2007    82.2   6980412    39725.
##  3 Japan            Asia       2002    82   127065841    28605.
##  4 Hong Kong, China Asia       2002    81.5   6762476    30209.
##  5 Israel           Asia       2007    80.7   6426679    25523.
##  6 Japan            Asia       1997    80.7 125956499    28817.
##  7 Hong Kong, China Asia       1997    80     6495918    28378.
##  8 Singapore        Asia       2007    80.0   4553009    47143.
##  9 Israel           Asia       2002    79.7   6029529    21906.
## 10 Japan            Asia       1992    79.4 124329269    26825.
## # ℹ 386 more rows

Q13. Select row 853 to row 864.

# slice() picks rows by position; 853:864 is an inclusive range of 12 rows.
gapminder %>%
  slice(853:864)
## # A tibble: 12 × 6
##    country continent  year lifeExp     pop gdpPercap
##    <fct>   <fct>     <int>   <dbl>   <int>     <dbl>
##  1 Kuwait  Asia       1952    55.6  160000   108382.
##  2 Kuwait  Asia       1957    58.0  212846   113523.
##  3 Kuwait  Asia       1962    60.5  358266    95458.
##  4 Kuwait  Asia       1967    64.6  575003    80895.
##  5 Kuwait  Asia       1972    67.7  841934   109348.
##  6 Kuwait  Asia       1977    69.3 1140357    59265.
##  7 Kuwait  Asia       1982    71.3 1497494    31354.
##  8 Kuwait  Asia       1987    74.2 1891487    28118.
##  9 Kuwait  Asia       1992    75.2 1418095    34933.
## 10 Kuwait  Asia       1997    76.2 1765345    40301.
## 11 Kuwait  Asia       2002    76.9 2111561    35110.
## 12 Kuwait  Asia       2007    77.6 2505559    47307.

Q14. Take all the observations in Asia and return the five with the highest life expectancies (hint: use arrange()).

# Sort Asian observations from highest to lowest life expectancy, then keep
# the first five rows of that ordering.
gapminder %>%
  filter(continent == "Asia") %>%
  arrange(desc(lifeExp)) %>%
  slice_head(n = 5)
## # A tibble: 5 × 6
##   country          continent  year lifeExp       pop gdpPercap
##   <fct>            <fct>     <int>   <dbl>     <int>     <dbl>
## 1 Japan            Asia       2007    82.6 127467972    31656.
## 2 Hong Kong, China Asia       2007    82.2   6980412    39725.
## 3 Japan            Asia       2002    82   127065841    28605.
## 4 Hong Kong, China Asia       2002    81.5   6762476    30209.
## 5 Israel           Asia       2007    80.7   6426679    25523.

Q15. Which (one) country has the highest life expectancy in each continent? (hint: use arrange() and also group_by()).

# Sort every observation from highest to lowest life expectancy first, then
# group by continent so slice_head() keeps the leading (highest) row of
# each group. arrange() ignores grouping by default, so sorting before or
# after group_by() yields the same rows.
gapminder %>%
  arrange(desc(lifeExp)) %>%
  group_by(continent) %>%
  slice_head(n = 1)
## # A tibble: 5 × 6
## # Groups:   continent [5]
##   country   continent  year lifeExp       pop gdpPercap
##   <fct>     <fct>     <int>   <dbl>     <int>     <dbl>
## 1 Reunion   Africa     2007    76.4    798094     7670.
## 2 Canada    Americas   2007    80.7  33390141    36319.
## 3 Japan     Asia       2007    82.6 127467972    31656.
## 4 Iceland   Europe     2007    81.8    301931    36181.
## 5 Australia Oceania    2007    81.2  20434176    34435.