# load libraries
library(here)
library(readr)
library(DT)
# Source (ERDDAP CSV endpoint) and local cache path for the CCIEA "AC" table
url_ac <- 'https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv'
csv_ac <- here('data/cciea_AC_url_download.csv')
# Download the file only once; later runs reuse the cached copy
if (!file.exists(csv_ac)) {
  # download.file() does not create missing folders, so make sure the
  # destination directory exists before writing into it
  dir.create(dirname(csv_ac), showWarnings = FALSE, recursive = TRUE)
  download.file(url_ac, csv_ac)
}
# Read the data rows only: skip the first two lines of the file and let readr
# assign placeholder column names (X1, X2, ...); the real names are restored
# from the header line further below
d_ac <- read_csv(csv_ac, col_names = FALSE, skip = 2)
##
## ── Column specification ────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_double(),
## X1 = col_datetime(format = "")
## )
## ℹ Use `spec()` for the full column specifications.
# Recover the column names from the header line. `n_max = 0` parses only the
# header instead of re-reading (and type-guessing) the whole file a second time.
names(d_ac) <- names(read_csv(csv_ac, n_max = 0))
##
## ── Column specification ────────────────────────────────────────────────────────────────────────────────
## cols(
## .default = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
# Display the complete table with DT
datatable(data = d_ac)
# ggplot2 ----
# + geom_line() ----
library(dplyr)
library(ggplot2)
# Coastwide series: keep the time stamp and the coastwide revenue column,
# then drop the rows where that revenue is missing
d_coast <- filter(
  select(d_ac, time, total_fisheries_revenue_coastwide),
  !is.na(total_fisheries_revenue_coastwide))
datatable(d_coast)
# Line chart of coastwide revenue over time, stored so that later sections can
# add layers to it
p_coast <- ggplot(
  d_coast,
  aes(x = time, y = total_fisheries_revenue_coastwide)) +
  geom_line()
# Print the plot
p_coast
# + geom_smooth() ----
# Overlay a linear-model ('lm') smoother on the coastwide line chart
p_coast + geom_smooth(method = 'lm')
## `geom_smooth()` using formula 'y ~ x'
# + geom_histogram() ----
# Note that `geom_histogram()` plots the frequency of values for a single
# variable.
# Histogram of coastwide revenue values, binned into 10 bins; only an x
# aesthetic is mapped
ggplot(d_coast, aes(x = total_fisheries_revenue_coastwide)) +
  geom_histogram(bins = 10)
# aes(color = region) ----
library(stringr)
library(tidyr)
# Build a long table of revenue per region: one row per (time, region) with a
# non-missing value
d_rgn <- d_ac %>%
  # Keep time plus every revenue column except the coastwide total
  select(
    time,
    starts_with('total_fisheries_revenue'),
    -total_fisheries_revenue_coastwide) %>%
  # Stack the revenue columns into name/value pairs
  pivot_longer(cols = -time, names_to = 'name', values_to = 'value') %>%
  # Drop missing revenue values
  filter(!is.na(value)) %>%
  # Derive the region label: strip the common prefix from the column name and
  # upper-case what is left
  mutate(
    region = str_to_upper(
      str_replace(name, 'total_fisheries_revenue_', ''))) %>%
  # Final column order
  select(time, region, value)
# Revenue over time with one line per region, distinguished by color
p_rgn <- ggplot(
  d_rgn,
  aes(time, value, group = region, color = region)) +
  geom_line()
# Print the plot
p_rgn
# + labs() ----
# Attach a title, axis labels and a legend title to the regional plot, then
# print it. ggtitle()/xlab()/ylab() are the single-label wrappers around labs().
p_rgn <- p_rgn +
  ggtitle('Fisheries Revenue') +
  xlab('Year') +
  ylab('Millions $ (year 2015)') +
  labs(color = 'Region')
p_rgn
# facet_wrap() ----
# Split the regional plot into one panel per region
p_rgn + facet_wrap(~ region)
# + geom_col() ----
# Note that `geom_col()` plots both an x and y variable (e.g. revenue by
# region), while `geom_bar()` plots only an x variable (e.g. the number of
# revenue values for each region).
library(glue)
library(lubridate)
# Latest calendar year present in the regional data
yr_max <- year(max(d_rgn$time))
# Column chart of revenue by region, restricted to rows from that latest year;
# the year is interpolated into the title with glue()
ggplot(
  filter(d_rgn, year(time) == yr_max),
  aes(x = region, y = value, fill = region)) +
  geom_col() +
  labs(
    title = glue('Fisheries Revenue for {yr_max}'),
    x = 'Region',
    y = 'Millions $ (year 2015)',
    fill = 'Region')
# Contrast with `geom_bar()`. Also note that `color` only changes the outline
# color, while `fill` changes the color of the entire bar.
# Bar chart with only x mapped (plus an outline color per region)
ggplot(d_rgn, aes(x = region, color = region)) +
  geom_bar()
# + geom_boxplot() ----
# Box plot of the revenue values within each region
ggplot(d_rgn, aes(x = region, y = value, fill = region)) +
  geom_boxplot() +
  labs(
    title = 'Fisheries Revenue Variability',
    x = 'Region',
    y = 'Millions $ (year 2015)') +
  # The x axis already names each region, so the fill legend is hidden
  theme(legend.position = 'none')
# + geom_violin() ----
# Violin plot of the revenue values within each region, stored so that a
# theme can be added to it afterwards
p_rgn_violin <- ggplot(
  d_rgn,
  aes(x = region, y = value, fill = region)) +
  geom_violin() +
  labs(
    title = 'Fisheries Revenue Variability, Violin Plot',
    x = 'Region',
    y = 'Millions $ (year 2015)') +
  # Legend hidden, as the x axis already names each region
  theme(legend.position = 'none')
# Print the plot
p_rgn_violin
# theme() ----
# theme_classic() is a complete theme: adding it replaces every theme setting
# already on the plot, including legend.position = 'none'. Re-apply that
# setting afterwards so the redundant legend stays hidden.
p_rgn_violin +
  theme_classic() +
  theme(legend.position = 'none')
# plotly or dygraphs ----
# plotly::ggplotly() ----
# Convert the regional ggplot object with plotly's ggplotly()
plotly::ggplotly(p = p_rgn)
# dygraphs::dygraph() ----
# This package is written more specifically for time series data. It requires
# wide format data.
library(dygraphs)
# Reshape to wide format: one row per year, one column per region
d_rgn_wide <- pivot_wider(
  # Replace the time stamp by its calendar year and keep only the columns
  # that take part in the reshape
  select(mutate(d_rgn, Year = year(time)), Year, region, value),
  names_from = region,
  values_from = value)
datatable(d_rgn_wide)
# Draw the wide table as a dygraph and add a range selector to it
dyRangeSelector(dygraph(d_rgn_wide))