# Set variables
csv_url <- 'https://oceanview.pfeg.noaa.gov/erddap/tabledap/cciea_AC.csv'
dir_data <- 'data'
# Create derived variables
csv <- file.path(dir_data, basename(csv_url))
# Create directory
dir.create(dir_data)
## Warning in dir.create(dir_data): 'data' already exists
# Download data
download.file(csv_url, csv)
read.csv()
# Read data
d <- read.csv(csv)
# Show
# head(d)
The second line contains the units; we want to skip this.
# Read data, skipping first two lines, no header
d <- read.csv(csv, skip=2, header=F)
# Update column names
names(d) <- names(read.csv(csv))
#head(d)
We can render these data frames in a nicer way.
DT::datatable()
# Show table
DT::datatable(d)
dplyr
library(DT)
library(dplyr)
d <- d %>%
# Transform to tibble
tibble() %>%
# Change time from str to datetime
mutate(
time = as.Date(substr(time, 1, 10))
) %>%
# Select columns that start with total_fisheries_revenue
select(
time,
starts_with('total_fisheries_revenue')
) %>%
# Filter for rows in the last 40 years
filter(
time >= as.Date('1981-01-01')
)
datatable(d)
tidyr
library(tidyr)
d <- d %>%
# Change from wide to long format
pivot_longer(-time)
datatable(d)
dplyr
library(stringr)
# Get region column
d <- d %>%
# Replace total_fisheries_revenue in name column with ''
mutate(
region = str_replace(name, 'total_fisheries_revenue_', '')
) %>%
# Select time, region, and value columns (don't need name anymore)
select(
time,
region,
value
)
# Find average revenue by region
d_sum <- d %>%
# Group by region
group_by(region) %>%
# Take the mean
summarize(
avg_revenue = mean(value)
)
# Show data table with the avg_revenue column formatted as currency
datatable(d_sum) %>%
formatCurrency('avg_revenue')
purrr
on a nest
’ed tibble
library(purrr)
# Create nest
n <- d %>%
# Create grouping variable (region)
group_by(region) %>%
# Create nest of paired date, revenue points for each region
nest(
data = c(time, value)
)
n <- n %>%
# Create new columns lm and trend containing a linear model (revenue ~ date) and its slope
mutate(
lm = map(data, function(d){
lm(value ~ time, d)
}),
trend = map_dbl(lm, function(m){
coef(summary(m))['time', 'Estimate']
})
)
# Select just the region and the trend (slope) and display
n %>%
select(region, trend) %>%
datatable()