get_drivers_coordinates.Rmd
This vignette runs through the workflow to obtain an object like `p_model_drivers` (from the `rsofun` package) for arbitrary locations on the globe, given by their coordinates (lon, lat). These sites aren’t necessarily flux sites.
First, we compile all the necessary data (complete site information, meteorological data, climatic variables, etc.) from the raw data files. In this vignette, we use the paths corresponding to the GECO data archive (more information here). Then, we format the data to run the `rsofun` P-model implementation.
# Load libraries
library(dplyr)
library(tidyr)
library(tibble)
library(ingestr)
library(raster)
library(ggplot2)
if(!require(devtools)){install.packages("devtools")}
devtools::install_github("geco-bern/rsofun")
library(rsofun)
As an example, we will use a subset of site locations from Atkin et al. 2017 (the original data is available via the TRY Plant Trait Database). All the columns in the `siteinfo_globresp` data.frame are necessary to retrieve the P-model driver data: a site name for reference (`sitename`), the coordinates of the site location in degrees (`lon` and `lat`), its elevation in meters (`elv`), and the first and last year of observations we want to collect (`year_start` and `year_end`).
# Load siteinfo data
load("../data/siteinfo_globresp.rda")
We use data from Stocker et al. 2021 to obtain the water holding capacity (WHC) for our sites. The data come at 0.05 degree resolution, which is appropriate for this use case.
# Define function to extract whc
extract_whc <- function(file, siteinfo){
  siteinfo <- siteinfo[, c("lon", "lat")] |>
    unique() # Remove repeated coordinates
  rasta <- raster::brick(file)
  raster::extract(
    rasta,                                          # raster object
    sp::SpatialPoints(siteinfo[, c("lon", "lat")]), # points
    sp = TRUE                                       # add raster values to siteinfo
  ) |>
    tibble::as_tibble() |>
    dplyr::rename(whc = layer)
}
# Get WHC data
path_whc <- "/data/archive/whc_stocker_2021/data"
df_whc <- extract_whc(file = paste0(path_whc, "/cwdx80.nc"),
siteinfo = siteinfo_globresp)
# Merge whc with site information
siteinfo <- dplyr::left_join(
siteinfo_globresp,
df_whc,
by = c("lon", "lat")
)
# Fill gaps by median value (here there are no NAs)
siteinfo$whc[is.na(siteinfo$whc)] <- median(siteinfo$whc,
na.rm = TRUE)
Based on the `siteinfo` object, we collect all climate forcing data.
The following meteorological variables are obtained for the P-model forcing from the WATCH-WFDEI data:

- `temp`: daily temperature
- `prec`: daily precipitation
- `ppfd`: photosynthetic photon flux density
- `vpd`: vapor pressure deficit
- `patm`: atmospheric pressure
Loading this data can take a while, up to hours if you need several years of data for many sites.
# Get WATCH data
path_watch <- "/data/archive/wfdei_weedon_2014/data"
path_worldclim <- "/data/archive/worldclim_fick_2017/data" # for debiasing
df_watch <- ingestr::ingest(
  siteinfo = siteinfo,
  source = "watch_wfdei",
  getvars = c("temp", "prec", "ppfd", "vpd", "patm"),
  dir = path_watch,
  settings = list(
    correct_bias = "worldclim", # 0.5 deg
    dir_bias = path_worldclim
  )
) |>
  suppressWarnings() |> suppressMessages()
# Variables tmin and tmax missing but not currently in use
# Memory intensive, purge memory
gc()
Now, let’s complete the forcing with cloud cover (`ccov`) values from CRU data.
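A minimal sketch of this step is given below; it mirrors the WATCH-WFDEI call above. The archive path is an assumption (adapt it to your local copy of the CRU data), and `ccov` is among the variables `ingestr` supports for the "cru" source:
# Get CRU data (sketch; the archive path is hypothetical)
path_cru <- "/data/archive/cru_harris_2021/data"

df_cru <- ingestr::ingest(
  siteinfo = siteinfo,
  source = "cru",
  getvars = "ccov", # cloud cover
  dir = path_cru
) |>
  suppressWarnings() |> suppressMessages()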
Let’s put the previous data together into a single data.frame:
df_meteo <- df_watch |>
  tidyr::unnest(data) |>
  dplyr::left_join(
    df_cru |>
      tidyr::unnest(data),
    by = c("sitename", "date")
  ) |>
  dplyr::ungroup() |>                        # keep unnested
  dplyr::mutate(tmin = temp,                 # placeholders for variables tmin and tmax
                tmax = temp) |>              # (not used in P-model runs)
  dplyr::mutate(doy = lubridate::yday(date)) # add day of year
head(df_meteo)
The following chunk downloads the CO2 yearly average data from the Mauna Loa observatory and appends it to the meteorological drivers from above.
# Download CO2 data
df_co2 <- read.csv(
  url("https://gml.noaa.gov/webdata/ccgg/trends/co2/co2_annmean_mlo.csv"),
  skip = 59) |>             # skip the comment header of the CSV file
  dplyr::select(year, mean) |>
  dplyr::rename(co2 = mean)
# Merge with meteo drivers
df_meteo <- df_meteo |>
dplyr::mutate(year = lubridate::year(date)) |>
dplyr::left_join(df_co2, by = "year") # keep unnested
Set fAPAR to 1 by default for all observations, which is the same as what `ingest(siteinfo_globresp, source = "fapar_unity")` would do. This makes sense for leaf-level simulations.
# Add fapar column with value 1
df_meteo <- df_meteo |>
dplyr::mutate(fapar = 1)
Up to here, we have merged all of the forcing data into `df_meteo`. Let’s put it in a nested format, such that there is a single data.frame per site:
# Nest forcing
df_meteo <- df_meteo |>
dplyr::group_by(sitename) |>
tidyr::nest() |>
dplyr::rename(forcing = data) |>
dplyr::ungroup()
# Add site information
df_siteinfo <- siteinfo |>
dplyr::group_by(sitename) |>
tidyr::nest() |>
dplyr::rename(site_info = data) |>
dplyr::ungroup()
We will use default simulation and soil texture parameters.
# Define default model parameters, soil data, etc
params_siml <- list(
  spinup = TRUE,              # to bring soil moisture to steady state
  spinupyears = 10,           # 10 is enough for soil moisture
  recycle = 1,                # number of years recycled during spinup
  soilmstress = FALSE,        # whether the soil moisture stress function is included
  tempstress = FALSE,         # whether the temperature stress function is included
  calc_aet_fapar_vpd = FALSE, # set to FALSE - should be dropped again
  in_ppfd = TRUE,             # if available from forcing files, set to TRUE
  in_netrad = FALSE,          # if available from forcing files, set to TRUE
  outdt = 1,
  ltre = FALSE,
  ltne = FALSE,
  ltrd = FALSE,
  ltnd = FALSE,
  lgr3 = TRUE,
  lgn3 = FALSE,
  lgr4 = FALSE
)
df_soiltexture <- bind_rows(
  top = tibble(
    layer = "top",
    fsand = 0.4,
    fclay = 0.3,
    forg = 0.1,
    fgravel = 0.1),
  bottom = tibble(
    layer = "bottom",
    fsand = 0.4,
    fclay = 0.3,
    forg = 0.1,
    fgravel = 0.1)
)
Finally, we can compile all the data together into a `p_model_drivers` object. Note that this object must be an ungrouped tibble.
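The exact assembly depends on the `rsofun` version; a minimal sketch, assuming the drivers tibble nests the simulation parameters and soil texture per site alongside the forcing and site information, could look like this:
# Sketch: assemble the drivers object (the column layout is an assumption;
# compare with rsofun::p_model_drivers for your installed version)
drivers <- df_siteinfo |>
  dplyr::left_join(df_meteo, by = "sitename") |>
  dplyr::mutate(
    params_siml = list(params_siml),      # same settings for all sites
    df_soiltexture = list(df_soiltexture) # same soil texture for all sites
  ) |>
  dplyr::ungroup()                        # must be an ungrouped tibble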
We can now run the P-model with the `rsofun` implementation.
# Run P-model with parameters from previous optimizations
params_modl <- list(
kphio = 0.09423773,
soilm_par_a = 0.33349283,
soilm_par_b = 1.45602286,
tau_acclim_tempstress = 10,
par_shape_tempstress = 0.0
)
# run the model for these parameters
output <- rsofun::runread_pmodel_f(
drivers = drivers,
par = params_modl
)
And take a look at the results:
ggplot() +
geom_line(
data = output |> tidyr::unnest(data),
aes(
x = date,
y = vcmax25
),
alpha = 0.8
) +
facet_grid(sitename ~ .) +
labs(
x = "Date",
y = "Vcmax25"
)
ggplot() +
geom_line(
data = output |> tidyr::unnest(data),
aes(
x = date,
y = gpp
),
alpha = 0.8
) +
facet_grid(sitename ~ .) +
labs(
x = "Date",
y = "GPP"
)
To run the P-model for leaf trait calibration, we may want to have the average daily forcing over a span of years. This is because leaf traits are measured once and assumed constant over a span of time.
# Define function to aggregate forcing over day of year
aggregate_forcing_doy <- function(forcing){
  forcing |>
    dplyr::group_by(doy) |>
    dplyr::summarise(date = first(date), # 2001 as symbolic date
                     temp = mean(temp, na.rm = TRUE),
                     rain = mean(rain, na.rm = TRUE),
                     vpd = mean(vpd, na.rm = TRUE),
                     ppfd = mean(ppfd, na.rm = TRUE),
                     snow = mean(snow, na.rm = TRUE),
                     co2 = mean(co2, na.rm = TRUE),
                     fapar = mean(fapar, na.rm = TRUE),
                     patm = mean(patm, na.rm = TRUE),
                     tmin = mean(tmin, na.rm = TRUE),
                     tmax = mean(tmax, na.rm = TRUE),
                     ccov = mean(ccov, na.rm = TRUE)
    ) |>                    # already ungrouped
    dplyr::slice(1:365)     # ignore leap years
}
# Compute aggregated drivers
drivers_aggregated <- drivers |>
dplyr::mutate(forcing =
lapply(drivers$forcing, aggregate_forcing_doy))
Finally, let’s run the P-model on the aggregated data and visualise the results.
# run the model for these parameters
output_aggregated <- rsofun::runread_pmodel_f(
drivers = drivers_aggregated,
par = params_modl
)
ggplot() +
geom_line(
data = output_aggregated |> tidyr::unnest(data),
aes(
x = date,
y = vcmax25
),
alpha = 0.8
) +
facet_grid(sitename ~ .) +
labs(
x = "Date",
y = "Vcmax25"
)
ggplot() +
geom_line(
data = output_aggregated |> tidyr::unnest(data),
aes(
x = date,
y = gpp
),
alpha = 0.8
) +
facet_grid(sitename ~ .) +
labs(
x = "Date",
y = "GPP"
)