mirror of
https://github.com/appelmar/gdalcubes.git
synced 2025-02-23 07:54:15 +01:00
297 lines
12 KiB
R
297 lines
12 KiB
R
#' Create an image collection from a STAC feature collection
|
|
#'
|
|
#' This function creates an image collection from a STAC API collection response. It does not
|
|
#' need to read any image data. Additionally, bands can be filtered and asset links can be transformed to make them
|
|
#' readable for GDAL.
|
|
#'
|
|
#' @param s STAC feature collection
|
|
#' @param asset_names character vector with names of assets (e.g., bands) to be used, other assets will be ignored. By default (NULL), all asset names with "eo:bands" attributes will be used
|
|
#' @param asset_regex length 1 character defining a regular expression asset names must match to be considered
|
|
#' @param url_fun optional function to modify URLs of assets, e.g, to add /vsicurl/ to URLS (the default)
|
|
#' @param out_file optional name of the output SQLite database file, defaults to a temporary file
|
|
#' @param property_filter optional function to filter STAC items (images) by their properties; see Details
|
|
#' @param skip_image_metadata logical, if TRUE per-image metadata (STAC item properties) will not be added to the image collection
|
|
#' @param srs character spatial reference system of images used either for images without corresponding STAC property ony or for all images
|
|
#' @param srs_overwrite logical, if FALSE, use srs only for images with unknown srs (missing STAC metadata)
|
|
#' @param duration character, if images represent time intervals, use either the"start" or "center" of time intervals
|
|
#' @note Currently, bbox results are expected to be WGS84 coordinates, even if bbox-crs is given in the STAC response.
|
|
#' @details
|
|
#'
|
|
#' The property_filter argument can be used to filter images by metadata such as cloud coverage.
|
|
#' The functions receives all properties of a STAC item (image) as input list and is expected to produce a single logical value,
|
|
#' where an image will be ignored if the function returns FALSE.
|
|
#'
|
|
#' Some STAC API endpoints may return items with duplicte IDs (image names), pointing to
|
|
#' identical URLs. Such items are only added once during creation of the image collection.
|
|
#'
|
|
#' @export
|
|
stac_image_collection <- function(s, out_file = tempfile(fileext = ".db"),
|
|
asset_names = NULL, asset_regex = NULL,
|
|
url_fun = .default_url_fun,
|
|
property_filter = NULL, skip_image_metadata = FALSE,
|
|
srs = NULL, srs_overwrite = FALSE, duration = c("center", "start")) {
|
|
SUBBAND_SPLIT_CHAR = ":"
|
|
if ("STACItemCollection" %in% class(s)) {
|
|
s = s$features
|
|
}
|
|
if (!is.list(s)) {
|
|
stop ("Input must be either a list or a STACItemCollection")
|
|
}
|
|
if (length(s) == 0) {
|
|
stop ("Input does not include any STAC items")
|
|
}
|
|
|
|
if (file.exists(out_file)) {
|
|
stop ("output file already exists")
|
|
}
|
|
|
|
|
|
# add band metadata if available
|
|
bands = NULL
|
|
asset_names_exist = rep(FALSE, length(asset_names))
|
|
for (i in 1:length(s)) {
|
|
for (j in 1:length(s[[i]]$assets)) {
|
|
asset_name = names(s[[i]]$assets)[j]
|
|
|
|
if (!is.null(asset_names)) {
|
|
if (asset_name %in% asset_names) {
|
|
asset_names_exist[which(asset_names == asset_name)] = TRUE
|
|
}
|
|
else {
|
|
next
|
|
}
|
|
}
|
|
if (!is.null(asset_regex)) {
|
|
if (!grepl(pattern = asset_regex, asset_name)) next
|
|
}
|
|
|
|
# assumption: use only assets with "eo:bands" metadata, unless its is part of asset_names
|
|
if ("eo:bands" %in% names(s[[i]]$assets[[j]])) {
|
|
nb = length(s[[i]]$assets[[j]][["eo:bands"]])
|
|
if (nb == 1) {
|
|
bands = union(bands, asset_name)
|
|
}
|
|
else {
|
|
subnames = sapply(s[[i]]$assets[[j]][["eo:bands"]], function(x){x$name})
|
|
joined_names = paste0(asset_name, SUBBAND_SPLIT_CHAR, subnames)
|
|
bands = union(bands, joined_names)
|
|
}
|
|
next
|
|
}
|
|
|
|
# if no eo:bands metadata found, consider asset if type contains "image/*" and role equals "data"
|
|
sroles = s[[i]]$assets[[j]]$roles
|
|
stype = s[[i]]$assets[[j]]$type
|
|
|
|
if (is.character(sroles) && is.character(stype)) {
|
|
if (grepl("image/", stype, fixed=TRUE) && "data" %in% sroles) {
|
|
bands = union(bands, asset_name)
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
# compare asset_names to found assets
|
|
if (!is.null(asset_names)) {
|
|
a = which(!(asset_names %in% bands))
|
|
if (length(a) > 0) {
|
|
for (i in 1:length(a)) {
|
|
if (asset_names_exist[a[i]]) {
|
|
bands = union(bands, asset_names[a[i]])
|
|
warning(paste0("STAC asset with name '", asset_names[a[i]] ,"' does not include eo:bands metadata and will be considered as a single band source"))
|
|
}
|
|
else {
|
|
warning(paste0("STAC asset with name '", asset_names[a[i]] ,"' does not exist and will be ignored"))
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
bands_df = data.frame(id = 1:length(bands), name = bands, type = "", offset = NA,
|
|
scale = NA, unit = "", nodata = "", stringsAsFactors = FALSE)
|
|
|
|
images_df = data.frame(id = integer(), name = character(), left = numeric(),
|
|
top = numeric(), bottom = numeric(), right = numeric(),
|
|
datetime = character(), proj = character())
|
|
gdalrefs_df = data.frame(image_id = integer(), band_id = integer(),
|
|
descriptor = character(),band_num = integer())
|
|
image_md_df = data.frame(image_id = integer(),key = character(), value = character())
|
|
|
|
|
|
for (i in 1:length(s)) {
|
|
|
|
tryCatch(
|
|
expr = {
|
|
images_df_temp = data.frame(id = integer(), name = character(), left = numeric(),
|
|
top = numeric(), bottom = numeric(), right = numeric(),
|
|
datetime = character(), proj = character())
|
|
gdalrefs_df_temp = data.frame(image_id = integer(), band_id = integer(),
|
|
descriptor = character(),band_num = integer())
|
|
image_md_df_temp = data.frame(image_id = integer(),key = character(), value = character())
|
|
if (!is.null(property_filter)) {
|
|
if (!property_filter(s[[i]]$properties)) {
|
|
next
|
|
}
|
|
}
|
|
|
|
# bands
|
|
img_has_bands = FALSE
|
|
for (j in 1:nrow(bands_df)) {
|
|
if (bands_df$name[j] %in% names(s[[i]]$assets)) {
|
|
gdalrefs_df_temp = rbind(gdalrefs_df_temp, data.frame(image_id = i, band_id = j,
|
|
descriptor = url_fun(s[[i]]$assets[[bands_df$name[j]]]$href),
|
|
band_num = 1, stringsAsFactors = FALSE))
|
|
img_has_bands = TRUE
|
|
}
|
|
else {
|
|
spl = strsplit(bands_df$name[j], SUBBAND_SPLIT_CHAR)[[1]]
|
|
if (length(spl) == 2) {
|
|
asset_name = spl[1]
|
|
band_num = which(which(startsWith(bands_df$name, paste0(asset_name, SUBBAND_SPLIT_CHAR))) == j)
|
|
|
|
if (length(band_num) == 1) {
|
|
gdalrefs_df_temp = rbind(gdalrefs_df_temp, data.frame(image_id = i, band_id = j,
|
|
descriptor = url_fun(s[[i]]$assets[[asset_name]]$href),
|
|
band_num = band_num, stringsAsFactors = FALSE))
|
|
img_has_bands = TRUE
|
|
}
|
|
}
|
|
}
|
|
}
|
|
if (img_has_bands) {
|
|
# fixes #60
|
|
if (!is.null(srs) && srs_overwrite) {
|
|
proj = srs
|
|
}
|
|
else {
|
|
proj = s[[i]]$properties$"proj:epsg"
|
|
if (!is.null(proj)) {
|
|
if (!startsWith(toupper(proj), "EPSG:")) {
|
|
proj = paste0("EPSG:", proj)
|
|
}
|
|
}
|
|
if (is.null(proj)) {
|
|
proj = s[[i]]$properties$"proj:wkt2"
|
|
}
|
|
if (is.null(proj)) {
|
|
proj = s[[i]]$properties$"proj:projjson"
|
|
}
|
|
if (is.null(proj)) {
|
|
if (!is.null(srs)) {
|
|
proj = srs
|
|
}
|
|
else {
|
|
stop(paste0("No projection info found in STAC item for image ", s[[i]]$id))
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
temp_datetime = NULL
|
|
if (!is.null(s[[i]]$properties$datetime)) {
|
|
temp_datetime = s[[i]]$properties$datetime
|
|
}
|
|
else if (!is.null(s[[i]]$properties$start_datetime)) {
|
|
temp_starttime = s[[i]]$properties$start_datetime
|
|
temp_endtime = s[[i]]$properties$end_datetime
|
|
|
|
m = duration[1]
|
|
if (m == "center") {
|
|
if (!requireNamespace("lubridate", quietly = TRUE)) {
|
|
stop("package lubridate required; please install first")
|
|
}
|
|
a = lubridate::ymd_hms(temp_starttime)
|
|
b = lubridate::ymd_hms(temp_endtime)
|
|
temp_datetime = format(a + (b-a)/2, "%Y-%m-%dT%H:%M:%S")
|
|
}
|
|
else {
|
|
temp_datetime = temp_starttime
|
|
}
|
|
}
|
|
else {
|
|
stop(paste0("No datetime / start_datetime property found in STAC item for image ", s[[i]]$id))
|
|
}
|
|
|
|
# BBOX
|
|
bbox = s[[i]]$bbox
|
|
if (is.list(bbox)) {
|
|
bbox = unlist(bbox)
|
|
}
|
|
|
|
# TO CHECK IN STAC SPEC, does BBOX always exist? Is it always WGS84?
|
|
# TODO: transform, if bbox-crs is given
|
|
images_df_temp = data.frame(id = i, name = s[[i]]$id, left = bbox[1], top = bbox[4], bottom = bbox[2],
|
|
right = bbox[3], datetime = temp_datetime, proj = proj, stringsAsFactors = FALSE)
|
|
|
|
# Duplicate image name check
|
|
if (s[[i]]$id %in% images_df$name) {
|
|
if (.pkgenv$debug) {
|
|
message(paste0("Skipping STAC item ", s[[i]]$id) , " due to duplicate id (image with identical name already exists)")
|
|
}
|
|
next
|
|
}
|
|
|
|
# image metadata
|
|
if (!skip_image_metadata) {
|
|
image_md_df_temp = data.frame(image_id = rep(i, length(s[[i]]$properties)),
|
|
key = names(s[[i]]$properties),
|
|
value = as.character(s[[i]]$properties))
|
|
}
|
|
|
|
# add to result tables
|
|
images_df = rbind(images_df, images_df_temp)
|
|
gdalrefs_df = rbind(gdalrefs_df, gdalrefs_df_temp)
|
|
if (!skip_image_metadata) {
|
|
image_md_df = rbind(image_md_df, image_md_df_temp)
|
|
}
|
|
|
|
}
|
|
},
|
|
error = function(e){
|
|
warning(paste0("Skipping STAC item ", i, " due to the following error: ", e))
|
|
})
|
|
}
|
|
|
|
|
|
if (nrow(images_df) == 0) {
|
|
stop("Collection does not contain any images")
|
|
}
|
|
|
|
|
|
if (skip_image_metadata) {
|
|
gc_create_stac_collection(bands_df, images_df, gdalrefs_df, path.expand(out_file), data.frame())
|
|
}
|
|
else {
|
|
gc_create_stac_collection(bands_df, images_df, gdalrefs_df, path.expand(out_file), image_md_df)
|
|
}
|
|
return(image_collection(out_file))
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#' Default function to convert hrefs of STAC response aassets to GDAL dataset identifiers including VSI prefixes
|
|
#' @param url a single URL
|
|
#' @examples
|
|
#' .default_url_fun("s3://bucket/object/image.tif")
|
|
#' @noRd
|
|
.default_url_fun <- function(url) {
|
|
|
|
if (startsWith(url, "s3://")) {
|
|
return(paste0("/vsis3/", substr(url, 6, nchar(url))))
|
|
}
|
|
else if (startsWith(url, "gs://")) {
|
|
return(paste0("/vsigs/", substr(url, 6, nchar(url))))
|
|
}
|
|
else if (startsWith(url, "http")) {
|
|
return(paste0("/vsicurl/", url))
|
|
}
|
|
|
|
# default: try vsicurl
|
|
return(paste0("/vsicurl/", url))
|
|
}
|
|
|
|
|