diff --git a/DESCRIPTION b/DESCRIPTION index a89c366..44ad798 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -9,6 +9,7 @@ Imports: httr, digest, readr, + purrr, magrittr Suggests: maps, diff --git a/NAMESPACE b/NAMESPACE index 235b279..6e26815 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -5,6 +5,8 @@ S3method("[",sensebox) S3method(osem_measurements,bbox) S3method(osem_measurements,default) S3method(osem_measurements,sensebox) +S3method(osem_measurements_archive,default) +S3method(osem_measurements_archive,sensebox) S3method(osem_phenomena,sensebox) S3method(plot,osem_measurements) S3method(plot,sensebox) @@ -19,6 +21,7 @@ export(osem_clear_cache) export(osem_counts) export(osem_endpoint) export(osem_measurements) +export(osem_measurements_archive) export(osem_phenomena) importFrom(graphics,legend) importFrom(graphics,par) diff --git a/R/api.R b/R/api.R index 7f10223..85fe61f 100644 --- a/R/api.R +++ b/R/api.R @@ -34,12 +34,10 @@ get_box_ = function (boxId, endpoint, ...) { parse_senseboxdata() } -get_measurements_ = function (..., endpoint) { - result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text') - +parse_measurement_csv = function (resText) { # parse the CSV response manually & mute readr suppressWarnings({ - result = readr::read_csv(result, col_types = readr::cols( + result = readr::read_csv(resText, col_types = readr::cols( # factor as default would raise issues with concatenation of multiple requests .default = readr::col_character(), createdAt = readr::col_datetime(), @@ -53,6 +51,11 @@ get_measurements_ = function (..., endpoint) { osem_as_measurements(result) } +get_measurements_ = function (..., endpoint) { + osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text') %>% + parse_measurement_csv +} + get_stats_ = function (endpoint, cache) { result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache) names(result) = c('boxes', 'measurements', 'measurements_per_minute') diff --git a/R/archive.R b/R/archive.R new file mode 100644 index 0000000..8bfd17b --- /dev/null +++ b/R/archive.R @@ -0,0 +1,135 @@ +# client for archive.opensensemap.org +# in this archive, CSV files for measurements of each sensor per day is provided. + +#' Returns the default endpoint for the archive *download* +#' While the front end domain is archive.opensensemap.org, file downloads +#' are provided via sciebo. +osem_archive_endpoint = function () { + 'https://uni-muenster.sciebo.de/index.php/s/HyTbguBP4EkqBcp/download?path=/data' +} + +#' Fetch day-wise measurements for a single box from the openSenseMap archive. +#' +#' This function is significantly faster than \code{\link{osem_measurements}} for large +#' time-frames, as daily CSV dumps for each sensor from +#' \href{http://archive.opensensemap.org}{archive.opensensemap.org} are used. +#' Note that the latest data available is from the previous day. +#' +#' By default, data for all sensors of a box is fetched, but you can select a +#' subset with a \code{\link[dplyr]{dplyr}}-style NSE filter expression. +#' +#' The function will warn when no data is available in the selected period, +#' but continue the remaining download. +#' +#' @param x A `sensebox data.frame` of a single box, as retrieved via \code{\link{osem_box}}, +#' to download measurements for. +#' @param fromDate Start date for measurement download. +#' @param toDate End date for measurement download (inclusive). +#' @param sensorFilter A NSE formula matching to \code{x$sensors}, selecting a subset of sensors. +#' @param progress Whether to print download progress information, defaults to \code{TRUE}. +#' @return A \code{tbl_df} Containing observations of all selected sensors for each time stamp. +#' +#' @seealso \href{https://archive.opensensemap.org}{openSenseMap archive} +#' @seealso \code{\link{osem_measurements}} +#' @seealso \code{\link{osem_box}} +#' +#' @export +osem_measurements_archive = function (x, ...) UseMethod('osem_measurements_archive') + +#' @export +osem_measurements_archive.default = function (x, ...) { + # NOTE: to implement for a different class: + # in order to call `archive_fetch_measurements()`, `box` must be a dataframe + # with a single row and the columns `X_id` and `name` + stop(paste('not implemented for class', toString(class(x)))) +} + +#' @describeIn osem_measurements_archive Get daywise measurements for one or +#' more sensors of a single box +#' @export +#' @examples +#' # fetch measurements for a single day +#' box = osem_box('593bcd656ccf3b0011791f5a') +#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-13')) +#' +#' \donttest{ +#' # fetch measurements for a date range and selected sensors +#' sensors = ~ phenomenon %in% c('Temperatur', 'Beleuchtungsstärke') +#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-01'), as.POSIXlt('2018-09-30'), sensorFilter = sensors) +#' } +osem_measurements_archive.sensebox = function (x, fromDate, toDate = fromDate, sensorFilter = ~ T, progress = T) { + if (nrow(x) != 1) + stop('this function only works for exactly one senseBox!') + + # filter sensors using NSE, for example: `~ phenomenon == 'Temperatur'` + sensors = x$sensors[[1]] %>% + dplyr::filter(lazyeval::f_eval(sensorFilter, .)) + + # fetch each sensor separately + dfs = by(sensors, 1:nrow(sensors), function (sensor) { + df = archive_fetch_measurements(x, sensor$id, fromDate, toDate, progress) %>% + dplyr::select(createdAt, value) %>% + #dplyr::mutate(unit = sensor$unit, sensor = sensor$sensor) %>% # inject sensor metadata + dplyr::rename_at(., 'value', function(v) sensor$phenomenon) + }) + + # merge all data.frames by timestamp + dfs %>% purrr::reduce(dplyr::full_join, 'createdAt') +} + +#' fetch measurements from archive from a single box, and a single sensor +archive_fetch_measurements = function (box, sensor, fromDate, toDate, progress) { + dates = list() + from = fromDate + while (from <= toDate) { + dates = append(dates, list(from)) + from = from + as.difftime(1, units = 'days') + } + + http_handle = httr::handle(osem_archive_endpoint()) # reuse the http connection for speed! + progress = if (progress && !is_non_interactive()) httr::progress() else NULL + + measurements = lapply(dates, function(date) { + url = build_archive_url(date, box, sensor) + res = httr::GET(url, progress, handle = http_handle) + + if (httr::http_error(res)) { + warning(paste( + httr::status_code(res), + 'on day', format.Date(date, '%F'), + 'for sensor', sensor + )) + + if (httr::status_code(res) == 404) + return(data.frame(createdAt = character(), value = character())) + } + + measurements = httr::content(res, type = 'text', encoding = 'UTF-8') %>% + parse_measurement_csv + }) + + measurements %>% dplyr::bind_rows() +} + +#' returns URL to fetch measurements from a sensor for a specific date, +#' based on `osem_archive_endpoint()` +build_archive_url = function (date, box, sensor) { + sensorId = sensor + d = format.Date(date, '%F') + format = 'csv' + + paste( + osem_archive_endpoint(), + d, + osem_box_to_archivename(box), + paste(paste(sensorId, d, sep = '-'), format, sep = '.'), + sep = '/' + ) +} + +#' replace chars in box name according to archive script: +#' https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66 +osem_box_to_archivename = function (box) { + name = gsub('[^A-Za-z0-9._-]', '_', box$name) + paste(box$X_id, name, sep='-') +} diff --git a/R/box.R b/R/box.R index 2e73a2f..a140b02 100644 --- a/R/box.R +++ b/R/box.R @@ -155,8 +155,18 @@ parse_senseboxdata = function (boxdata) { if (!is.null(thebox$updatedAt)) thebox$updatedAt = as.POSIXct(strptime(thebox$updatedAt, format = '%FT%T', tz = 'GMT')) + # create a dataframe of sensors + thebox$sensors = sensors %>% + lapply(as.data.frame, stringsAsFactors = F) %>% + dplyr::bind_rows(.) %>% + dplyr::select(phenomenon = title, id = X_id, unit, sensor = sensorType) %>% + list + # extract metadata from sensors - thebox$phenomena = lapply(sensors, function(s) s$title) %>% unlist %>% list + thebox$phenomena = sensors %>% + setNames(lapply(., function (s) s$`_id`)) %>% + lapply(function(s) s$title) %>% + unlist %>% list # convert to vector # FIXME: if one sensor has NA, max() returns bullshit get_last_measurement = function(s) { diff --git a/R/opensensmapr.R b/R/opensensmapr.R index fe2bcd3..0273df2 100644 --- a/R/opensensmapr.R +++ b/R/opensensmapr.R @@ -37,16 +37,27 @@ #' } #' #' @section Retrieving measurements: -#' Measurements can be retrieved through \code{\link{osem_measurements}} for a -#' given phenomenon only. A subset of measurements may be selected by -#' +#' There are two ways to retrieve measurements: #' \itemize{ -#' \item a list of senseBoxes, previously retrieved through -#' \code{\link{osem_box}} or \code{\link{osem_boxes}}. -#' \item a geographic bounding box, which can be generated with the -#' \code{\link[sf]{sf}} package. -#' \item a time frame -#' \item a exposure type of the given box +#' \item \code{\link{osem_measurements_archive}}: +#' Downloads measurements for a \emph{single box} from the openSenseMap archive. +#' This function does not provide realtime data, but is suitable for long time frames. +#' +#' \item \code{\link{osem_measurements}}: +#' This function retrieves (realtime) measurements from the API. It works for a +#' \emph{single phenomenon} only, but provides various filters to select sensors by +#' +#' \itemize{ +#' \item a list of senseBoxes, previously retrieved through +#' \code{\link{osem_box}} or \code{\link{osem_boxes}}. +#' \item a geographic bounding box, which can be generated with the +#' \code{\link[sf]{sf}} package. +#' \item a time frame +#' \item a exposure type of the given box +#' } +#' +#' Use this function with caution for long time frames, as the API becomes +#' quite slow is limited to 10.000 measurements per 30 day interval. #' } #' #' Data is returned as \code{tibble} with the class \code{osem_measurements}. diff --git a/man/build_archive_url.Rd b/man/build_archive_url.Rd new file mode 100644 index 0000000..3508ba6 --- /dev/null +++ b/man/build_archive_url.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/archive.R +\name{build_archive_url} +\alias{build_archive_url} +\title{returns URL to fetch measurements from a sensor for a specific date, +based on `osem_archive_endpoint()`} +\usage{ +build_archive_url(date, box, sensor) +} +\description{ +returns URL to fetch measurements from a sensor for a specific date, +based on `osem_archive_endpoint()` +} diff --git a/man/osem_archive_endpoint.Rd b/man/osem_archive_endpoint.Rd new file mode 100644 index 0000000..b7cdab3 --- /dev/null +++ b/man/osem_archive_endpoint.Rd @@ -0,0 +1,15 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/archive.R +\name{osem_archive_endpoint} +\alias{osem_archive_endpoint} +\title{Default endpoint for the archive download +front end domain is archive.opensensemap.org, but file download +is provided via sciebo} +\usage{ +osem_archive_endpoint() +} +\description{ +Default endpoint for the archive download +front end domain is archive.opensensemap.org, but file download +is provided via sciebo +} diff --git a/man/osem_box_to_archivename.Rd b/man/osem_box_to_archivename.Rd new file mode 100644 index 0000000..8d2707d --- /dev/null +++ b/man/osem_box_to_archivename.Rd @@ -0,0 +1,13 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/archive.R +\name{osem_box_to_archivename} +\alias{osem_box_to_archivename} +\title{replace chars in box name according to archive script: +https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66} +\usage{ +osem_box_to_archivename(box) +} +\description{ +replace chars in box name according to archive script: +https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66 +} diff --git a/man/osem_measurements_archive.Rd b/man/osem_measurements_archive.Rd new file mode 100644 index 0000000..7a30334 --- /dev/null +++ b/man/osem_measurements_archive.Rd @@ -0,0 +1,38 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/archive.R +\name{osem_measurements_archive} +\alias{osem_measurements_archive} +\alias{osem_measurements_archive.sensebox} +\title{Get day-wise measurements for a single box from the openSenseMap archive.} +\usage{ +osem_measurements_archive(x, ...) + +\method{osem_measurements_archive}{sensebox}(x, fromDate, + toDate = fromDate, sensorFilter = ~T, progress = T) +} +\description{ +This function is significantly faster than `osem_measurements()` for large +time-frames, as dayly CSV dumps for each sensor from + are used. +Note that the latest data available is from the previous day. +By default, data for all sensors of a box is fetched, but you can select a +subset with a `dplyr`-style NSE filter expression. +} +\section{Methods (by class)}{ +\itemize{ +\item \code{sensebox}: Get daywise measurements for one or +more sensors of a single box +}} + +\examples{ + +\donttest{ + # fetch measurements for a single day + box = osem_box('593bcd656ccf3b0011791f5a') + m = osem_measurements_archive(box, as.POSIXlt('2018-09-13')) + + # fetch measurements for a date range and selected sensors + sensors = ~ phenomenon \%in\% c('Temperatur', 'Beleuchtungsstärke') + m = osem_measurements_archive(box, as.POSIXlt('2018-09-01'), as.POSIXlt('2018-09-30'), sensorFilter = sensors) +} +} diff --git a/tests/testthat/test_archive.R b/tests/testthat/test_archive.R new file mode 100644 index 0000000..7aa9374 --- /dev/null +++ b/tests/testthat/test_archive.R @@ -0,0 +1,53 @@ +source('testhelpers.R') + +context('osem_box_to_archivename()') + +try({ + boxes = osem_boxes(grouptag = 'ifgi') + box = filter(boxes, row_number() == 1) +}) + +test_that('osem_box_to_archive_name does the correct character replacements', { + b = data.frame( + name = 'aA1._- äß!"?$%&/', + X_id = 'UUID' + ) + + archivename = opensensmapr:::osem_box_to_archivename(b) + expect_equal(archivename, 'UUID-aA1._-__________') +}) + +test_that('osem_box_to_archive_name works for one box', { + if (is.null(box)) skip('no box data could be fetched') + + archivename = opensensmapr:::osem_box_to_archivename(box) + expect_length(archivename, 1) + expect_type(archivename, 'character') +}) + +test_that('osem_box_to_archive_name works for multiple boxes', { + if (is.null(boxes)) skip('no box data available') + + archivename = opensensmapr:::osem_box_to_archivename(boxes) + expect_length(archivename, nrow(boxes)) + expect_type(archivename, 'character') +}) + +context('osem_measurements_archive()') + +test_that('osem_measurements_archive works for one box', { + if (is.null(box)) skip('no box data could be fetched') + + m = osem_measurements_archive(box, as.POSIXlt('2018-08-08')) + expect_length(m, nrow(box$sensors[[1]]) + 1) # one column for each sensor + createdAt + expect_s3_class(m, c('osem_measurements', 'tbl_df')) +}) + +test_that('osem_measurements_archive fails for multiple boxes', { + if (is.null(boxes)) skip('no box data available') + + expect_error( + osem_measurements_archive(boxes, as.POSIXlt('2018-08-08')), + 'this function only works for exactly one senseBox!' + ) +})