mirror of
https://github.com/sensebox/opensensmapr
synced 2025-02-21 13:23:57 +01:00
Merge branch 'measurements_archive' into development
This commit is contained in:
commit
de3a05bf97
11 changed files with 309 additions and 14 deletions
|
@ -9,6 +9,7 @@ Imports:
|
|||
httr,
|
||||
digest,
|
||||
readr,
|
||||
purrr,
|
||||
magrittr
|
||||
Suggests:
|
||||
maps,
|
||||
|
|
|
@ -5,6 +5,8 @@ S3method("[",sensebox)
|
|||
S3method(osem_measurements,bbox)
|
||||
S3method(osem_measurements,default)
|
||||
S3method(osem_measurements,sensebox)
|
||||
S3method(osem_measurements_archive,default)
|
||||
S3method(osem_measurements_archive,sensebox)
|
||||
S3method(osem_phenomena,sensebox)
|
||||
S3method(plot,osem_measurements)
|
||||
S3method(plot,sensebox)
|
||||
|
@ -19,6 +21,7 @@ export(osem_clear_cache)
|
|||
export(osem_counts)
|
||||
export(osem_endpoint)
|
||||
export(osem_measurements)
|
||||
export(osem_measurements_archive)
|
||||
export(osem_phenomena)
|
||||
importFrom(graphics,legend)
|
||||
importFrom(graphics,par)
|
||||
|
|
11
R/api.R
11
R/api.R
|
@ -34,12 +34,10 @@ get_box_ = function (boxId, endpoint, ...) {
|
|||
parse_senseboxdata()
|
||||
}
|
||||
|
||||
get_measurements_ = function (..., endpoint) {
|
||||
result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')
|
||||
|
||||
parse_measurement_csv = function (resText) {
|
||||
# parse the CSV response manually & mute readr
|
||||
suppressWarnings({
|
||||
result = readr::read_csv(result, col_types = readr::cols(
|
||||
result = readr::read_csv(resText, col_types = readr::cols(
|
||||
# factor as default would raise issues with concatenation of multiple requests
|
||||
.default = readr::col_character(),
|
||||
createdAt = readr::col_datetime(),
|
||||
|
@ -53,6 +51,11 @@ get_measurements_ = function (..., endpoint) {
|
|||
osem_as_measurements(result)
|
||||
}
|
||||
|
||||
get_measurements_ = function (..., endpoint) {
|
||||
osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text') %>%
|
||||
parse_measurement_csv
|
||||
}
|
||||
|
||||
get_stats_ = function (endpoint, cache) {
|
||||
result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache)
|
||||
names(result) = c('boxes', 'measurements', 'measurements_per_minute')
|
||||
|
|
135
R/archive.R
Normal file
135
R/archive.R
Normal file
|
@ -0,0 +1,135 @@
|
|||
# client for archive.opensensemap.org
|
||||
# in this archive, CSV files for measurements of each sensor per day is provided.
|
||||
|
||||
#' Returns the default endpoint for the archive *download*
|
||||
#' While the front end domain is archive.opensensemap.org, file downloads
|
||||
#' are provided via sciebo.
|
||||
osem_archive_endpoint = function () {
|
||||
'https://uni-muenster.sciebo.de/index.php/s/HyTbguBP4EkqBcp/download?path=/data'
|
||||
}
|
||||
|
||||
#' Fetch day-wise measurements for a single box from the openSenseMap archive.
|
||||
#'
|
||||
#' This function is significantly faster than \code{\link{osem_measurements}} for large
|
||||
#' time-frames, as daily CSV dumps for each sensor from
|
||||
#' \href{http://archive.opensensemap.org}{archive.opensensemap.org} are used.
|
||||
#' Note that the latest data available is from the previous day.
|
||||
#'
|
||||
#' By default, data for all sensors of a box is fetched, but you can select a
|
||||
#' subset with a \code{\link[dplyr]{dplyr}}-style NSE filter expression.
|
||||
#'
|
||||
#' The function will warn when no data is available in the selected period,
|
||||
#' but continue the remaining download.
|
||||
#'
|
||||
#' @param x A `sensebox data.frame` of a single box, as retrieved via \code{\link{osem_box}},
|
||||
#' to download measurements for.
|
||||
#' @param fromDate Start date for measurement download.
|
||||
#' @param toDate End date for measurement download (inclusive).
|
||||
#' @param sensorFilter A NSE formula matching to \code{x$sensors}, selecting a subset of sensors.
|
||||
#' @param progress Whether to print download progress information, defaults to \code{TRUE}.
|
||||
#' @return A \code{tbl_df} Containing observations of all selected sensors for each time stamp.
|
||||
#'
|
||||
#' @seealso \href{https://archive.opensensemap.org}{openSenseMap archive}
|
||||
#' @seealso \code{\link{osem_measurements}}
|
||||
#' @seealso \code{\link{osem_box}}
|
||||
#'
|
||||
#' @export
|
||||
osem_measurements_archive = function (x, ...) UseMethod('osem_measurements_archive')
|
||||
|
||||
#' @export
|
||||
osem_measurements_archive.default = function (x, ...) {
|
||||
# NOTE: to implement for a different class:
|
||||
# in order to call `archive_fetch_measurements()`, `box` must be a dataframe
|
||||
# with a single row and the columns `X_id` and `name`
|
||||
stop(paste('not implemented for class', toString(class(x))))
|
||||
}
|
||||
|
||||
#' @describeIn osem_measurements_archive Get daywise measurements for one or
|
||||
#' more sensors of a single box
|
||||
#' @export
|
||||
#' @examples
|
||||
#' # fetch measurements for a single day
|
||||
#' box = osem_box('593bcd656ccf3b0011791f5a')
|
||||
#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-13'))
|
||||
#'
|
||||
#' \donttest{
|
||||
#' # fetch measurements for a date range and selected sensors
|
||||
#' sensors = ~ phenomenon %in% c('Temperatur', 'Beleuchtungsstärke')
|
||||
#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-01'), as.POSIXlt('2018-09-30'), sensorFilter = sensors)
|
||||
#' }
|
||||
osem_measurements_archive.sensebox = function (x, fromDate, toDate = fromDate, sensorFilter = ~ T, progress = T) {
|
||||
if (nrow(x) != 1)
|
||||
stop('this function only works for exactly one senseBox!')
|
||||
|
||||
# filter sensors using NSE, for example: `~ phenomenon == 'Temperatur'`
|
||||
sensors = x$sensors[[1]] %>%
|
||||
dplyr::filter(lazyeval::f_eval(sensorFilter, .))
|
||||
|
||||
# fetch each sensor separately
|
||||
dfs = by(sensors, 1:nrow(sensors), function (sensor) {
|
||||
df = archive_fetch_measurements(x, sensor$id, fromDate, toDate, progress) %>%
|
||||
dplyr::select(createdAt, value) %>%
|
||||
#dplyr::mutate(unit = sensor$unit, sensor = sensor$sensor) %>% # inject sensor metadata
|
||||
dplyr::rename_at(., 'value', function(v) sensor$phenomenon)
|
||||
})
|
||||
|
||||
# merge all data.frames by timestamp
|
||||
dfs %>% purrr::reduce(dplyr::full_join, 'createdAt')
|
||||
}
|
||||
|
||||
#' fetch measurements from archive from a single box, and a single sensor
|
||||
archive_fetch_measurements = function (box, sensor, fromDate, toDate, progress) {
|
||||
dates = list()
|
||||
from = fromDate
|
||||
while (from <= toDate) {
|
||||
dates = append(dates, list(from))
|
||||
from = from + as.difftime(1, units = 'days')
|
||||
}
|
||||
|
||||
http_handle = httr::handle(osem_archive_endpoint()) # reuse the http connection for speed!
|
||||
progress = if (progress && !is_non_interactive()) httr::progress() else NULL
|
||||
|
||||
measurements = lapply(dates, function(date) {
|
||||
url = build_archive_url(date, box, sensor)
|
||||
res = httr::GET(url, progress, handle = http_handle)
|
||||
|
||||
if (httr::http_error(res)) {
|
||||
warning(paste(
|
||||
httr::status_code(res),
|
||||
'on day', format.Date(date, '%F'),
|
||||
'for sensor', sensor
|
||||
))
|
||||
|
||||
if (httr::status_code(res) == 404)
|
||||
return(data.frame(createdAt = character(), value = character()))
|
||||
}
|
||||
|
||||
measurements = httr::content(res, type = 'text', encoding = 'UTF-8') %>%
|
||||
parse_measurement_csv
|
||||
})
|
||||
|
||||
measurements %>% dplyr::bind_rows()
|
||||
}
|
||||
|
||||
#' returns URL to fetch measurements from a sensor for a specific date,
|
||||
#' based on `osem_archive_endpoint()`
|
||||
build_archive_url = function (date, box, sensor) {
|
||||
sensorId = sensor
|
||||
d = format.Date(date, '%F')
|
||||
format = 'csv'
|
||||
|
||||
paste(
|
||||
osem_archive_endpoint(),
|
||||
d,
|
||||
osem_box_to_archivename(box),
|
||||
paste(paste(sensorId, d, sep = '-'), format, sep = '.'),
|
||||
sep = '/'
|
||||
)
|
||||
}
|
||||
|
||||
#' replace chars in box name according to archive script:
|
||||
#' https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66
|
||||
osem_box_to_archivename = function (box) {
|
||||
name = gsub('[^A-Za-z0-9._-]', '_', box$name)
|
||||
paste(box$X_id, name, sep='-')
|
||||
}
|
12
R/box.R
12
R/box.R
|
@ -155,8 +155,18 @@ parse_senseboxdata = function (boxdata) {
|
|||
if (!is.null(thebox$updatedAt))
|
||||
thebox$updatedAt = as.POSIXct(strptime(thebox$updatedAt, format = '%FT%T', tz = 'GMT'))
|
||||
|
||||
# create a dataframe of sensors
|
||||
thebox$sensors = sensors %>%
|
||||
lapply(as.data.frame, stringsAsFactors = F) %>%
|
||||
dplyr::bind_rows(.) %>%
|
||||
dplyr::select(phenomenon = title, id = X_id, unit, sensor = sensorType) %>%
|
||||
list
|
||||
|
||||
# extract metadata from sensors
|
||||
thebox$phenomena = lapply(sensors, function(s) s$title) %>% unlist %>% list
|
||||
thebox$phenomena = sensors %>%
|
||||
setNames(lapply(., function (s) s$`_id`)) %>%
|
||||
lapply(function(s) s$title) %>%
|
||||
unlist %>% list # convert to vector
|
||||
|
||||
# FIXME: if one sensor has NA, max() returns bullshit
|
||||
get_last_measurement = function(s) {
|
||||
|
|
|
@ -37,16 +37,27 @@
|
|||
#' }
|
||||
#'
|
||||
#' @section Retrieving measurements:
|
||||
#' Measurements can be retrieved through \code{\link{osem_measurements}} for a
|
||||
#' given phenomenon only. A subset of measurements may be selected by
|
||||
#'
|
||||
#' There are two ways to retrieve measurements:
|
||||
#' \itemize{
|
||||
#' \item a list of senseBoxes, previously retrieved through
|
||||
#' \code{\link{osem_box}} or \code{\link{osem_boxes}}.
|
||||
#' \item a geographic bounding box, which can be generated with the
|
||||
#' \code{\link[sf]{sf}} package.
|
||||
#' \item a time frame
|
||||
#' \item a exposure type of the given box
|
||||
#' \item \code{\link{osem_measurements_archive}}:
|
||||
#' Downloads measurements for a \emph{single box} from the openSenseMap archive.
|
||||
#' This function does not provide realtime data, but is suitable for long time frames.
|
||||
#'
|
||||
#' \item \code{\link{osem_measurements}}:
|
||||
#' This function retrieves (realtime) measurements from the API. It works for a
|
||||
#' \emph{single phenomenon} only, but provides various filters to select sensors by
|
||||
#'
|
||||
#' \itemize{
|
||||
#' \item a list of senseBoxes, previously retrieved through
|
||||
#' \code{\link{osem_box}} or \code{\link{osem_boxes}}.
|
||||
#' \item a geographic bounding box, which can be generated with the
|
||||
#' \code{\link[sf]{sf}} package.
|
||||
#' \item a time frame
|
||||
#' \item a exposure type of the given box
|
||||
#' }
|
||||
#'
|
||||
#' Use this function with caution for long time frames, as the API becomes
|
||||
#' quite slow is limited to 10.000 measurements per 30 day interval.
|
||||
#' }
|
||||
#'
|
||||
#' Data is returned as \code{tibble} with the class \code{osem_measurements}.
|
||||
|
|
13
man/build_archive_url.Rd
Normal file
13
man/build_archive_url.Rd
Normal file
|
@ -0,0 +1,13 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/archive.R
|
||||
\name{build_archive_url}
|
||||
\alias{build_archive_url}
|
||||
\title{returns URL to fetch measurements from a sensor for a specific date,
|
||||
based on `osem_archive_endpoint()`}
|
||||
\usage{
|
||||
build_archive_url(date, box, sensor)
|
||||
}
|
||||
\description{
|
||||
returns URL to fetch measurements from a sensor for a specific date,
|
||||
based on `osem_archive_endpoint()`
|
||||
}
|
15
man/osem_archive_endpoint.Rd
Normal file
15
man/osem_archive_endpoint.Rd
Normal file
|
@ -0,0 +1,15 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/archive.R
|
||||
\name{osem_archive_endpoint}
|
||||
\alias{osem_archive_endpoint}
|
||||
\title{Default endpoint for the archive download
|
||||
front end domain is archive.opensensemap.org, but file download
|
||||
is provided via sciebo}
|
||||
\usage{
|
||||
osem_archive_endpoint()
|
||||
}
|
||||
\description{
|
||||
Default endpoint for the archive download
|
||||
front end domain is archive.opensensemap.org, but file download
|
||||
is provided via sciebo
|
||||
}
|
13
man/osem_box_to_archivename.Rd
Normal file
13
man/osem_box_to_archivename.Rd
Normal file
|
@ -0,0 +1,13 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/archive.R
|
||||
\name{osem_box_to_archivename}
|
||||
\alias{osem_box_to_archivename}
|
||||
\title{replace chars in box name according to archive script:
|
||||
https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66}
|
||||
\usage{
|
||||
osem_box_to_archivename(box)
|
||||
}
|
||||
\description{
|
||||
replace chars in box name according to archive script:
|
||||
https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66
|
||||
}
|
38
man/osem_measurements_archive.Rd
Normal file
38
man/osem_measurements_archive.Rd
Normal file
|
@ -0,0 +1,38 @@
|
|||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/archive.R
|
||||
\name{osem_measurements_archive}
|
||||
\alias{osem_measurements_archive}
|
||||
\alias{osem_measurements_archive.sensebox}
|
||||
\title{Get day-wise measurements for a single box from the openSenseMap archive.}
|
||||
\usage{
|
||||
osem_measurements_archive(x, ...)
|
||||
|
||||
\method{osem_measurements_archive}{sensebox}(x, fromDate,
|
||||
toDate = fromDate, sensorFilter = ~T, progress = T)
|
||||
}
|
||||
\description{
|
||||
This function is significantly faster than `osem_measurements()` for large
|
||||
time-frames, as dayly CSV dumps for each sensor from
|
||||
<archive.opensensemap.org> are used.
|
||||
Note that the latest data available is from the previous day.
|
||||
By default, data for all sensors of a box is fetched, but you can select a
|
||||
subset with a `dplyr`-style NSE filter expression.
|
||||
}
|
||||
\section{Methods (by class)}{
|
||||
\itemize{
|
||||
\item \code{sensebox}: Get daywise measurements for one or
|
||||
more sensors of a single box
|
||||
}}
|
||||
|
||||
\examples{
|
||||
|
||||
\donttest{
|
||||
# fetch measurements for a single day
|
||||
box = osem_box('593bcd656ccf3b0011791f5a')
|
||||
m = osem_measurements_archive(box, as.POSIXlt('2018-09-13'))
|
||||
|
||||
# fetch measurements for a date range and selected sensors
|
||||
sensors = ~ phenomenon \%in\% c('Temperatur', 'Beleuchtungsstärke')
|
||||
m = osem_measurements_archive(box, as.POSIXlt('2018-09-01'), as.POSIXlt('2018-09-30'), sensorFilter = sensors)
|
||||
}
|
||||
}
|
53
tests/testthat/test_archive.R
Normal file
53
tests/testthat/test_archive.R
Normal file
|
@ -0,0 +1,53 @@
|
|||
source('testhelpers.R')
|
||||
|
||||
context('osem_box_to_archivename()')
|
||||
|
||||
try({
|
||||
boxes = osem_boxes(grouptag = 'ifgi')
|
||||
box = filter(boxes, row_number() == 1)
|
||||
})
|
||||
|
||||
test_that('osem_box_to_archive_name does the correct character replacements', {
|
||||
b = data.frame(
|
||||
name = 'aA1._- äß!"?$%&/',
|
||||
X_id = 'UUID'
|
||||
)
|
||||
|
||||
archivename = opensensmapr:::osem_box_to_archivename(b)
|
||||
expect_equal(archivename, 'UUID-aA1._-__________')
|
||||
})
|
||||
|
||||
test_that('osem_box_to_archive_name works for one box', {
|
||||
if (is.null(box)) skip('no box data could be fetched')
|
||||
|
||||
archivename = opensensmapr:::osem_box_to_archivename(box)
|
||||
expect_length(archivename, 1)
|
||||
expect_type(archivename, 'character')
|
||||
})
|
||||
|
||||
test_that('osem_box_to_archive_name works for multiple boxes', {
|
||||
if (is.null(boxes)) skip('no box data available')
|
||||
|
||||
archivename = opensensmapr:::osem_box_to_archivename(boxes)
|
||||
expect_length(archivename, nrow(boxes))
|
||||
expect_type(archivename, 'character')
|
||||
})
|
||||
|
||||
context('osem_measurements_archive()')
|
||||
|
||||
test_that('osem_measurements_archive works for one box', {
|
||||
if (is.null(box)) skip('no box data could be fetched')
|
||||
|
||||
m = osem_measurements_archive(box, as.POSIXlt('2018-08-08'))
|
||||
expect_length(m, nrow(box$sensors[[1]]) + 1) # one column for each sensor + createdAt
|
||||
expect_s3_class(m, c('osem_measurements', 'tbl_df'))
|
||||
})
|
||||
|
||||
test_that('osem_measurements_archive fails for multiple boxes', {
|
||||
if (is.null(boxes)) skip('no box data available')
|
||||
|
||||
expect_error(
|
||||
osem_measurements_archive(boxes, as.POSIXlt('2018-08-08')),
|
||||
'this function only works for exactly one senseBox!'
|
||||
)
|
||||
})
|
Loading…
Add table
Reference in a new issue