add osem_measurements_archive()

TODO: tests, documentation
measurements_archive
Norwin 6 years ago
parent c4da876761
commit 33a9c42e54

@ -9,6 +9,7 @@ Imports:
httr,
digest,
readr,
purrr,
magrittr
Suggests:
maps,

@ -5,12 +5,15 @@ S3method("[",sensebox)
S3method(osem_measurements,bbox)
S3method(osem_measurements,default)
S3method(osem_measurements,sensebox)
S3method(osem_measurements_archive,default)
S3method(osem_measurements_archive,sensebox)
S3method(osem_phenomena,sensebox)
S3method(plot,osem_measurements)
S3method(plot,sensebox)
S3method(print,osem_measurements)
S3method(print,sensebox)
S3method(summary,sensebox)
export(osem_archive_endpoint)
export(osem_as_measurements)
export(osem_as_sensebox)
export(osem_box)
@ -19,6 +22,7 @@ export(osem_clear_cache)
export(osem_counts)
export(osem_endpoint)
export(osem_measurements)
export(osem_measurements_archive)
export(osem_phenomena)
importFrom(graphics,legend)
importFrom(graphics,par)

@ -34,12 +34,10 @@ get_box_ = function (boxId, endpoint, ...) {
parse_senseboxdata()
}
get_measurements_ = function (..., endpoint) {
result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')
parse_measurement_csv = function (resText) {
# parse the CSV response manually & mute readr
suppressWarnings({
result = readr::read_csv(result, col_types = readr::cols(
result = readr::read_csv(resText, col_types = readr::cols(
# factor as default would raise issues with concatenation of multiple requests
.default = readr::col_character(),
createdAt = readr::col_datetime(),
@ -53,6 +51,11 @@ get_measurements_ = function (..., endpoint) {
osem_as_measurements(result)
}
# request the 'boxes/data' resource as text and hand it to the CSV parser
get_measurements_ = function (..., endpoint) {
  csv_text = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')
  parse_measurement_csv(csv_text)
}
get_stats_ = function (endpoint, cache) {
result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache)
names(result) = c('boxes', 'measurements', 'measurements_per_minute')

@ -0,0 +1,131 @@
# client for archive.opensensemap.org
# in this archive, a zip bundle for measurements of each box per day is provided.
#' Default endpoint for the archive download
#'
#' The front end domain is archive.opensensemap.org, but the file download
#' itself is provided via sciebo.
#'
#' @return The base URL of the archive download as a character string.
#' @export
osem_archive_endpoint = function () {
  archive_base_url = 'https://uni-muenster.sciebo.de/index.php/s/HyTbguBP4EkqBcp/download?path=/data'
  archive_base_url
}
#' Get day-wise measurements for a single box from the openSenseMap archive.
#'
#' This function is significantly faster than `osem_measurements()` for large
#' time-frames, as daily CSV dumps for each sensor from
#' <archive.opensensemap.org> are used.
#' Note that the latest data available is from the previous day.
#' By default, data for all sensors of a box is fetched, but you can select a
#' subset with a `dplyr`-style NSE filter expression.
#'
#' @param x Object to fetch measurements for; the method is selected by the
#'   class of this object.
#' @param ... Further arguments handed to the class-specific method.
#'
#' @export
osem_measurements_archive = function (x, ...) {
  UseMethod('osem_measurements_archive')
}
#' @export
osem_measurements_archive.default = function (x, ...) {
  # NOTE for implementing this generic for another class:
  # `archive_fetch_measurements()` needs `box` to be a dataframe with a
  # single row and the columns `X_id` and `name`.
  class_names = toString(class(x))
  stop(sprintf('not implemented for class %s', class_names))
}
#' @describeIn osem_measurements_archive Get daywise measurements for one or
#'   more sensors of a single box
#' @param fromDate First day to fetch measurements for.
#' @param toDate Last day to fetch measurements for (inclusive). Defaults to
#'   `fromDate`, so that a single day is fetched.
#' @param sensorFilter One-sided formula that is evaluated against the sensor
#'   table of the box (`x$sensors[[1]]`) to select the sensors to fetch, for
#'   example `~ phenomenon == 'Temperatur'`. By default all sensors are used.
#' @param progress Whether a progress indicator should be shown for the
#'   downloads.
#' @return A data.frame with the column `createdAt` and one value column per
#'   selected sensor, named after the phenomenon of the sensor.
#' @export
#' @examples
#'
#' \donttest{
#' # fetch measurements for a single day
#' box = osem_box('593bcd656ccf3b0011791f5a')
#' m = osem_measurements_archive(box, as.POSIXlt('2018-09-13'))
#'
#' # fetch measurements for a date range and selected sensors
#' sensors = ~ phenomenon %in% c('Temperatur', 'Beleuchtungsstärke')
#' m = osem_measurements_archive(
#'   box,
#'   as.POSIXlt('2018-09-01'),
#'   as.POSIXlt('2018-09-30'),
#'   sensorFilter = sensors
#' )
#' }
osem_measurements_archive.sensebox = function (x, fromDate, toDate = fromDate, sensorFilter = ~ TRUE, progress = TRUE) {
  if (nrow(x) != 1)
    stop('this function only works for exactly one senseBox!')

  # filter sensors using NSE, for example: `~ phenomenon == 'Temperatur'`
  sensors = x$sensors[[1]] %>%
    dplyr::filter(lazyeval::f_eval(sensorFilter, .))

  # fail early with a clear message: there is nothing to fetch or to join
  # when the filter does not match any sensor
  if (nrow(sensors) == 0)
    stop('no sensor of this senseBox matches the given sensorFilter!')

  # fetch each sensor separately; seq_len() is safe for any number of rows
  dfs = lapply(seq_len(nrow(sensors)), function (i) {
    sensor = sensors[i, ]
    df = archive_fetch_measurements(x, sensor$id, fromDate, toDate, progress) %>%
      dplyr::select(createdAt, value)

    # name the value column after the phenomenon, so that the columns of
    # different sensors stay distinguishable after the join
    names(df)[names(df) == 'value'] = sensor$phenomenon
    df
  })

  # merge all data.frames by timestamp
  purrr::reduce(dfs, dplyr::full_join, 'createdAt')
}
#' Fetch measurements from the archive for a single box and a single sensor
#'
#' Requests one CSV file per day between `fromDate` and `toDate` (both
#' inclusive) and binds the parsed results together.
#'
#' @param box Box to fetch measurements for; handed to `build_archive_url()`,
#'   which requires the fields `X_id` and `name`.
#' @param sensor ID of the sensor to fetch measurements for.
#' @param fromDate First day to fetch.
#' @param toDate Last day to fetch (inclusive).
#' @param progress Whether to show a download progress indicator. It is only
#'   shown when the session is interactive.
#'
#' @return The measurements of all fetched days, bound together row-wise.
#'   Days that are missing in the archive (HTTP 404) raise a warning and
#'   contribute no rows.
#' @keywords internal
archive_fetch_measurements = function (box, sensor, fromDate, toDate, progress) {
  # enumerate all days to fetch in steps of one day; yields an empty list
  # when `fromDate` is later than `toDate`
  dates = list()
  from = fromDate
  while (from <= toDate) {
    dates = append(dates, list(from))
    from = from + as.difftime(1, units = 'days')
  }

  http_handle = httr::handle(osem_archive_endpoint()) # reuse the http connection for speed!
  progress = if (progress && !is_non_interactive()) httr::progress() else NULL

  measurements = lapply(dates, function (date) {
    url = build_archive_url(date, box, sensor)
    res = httr::GET(url, progress, handle = http_handle)

    if (httr::http_error(res)) {
      warning(paste(
        httr::status_code(res),
        'on day', format.Date(date, '%F'),
        'for sensor', sensor
      ))

      # no file for this day: contribute an empty data.frame, which
      # `bind_rows()` can handle (the function `as.data.frame` itself can not)
      if (httr::status_code(res) == 404)
        return(data.frame())

      # NOTE(review): for other error codes the response body is still handed
      # to the CSV parser below, as before. Confirm whether these days should
      # be skipped as well.
    }

    httr::content(res, type = 'text', encoding = 'UTF-8') %>%
      parse_measurement_csv()
  })

  dplyr::bind_rows(measurements)
}
#' Build the URL to fetch measurements from a sensor for a specific date
#'
#' The URL is based on `osem_archive_endpoint()` and has the form
#' `<endpoint>/<date>/<box archive name>/<sensor>-<date>.csv`, with the date
#' formatted as `%F`.
#'
#' @param date Day to build the URL for.
#' @param box Box the sensor belongs to; handed to `osem_box_to_archivename()`.
#' @param sensor ID of the sensor.
#' @return The download URL as a character string.
build_archive_url = function (date, box, sensor) {
  day = format.Date(date, '%F')

  # file name: <sensor>-<date>.csv
  file_stem = paste(sensor, day, sep = '-')
  file_name = paste(file_stem, 'csv', sep = '.')

  # directory of the box within the folder of the day
  box_dir = osem_box_to_archivename(box)

  paste(osem_archive_endpoint(), day, box_dir, file_name, sep = '/')
}
#' Derive the name of a box as it is used in the archive
#'
#' Replaces chars in the box name according to the archive script:
#' https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66
#'
#' @param box Box with the fields `X_id` and `name`.
#' @return The box ID and the sanitized box name, joined by `-`.
osem_box_to_archivename = function (box) {
  # every char outside of letters, digits, `.`, `_` and `-` becomes `_`
  sanitized_name = gsub(pattern = '[^A-Za-z0-9._-]', replacement = '_', x = box$name)
  paste(box$X_id, sanitized_name, sep = '-')
}

@ -155,8 +155,18 @@ parse_senseboxdata = function (boxdata) {
if (!is.null(thebox$updatedAt))
thebox$updatedAt = as.POSIXct(strptime(thebox$updatedAt, format = '%FT%T', tz = 'GMT'))
# create a dataframe of sensors
thebox$sensors = sensors %>%
lapply(as.data.frame, stringsAsFactors = F) %>%
dplyr::bind_rows(.) %>%
dplyr::select(phenomenon = title, id = X_id, unit, sensor = sensorType) %>%
list
# extract metadata from sensors
thebox$phenomena = lapply(sensors, function(s) s$title) %>% unlist %>% list
thebox$phenomena = sensors %>%
setNames(lapply(., function (s) s$`_id`)) %>%
lapply(function(s) s$title) %>%
unlist %>% list # convert to vector
# FIXME: if one sensor has NA, max() returns bullshit
get_last_measurement = function(s) {

@ -0,0 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/archive.R
\name{build_archive_url}
\alias{build_archive_url}
\title{returns URL to fetch measurements from a sensor for a specific date,
based on `osem_archive_endpoint()`}
\usage{
build_archive_url(date, box, sensor)
}
\description{
returns URL to fetch measurements from a sensor for a specific date,
based on `osem_archive_endpoint()`
}

@ -0,0 +1,15 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/archive.R
\name{osem_archive_endpoint}
\alias{osem_archive_endpoint}
\title{Default endpoint for the archive download
front end domain is archive.opensensemap.org, but file download
is provided via sciebo}
\usage{
osem_archive_endpoint()
}
\description{
Default endpoint for the archive download
front end domain is archive.opensensemap.org, but file download
is provided via sciebo
}

@ -0,0 +1,13 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/archive.R
\name{osem_box_to_archivename}
\alias{osem_box_to_archivename}
\title{replace chars in box name according to archive script:
https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66}
\usage{
osem_box_to_archivename(box)
}
\description{
replace chars in box name according to archive script:
https://github.com/sensebox/osem-archiver/blob/612e14b/helpers.sh#L66
}

@ -0,0 +1,38 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/archive.R
\name{osem_measurements_archive}
\alias{osem_measurements_archive}
\alias{osem_measurements_archive.sensebox}
\title{Get day-wise measurements for a single box from the openSenseMap archive.}
\usage{
osem_measurements_archive(x, ...)
\method{osem_measurements_archive}{sensebox}(x, fromDate,
toDate = fromDate, sensorFilter = ~T, progress = T)
}
\description{
This function is significantly faster than `osem_measurements()` for large
time-frames, as daily CSV dumps for each sensor from
<archive.opensensemap.org> are used.
Note that the latest data available is from the previous day.
By default, data for all sensors of a box is fetched, but you can select a
subset with a `dplyr`-style NSE filter expression.
}
\section{Methods (by class)}{
\itemize{
\item \code{sensebox}: Get daywise measurements for one or
more sensors of a single box
}}
\examples{
\donttest{
# fetch measurements for a single day
box = osem_box('593bcd656ccf3b0011791f5a')
m = osem_measurements_archive(box, as.POSIXlt('2018-09-13'))
# fetch measurements for a date range and selected sensors
sensors = ~ phenomenon \%in\% c('Temperatur', 'Beleuchtungsstärke')
m = osem_measurements_archive(box, as.POSIXlt('2018-09-01'), as.POSIXlt('2018-09-30'), sensorFilter = sensors)
}
}
Loading…
Cancel
Save