update docs + vignette builds

measurements_archive
noerw 6 years ago
parent e49ae4bb50
commit 553772d209

@ -7,6 +7,9 @@
#' @details Note that the API caches these values for 5 minutes.
#'
#' @param endpoint The URL of the openSenseMap API
#' @param cache Whether to cache the result, defaults to \code{NA} (no caching).
#' If a valid path to a directory is given, the response will be cached there.
#' Subsequent identical requests will return the cached data instead.
#' @return A named \code{list} containing the counts
#'
#' @export

@ -70,3 +70,4 @@ st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
## ------------------------------------------------------------------------
pm25 %>% filter(invalid == FALSE) %>% plot()

File diff suppressed because one or more lines are too long

@ -0,0 +1,96 @@
## ----cache---------------------------------------------------------------
# request all boxes, passing the session's tempdir() as cache directory
b = osem_boxes(cache = tempdir())
# list the files in tempdir() whose names match the cache file pattern
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
# the next identical request will hit the cache only!
b = osem_boxes(cache = tempdir())
# requests without the cache parameter will still be performed normally
b = osem_boxes()
## ----cache_custom--------------------------------------------------------
cacheDir = getwd() # current working directory
b = osem_boxes(cache = cacheDir)
# the next identical request will hit the cache only!
b = osem_boxes(cache = cacheDir)
## ----clearcache----------------------------------------------------------
osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache
## ----setup, results='hide'-----------------------------------------------
# this section requires:
library(opensensmapr)
library(jsonlite)
library(readr)
# first get our example data:
boxes = osem_boxes(grouptag = 'ifgi')
measurements = osem_measurements(boxes, phenomenon = 'PM10')
## ----serialize_json------------------------------------------------------
# serializing senseBoxes to JSON, and loading from file again.
# serializeJSON() is used so that the object can be restored by
# unserializeJSON() below.
write(jsonlite::serializeJSON(boxes), 'boxes.json')
boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
## ----serialize_attrs-----------------------------------------------------
# note the toJSON call: unlike serializeJSON() it does not store the R object
# metadata (classes, attributes), so it is re-applied after loading
write(jsonlite::toJSON(boxes), 'boxes_bad.json')
boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
# restore the sensebox class on the plain data that came back from JSON
boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
class(boxes_with_attrs)
## ----osem_offline--------------------------------------------------------
# offline logic
# Return the result of `func(...)`, persisted in `file`.
#   func:   function producing the data (called only when `file` is missing)
#   file:   path of the file the data is stored in / loaded from
#   format: 'json' stores via jsonlite::serializeJSON(); any other value
#           stores via saveRDS()
#   ...:    arguments forwarded to `func`
osem_offline = function (func, file, format='rds', ...) {
  # nothing stored yet: produce the data, write it to disk, hand it back
  if (!file.exists(file)) {
    fresh = func(...)
    if (format == 'json') {
      write(jsonlite::serializeJSON(fresh), file = file)
    } else {
      saveRDS(fresh, file)
    }
    return(fresh)
  }

  # stored by an earlier run: deserialize with the matching reader
  if (format == 'json') {
    return(jsonlite::unserializeJSON(readr::read_file(file)))
  }
  readRDS(file)
}
# wrappers for each download function
# Offline variant of opensensmapr::osem_measurements(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_measurements()
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that positional arguments (such as a
#           phenomenon) reach osem_measurements() instead of being bound to
#           osem_offline()'s `format` parameter.
osem_measurements_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_measurements, file, format = format, ...)
}
# Offline variant of opensensmapr::osem_boxes(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_boxes()
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that positional arguments reach osem_boxes()
#           instead of being bound to osem_offline()'s `format` parameter.
osem_boxes_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_boxes, file, format = format, ...)
}
# Offline variant of opensensmapr::osem_box(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_box() (e.g. the box ID)
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that a positional box ID reaches osem_box()
#           instead of being bound to osem_offline()'s `format` parameter.
osem_box_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_box, file, format = format, ...)
}
# Offline variant of opensensmapr::osem_counts(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_counts()
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that positional arguments reach osem_counts()
#           instead of being bound to osem_offline()'s `format` parameter.
osem_counts_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_counts, file, format = format, ...)
}
## ----test----------------------------------------------------------------
# first run; will download and save to disk
b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
# consecutive runs; will read from disk
b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
# compare the class vectors of the downloaded and the reloaded object
class(b1) == class(b2)
# we can even omit the arguments now (though that's not really the point here):
# the file already exists, so the download function is not called at all
b3 = osem_boxes_offline('mobileboxes.rds')
nrow(b1) == nrow(b3)
# verify that the custom sensebox methods are still working
summary(b2)
plot(b3)
## ----cleanup, results='hide'---------------------------------------------
# remove the files written by the examples above
# (file.remove() warns for paths that do not exist, so list only those)
file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json')

@ -0,0 +1,169 @@
---
title: "opensensmapr reproducibility: Loading openSenseMap Data from Files"
author: "Norwin Roosen"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
%\VignetteIndexEntry{opensensmapr reproducibility: Loading openSenseMap Data from Files}
%\VignetteEngine{knitr::rmarkdown}
%\VignetteEncoding{UTF-8}
---
It may be useful to download data from openSenseMap only once.
For reproducible results, the data could be saved to disk, and reloaded at a
later point.
This avoids:
- changed results for queries without date parameters,
- unnecessary wait times,
- risk of API changes / API unavailability,
- stress on the openSenseMap-server.
This vignette shows how to use this built-in `opensensmapr` feature, and
how to do it yourself if you want to store the data in other formats.
## Using the opensensmapr Caching Feature
All data retrieval functions of `opensensmapr` have a built-in caching feature,
which serializes an API response to disk.
Subsequent identical requests will then return the serialized data instead of making
another request.
To do so, each request is given a unique ID based on its parameters.
To use this feature, just add a path to a directory to the `cache` parameter:
```{r cache}
# request all boxes, passing the session's tempdir() as cache directory
b = osem_boxes(cache = tempdir())
# list the files in tempdir() whose names match the cache file pattern
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
# the next identical request will hit the cache only!
b = osem_boxes(cache = tempdir())
# requests without the cache parameter will still be performed normally
b = osem_boxes()
```
You can maintain multiple caches simultaneously, which allows you to store only
the serialized data related to a script in that script's directory:
```{r cache_custom}
cacheDir = getwd() # current working directory
b = osem_boxes(cache = cacheDir)
# the next identical request will hit the cache only!
b = osem_boxes(cache = cacheDir)
```
To get fresh results again, just call `osem_clear_cache()` for the respective cache:
```{r clearcache}
osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache
```
## Custom (De-) Serialization
If you want to roll your own serialization method to support custom data formats,
here's how:
```{r setup, results='hide'}
# this section requires:
library(opensensmapr)
library(jsonlite)
library(readr)
# first get our example data:
boxes = osem_boxes(grouptag = 'ifgi')
measurements = osem_measurements(boxes, phenomenon = 'PM10')
```
If you are paranoid and worry about `.rds` files not being decodable anymore
in the (distant) future, you could serialize to a plain text format such as JSON.
This of course comes at the cost of storage space and performance.
```{r serialize_json}
# serializing senseBoxes to JSON, and loading from file again.
# serializeJSON() is used so that the object can be restored by
# unserializeJSON() below.
write(jsonlite::serializeJSON(boxes), 'boxes.json')
boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
```
Both methods also persist the R object metadata (classes, attributes).
If you were to use a serialization method that can't persist object metadata, you
could re-apply it with the following functions:
```{r serialize_attrs}
# note the toJSON call: unlike serializeJSON() it does not store the R object
# metadata (classes, attributes), so it is re-applied after loading
write(jsonlite::toJSON(boxes), 'boxes_bad.json')
boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
# restore the sensebox class on the plain data that came back from JSON
boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
class(boxes_with_attrs)
```
The same goes for measurements via `osem_as_measurements()`.
## Workflow for reproducible code
For truly reproducible code you want it to work and return the same results --
no matter if you run it the first time or a consecutive time, and without making
changes to it.
Therefore we need a wrapper around the save-to-file & load-from-file logic.
The following examples show a way to do just that, and were inspired by
[this reproducible analysis by Daniel Nuest](https://github.com/nuest/sensebox-binder).
```{r osem_offline}
# offline logic
# Return the result of `func(...)`, persisted in `file`.
#   func:   function producing the data (called only when `file` is missing)
#   file:   path of the file the data is stored in / loaded from
#   format: 'json' stores via jsonlite::serializeJSON(); any other value
#           stores via saveRDS()
#   ...:    arguments forwarded to `func`
osem_offline = function (func, file, format='rds', ...) {
  # nothing stored yet: produce the data, write it to disk, hand it back
  if (!file.exists(file)) {
    fresh = func(...)
    if (format == 'json') {
      write(jsonlite::serializeJSON(fresh), file = file)
    } else {
      saveRDS(fresh, file)
    }
    return(fresh)
  }

  # stored by an earlier run: deserialize with the matching reader
  if (format == 'json') {
    return(jsonlite::unserializeJSON(readr::read_file(file)))
  }
  readRDS(file)
}
# wrappers for each download function
# Offline variant of opensensmapr::osem_measurements(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_measurements()
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that positional arguments (such as a
#           phenomenon) reach osem_measurements() instead of being bound to
#           osem_offline()'s `format` parameter.
osem_measurements_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_measurements, file, format = format, ...)
}
# Offline variant of opensensmapr::osem_boxes(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_boxes()
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that positional arguments reach osem_boxes()
#           instead of being bound to osem_offline()'s `format` parameter.
osem_boxes_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_boxes, file, format = format, ...)
}
# Offline variant of opensensmapr::osem_box(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_box() (e.g. the box ID)
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that a positional box ID reaches osem_box()
#           instead of being bound to osem_offline()'s `format` parameter.
osem_box_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_box, file, format = format, ...)
}
# Offline variant of opensensmapr::osem_counts(), built on osem_offline().
#   file:   path the result is stored in / loaded from
#   ...:    arguments forwarded to osem_counts()
#   format: storage format handed to osem_offline(). It is declared after `...`
#           and passed by name, so that positional arguments reach osem_counts()
#           instead of being bound to osem_offline()'s `format` parameter.
osem_counts_offline = function (file, ..., format = 'rds') {
  osem_offline(opensensmapr::osem_counts, file, format = format, ...)
}
```
That's it! Now let's try it out:
```{r test}
# first run; will download and save to disk
b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
# consecutive runs; will read from disk
b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
# compare the class vectors of the downloaded and the reloaded object
class(b1) == class(b2)
# we can even omit the arguments now (though that's not really the point here):
# the file already exists, so the download function is not called at all
b3 = osem_boxes_offline('mobileboxes.rds')
nrow(b1) == nrow(b3)
# verify that the custom sensebox methods are still working
summary(b2)
plot(b3)
```
To re-download the data, just clear the files that were created in the process:
```{r cleanup, results='hide'}
# remove the files written by the examples above
# (file.remove() warns for paths that do not exist, so list only those)
file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json')
```
A possible extension to this scheme comes to mind: Omit the specification of a
filename, and assign a unique ID to the request instead.
For example, one could calculate the SHA-1 hash of the parameters, and use it
as filename.

File diff suppressed because one or more lines are too long

@ -4,12 +4,15 @@
\alias{osem_box}
\title{Get a single senseBox by its ID}
\usage{
osem_box(boxId, endpoint = osem_endpoint())
osem_box(boxId, endpoint = osem_endpoint(), cache = NA)
}
\arguments{
\item{boxId}{A string containing a senseBox ID}
\item{endpoint}{The URL of the openSenseMap API instance}
\item{cache}{Whether to cache the result, defaults to false.
If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.}
}
\value{
A \code{sensebox data.frame} containing a box in each row
@ -18,9 +21,17 @@ A \code{sensebox data.frame} containing a box in each row
Get a single senseBox by its ID
}
\examples{
# get a specific box by ID
b = osem_box('57000b8745fd40c8196ad04c')
\donttest{
# get a specific box by ID
b = osem_box('57000b8745fd40c8196ad04c')
# get a specific box by ID from a custom (selfhosted) openSenseMap API
b = osem_box('51030b8725fd30c2196277da', 'http://api.my-custom-osem.com')
# get a specific box by ID and cache the response, in order to provide
# reproducible results in the future.
b = osem_box('51030b8725fd30c2196277da', cache = tempdir())
}
}
\seealso{
\href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
@ -28,4 +39,6 @@ b = osem_box('57000b8745fd40c8196ad04c')
\code{\link{osem_phenomena}}
\code{\link{osem_boxes}}
\code{\link{osem_clear_cache}}
}

@ -6,7 +6,7 @@
\usage{
osem_boxes(exposure = NA, model = NA, grouptag = NA, date = NA,
from = NA, to = NA, phenomenon = NA, endpoint = osem_endpoint(),
progress = TRUE)
progress = TRUE, cache = NA)
}
\arguments{
\item{exposure}{Only return boxes with the given exposure ('indoor', 'outdoor', 'mobile')}
@ -26,7 +26,11 @@ time interval as specified through \code{date} or \code{from / to}}
\item{endpoint}{The URL of the openSenseMap API instance}
\item{progress}{Whether to print download progress information defaults to \code{TRUE}}
\item{progress}{Whether to print download progress information, defaults to \code{TRUE}}
\item{cache}{Whether to cache the result, defaults to false.
If a valid path to a directory is given, the response will be cached there.
Subsequent identical requests will return the cached data instead.}
}
\value{
A \code{sensebox data.frame} containing a box in each row
@ -41,6 +45,7 @@ Note that some filters do not work together:
}
}
\examples{
\donttest{
# get *all* boxes available on the API
b = osem_boxes()
@ -48,8 +53,31 @@ Note that some filters do not work together:
# get all boxes with grouptag 'ifgi' that are placed outdoors
b = osem_boxes(grouptag = 'ifgi', exposure = 'outdoor')
# get all boxes with model 'luftdaten_sds011_dht22'
b = osem_boxes(model = 'luftdaten_sds011_dht22')
# get all boxes that have measured PM2.5 in the last 4 hours
b = osem_boxes(date = Sys.time(), phenomenon = 'PM2.5')
# get all boxes that have measured PM2.5 between Jan & Feb 2018
library(lubridate)
b = osem_boxes(
from = date('2018-01-01'),
to = date('2018-02-01'),
phenomenon = 'PM2.5'
)
# get all boxes from a custom (selfhosted) openSenseMap API
# (osem_boxes() lists boxes; osem_box() would require a box ID)
b = osem_boxes(endpoint = 'http://api.my-custom-osem.com')
# get all boxes and cache the response, in order to provide
# reproducible results in the future. Also useful for development
# to avoid repeated loading times!
b = osem_boxes(cache = getwd())
b = osem_boxes(cache = getwd())
# get *all* boxes available on the API, without showing download progress
b = osem_boxes(progress = FALSE)
}
}
\seealso{
@ -58,4 +86,6 @@ Note that some filters do not work together:
\code{\link{osem_phenomena}}
\code{\link{osem_box}}
\code{\link{osem_clear_cache}}
}

@ -0,0 +1,28 @@
% Generated by roxygen2: do not edit by hand
% Please edit documentation in R/api.R
\name{osem_clear_cache}
\alias{osem_clear_cache}
\title{Purge cached responses from the given cache directory}
\usage{
osem_clear_cache(location = tempdir())
}
\arguments{
\item{location}{A path to the cache directory, defaults to the
sessions' \code{tempdir()}}
}
\value{
Boolean whether the deletion was successful
}
\description{
Purge cached responses from the given cache directory
}
\examples{
\donttest{
osem_boxes(cache = tempdir())
osem_clear_cache()
cachedir = paste(getwd(), 'osemcache', sep = '/')
osem_boxes(cache = cachedir)
osem_clear_cache(cachedir)
}
}

@ -4,10 +4,14 @@
\alias{osem_counts}
\title{Get count statistics of the openSenseMap Instance}
\usage{
osem_counts(endpoint = osem_endpoint())
osem_counts(endpoint = osem_endpoint(), cache = NA)
}
\arguments{
\item{endpoint}{The URL of the openSenseMap API}
\item{cache}{Whether to cache the result, defaults to false.
If a valid path to a directory is given, the response will be cached there.
Subsequent identical requests will return the cached data instead.}
}
\value{
A named \code{list} containing the counts

@ -12,11 +12,12 @@ osem_measurements(x, ...)
\method{osem_measurements}{default}(x, ...)
\method{osem_measurements}{bbox}(x, phenomenon, exposure = NA, from = NA,
to = NA, columns = NA, ..., endpoint = osem_endpoint(), progress = T)
to = NA, columns = NA, ..., endpoint = osem_endpoint(), progress = T,
cache = NA)
\method{osem_measurements}{sensebox}(x, phenomenon, exposure = NA,
from = NA, to = NA, columns = NA, ..., endpoint = osem_endpoint(),
progress = T)
progress = T, cache = NA)
}
\arguments{
\item{x}{Depending on the method, either
@ -42,6 +43,9 @@ osem_measurements(x, ...)
\item{endpoint}{The URL of the openSenseMap API}
\item{progress}{Whether to print download progress information}
\item{cache}{Whether to cache the result, defaults to false.
If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.}
}
\value{
An \code{osem_measurements data.frame} containing the
@ -63,17 +67,49 @@ a bounding box spanning the whole world.
\examples{
\donttest{
# get measurements from all boxes
m1 = osem_measurements('Windrichtung')
# get measurements from all boxes on the phenomenon 'PM10' from the last 48h
m = osem_measurements('PM10')
# get measurements from all mobile boxes on the phenomenon 'PM10' from the last 48h
m = osem_measurements('PM10', exposure = 'mobile')
# get measurements and cache them locally in the working directory.
# subsequent identical requests will load from the cache instead, ensuring
# reproducibility and saving time and bandwidth!
m = osem_measurements('PM10', exposure = 'mobile', cache = getwd())
m = osem_measurements('PM10', exposure = 'mobile', cache = getwd())
# get measurements returning a custom selection of columns
m = osem_measurements('PM10', exposure = 'mobile', columns = c(
'value',
'boxId',
'sensorType',
'lat',
'lon',
'height'
))
}
\donttest{
# get measurements from sensors within a bounding box
# get measurements from sensors within a custom WGS84 bounding box
bbox = structure(c(7, 51, 8, 52), class = 'bbox')
m2 = osem_measurements(bbox, 'Temperatur')
points = sf::st_multipoint(matrix(c(7.5, 7.8, 51.7, 52), 2, 2))
bbox2 = sf::st_bbox(points)
m3 = osem_measurements(bbox2, 'Temperatur', exposure = 'outdoor')
m = osem_measurements(bbox, 'Temperatur')
# construct a bounding box 12km around berlin using the sf package,
# and get measurements from stations within that box
library(sf)
bbox2 = st_point(c(13.4034, 52.5120)) \%>\%
  st_sfc(crs = 4326) \%>\%
  st_transform(3857) \%>\% # allow setting a buffer in meters
  # set_units() lives in the units package, which library(sf) does not attach
  st_buffer(units::set_units(12, km)) \%>\%
  st_transform(4326) \%>\% # the opensensemap expects WGS 84
  st_bbox()
m = osem_measurements(bbox2, 'Temperatur', exposure = 'outdoor')
# construct a bounding box from two points,
# and get measurements from stations within that box
points = st_multipoint(matrix(c(7.5, 7.8, 51.7, 52), 2, 2))
bbox3 = st_bbox(points)
# query with the box built from the two points above
m = osem_measurements(bbox3, 'Temperatur', exposure = 'outdoor')
}
\donttest{
# get measurements from a set of boxes
@ -83,10 +119,25 @@ a bounding box spanning the whole world.
# ...or a single box
b = osem_box('57000b8745fd40c8196ad04c')
m5 = osem_measurements(b, phenomenon = 'Temperatur')
# get measurements from a single box from the last 40 days.
# requests are paged for long time frames, so the API's limitation
# does not apply!
library(lubridate)
m1 = osem_measurements(
b,
'Temperatur',
to = now(),
from = now() - days(40)
)
}
}
\seealso{
\href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)}
\code{\link{osem_box}}
\code{\link{osem_boxes}}
\code{\link{osem_clear_cache}}
}

Loading…
Cancel
Save