1
0
Fork 0
mirror of https://github.com/sensebox/opensensmapr synced 2025-02-22 14:53:57 +01:00

add caching feature

This commit is contained in:
noerw 2018-05-25 01:34:47 +02:00
parent 4f95ae19a8
commit dd6d8c8539
11 changed files with 244 additions and 30 deletions

View file

@ -7,6 +7,7 @@ BugReports: http://github.com/noerw/opensensmapR/issues
Imports: Imports:
dplyr, dplyr,
httr, httr,
digest,
magrittr magrittr
Suggests: Suggests:
maps, maps,

View file

@ -19,6 +19,7 @@ export(osem_as_measurements)
export(osem_as_sensebox) export(osem_as_sensebox)
export(osem_box) export(osem_box)
export(osem_boxes) export(osem_boxes)
export(osem_clear_cache)
export(osem_counts) export(osem_counts)
export(osem_endpoint) export(osem_endpoint)
export(osem_measurements) export(osem_measurements)

68
R/api.R
View file

@ -12,7 +12,7 @@ osem_endpoint = function() {
} }
get_boxes_ = function (..., endpoint) { get_boxes_ = function (..., endpoint) {
response = osem_request_(endpoint, path = c('boxes'), ...) response = osem_get_resource(endpoint, path = c('boxes'), ...)
if (length(response) == 0) { if (length(response) == 0) {
warning('no senseBoxes found for this query') warning('no senseBoxes found for this query')
@ -29,13 +29,13 @@ get_boxes_ = function (..., endpoint) {
df df
} }
get_box_ = function (boxId, endpoint) { get_box_ = function (boxId, endpoint, ...) {
osem_request_(endpoint, path = c('boxes', boxId), progress = F) %>% osem_get_resource(endpoint, path = c('boxes', boxId), ..., progress = FALSE) %>%
parse_senseboxdata() parse_senseboxdata()
} }
get_measurements_ = function (..., endpoint) { get_measurements_ = function (..., endpoint) {
result = osem_request_(endpoint, c('boxes', 'data'), ..., type = 'text') result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')
# parse the CSV response manually & mute readr # parse the CSV response manually & mute readr
suppressWarnings({ suppressWarnings({
@ -53,15 +53,67 @@ get_measurements_ = function (..., endpoint) {
osem_as_measurements(result) osem_as_measurements(result)
} }
get_stats_ = function (endpoint) { get_stats_ = function (endpoint, cache) {
result = osem_request_(endpoint, path = c('stats'), progress = F) result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache)
names(result) = c('boxes', 'measurements', 'measurements_per_minute') names(result) = c('boxes', 'measurements', 'measurements_per_minute')
result result
} }
osem_request_ = function (host, path, ..., type = 'parsed', progress) { #' Get any resource from openSenseMap API, possibly cache the response
#'
#' @param host API host
#' @param path resource URL
#' @param ... All other parameters interpreted as request query parameters
#' @param type Passed to httr; 'parsed' to return an R object from the response, 'text for a raw response
#' @param progress Boolean whether to print download progress information
#' @param cache Optional path to a directory were responses will be cached. If not NA, no requests will be made when a request for the given is already cached.
#' @return Result of a Request to openSenseMap API
#' @noRd
osem_get_resource = function (host, path, ..., type = 'parsed', progress = T, cache = NA) {
query = list(...)
if (!is.na(cache)) {
filename = osem_cache_filename(path, query, host) %>% paste(cache, ., sep = '/')
if (file.exists(filename))
return(readRDS(filename))
}
res = osem_request_(host, path, query, type, progress)
if (!is.na(cache)) saveRDS(res, filename)
res
}
osem_cache_filename = function (path, query = list(), host = osem_endpoint()) {
httr::modify_url(url = host, path = path, query = query) %>%
digest::digest(algo = 'sha1') %>%
paste('osemcache', ., 'rds', sep = '.')
}
#' Purge cached responses from the given cache directory
#'
#' @param location A path to the cache directory, defaults to the
#' sessions' \code{tempdir()}
#' @return Boolean whether the deletion was successful
#'
#' @export
#' @examples
#' \donttest{
#' osem_boxes(cache = tempdir())
#' osem_clear_cache()
#'
#' cachedir = paste(getwd(), 'osemcache', sep = '/')
#' osem_boxes(cache = cachedir)
#' osem_clear_cache(cachedir)
#' }
osem_clear_cache = function (location = tempdir()) {
list.files(location, pattern = 'osemcache\\..*\\.rds') %>%
lapply(function (f) file.remove(paste(location, f, sep = '/'))) %>%
unlist() %>%
all()
}
osem_request_ = function (host, path, query = list(), type = 'parsed', progress = TRUE) {
progress = if (progress && !is_non_interactive()) httr::progress() else NULL progress = if (progress && !is_non_interactive()) httr::progress() else NULL
res = httr::GET(host, progress, path = path, query = list(...)) res = httr::GET(host, progress, path = path, query = query)
if (httr::http_error(res)) { if (httr::http_error(res)) {
content = httr::content(res, 'parsed', encoding = 'UTF-8') content = httr::content(res, 'parsed', encoding = 'UTF-8')

20
R/box.R
View file

@ -19,14 +19,20 @@
#' @param phenomenon Only return boxes that measured the given phenomenon in the #' @param phenomenon Only return boxes that measured the given phenomenon in the
#' time interval as specified through \code{date} or \code{from / to} #' time interval as specified through \code{date} or \code{from / to}
#' @param endpoint The URL of the openSenseMap API instance #' @param endpoint The URL of the openSenseMap API instance
#' @param progress Whether to print download progress information defaults to \code{TRUE} #' @param progress Whether to print download progress information, defaults to \code{TRUE}
#' @param cache Whether to cache the result, defaults to false.
#' If a valid path to a directory is given, the response will be cached there.
#' Subsequent identical requests will return the cached data instead.
#' @return A \code{sensebox data.frame} containing a box in each row #' @return A \code{sensebox data.frame} containing a box in each row
#' #'
#' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)} #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
#' @seealso \code{\link{osem_phenomena}} #' @seealso \code{\link{osem_phenomena}}
#' @seealso \code{\link{osem_box}} #' @seealso \code{\link{osem_box}}
#' @seealso \code{\link{osem_clear_cache}}
#'
#' @export #' @export
#' @examples #' @examples
#'
#' \donttest{ #' \donttest{
#' # get *all* boxes available on the API #' # get *all* boxes available on the API
#' b = osem_boxes() #' b = osem_boxes()
@ -40,7 +46,8 @@
osem_boxes = function (exposure = NA, model = NA, grouptag = NA, osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
date = NA, from = NA, to = NA, phenomenon = NA, date = NA, from = NA, to = NA, phenomenon = NA,
endpoint = osem_endpoint(), endpoint = osem_endpoint(),
progress = TRUE) { progress = TRUE,
cache = NA) {
# error, if phenomenon, but no time given # error, if phenomenon, but no time given
if (!is.na(phenomenon) && is.na(date) && is.na(to) && is.na(from)) if (!is.na(phenomenon) && is.na(date) && is.na(to) && is.na(from))
@ -58,7 +65,7 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
stop('Parameter "from"/"to" must be used together') stop('Parameter "from"/"to" must be used together')
} }
query = list(endpoint = endpoint, progress = progress) query = list(endpoint = endpoint, progress = progress, cache = cache)
if (!is.na(exposure)) query$exposure = exposure if (!is.na(exposure)) query$exposure = exposure
if (!is.na(model)) query$model = model if (!is.na(model)) query$model = model
if (!is.na(grouptag)) query$grouptag = grouptag if (!is.na(grouptag)) query$grouptag = grouptag
@ -78,18 +85,21 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
#' #'
#' @param boxId A string containing a senseBox ID #' @param boxId A string containing a senseBox ID
#' @param endpoint The URL of the openSenseMap API instance #' @param endpoint The URL of the openSenseMap API instance
#' @param cache Whether to cache the result, defaults to false.
#' If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.
#' @return A \code{sensebox data.frame} containing a box in each row #' @return A \code{sensebox data.frame} containing a box in each row
#' #'
#' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)} #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
#' @seealso \code{\link{osem_phenomena}} #' @seealso \code{\link{osem_phenomena}}
#' @seealso \code{\link{osem_boxes}} #' @seealso \code{\link{osem_boxes}}
#' @seealso \code{\link{osem_clear_cache}}
#' @export #' @export
#' @examples #' @examples
#' # get a specific box by ID #' # get a specific box by ID
#' b = osem_box('57000b8745fd40c8196ad04c') #' b = osem_box('57000b8745fd40c8196ad04c')
#' #'
osem_box = function (boxId, endpoint = osem_endpoint()) { osem_box = function (boxId, endpoint = osem_endpoint(), cache = NA) {
get_box_(boxId, endpoint = endpoint) get_box_(boxId, endpoint = endpoint, cache = cache)
} }
# ============================================================================== # ==============================================================================

View file

@ -11,6 +11,6 @@
#' #'
#' @export #' @export
#' @seealso \href{https://docs.opensensemap.org/#api-Misc-getStatistics}{openSenseMap API documentation (web)} #' @seealso \href{https://docs.opensensemap.org/#api-Misc-getStatistics}{openSenseMap API documentation (web)}
osem_counts = function(endpoint = osem_endpoint()) { osem_counts = function(endpoint = osem_endpoint(), cache = NA) {
get_stats_(endpoint) get_stats_(endpoint, cache)
} }

View file

@ -21,13 +21,17 @@
#' @param columns Select specific column in the output (see openSenseMap API documentation) #' @param columns Select specific column in the output (see openSenseMap API documentation)
#' @param endpoint The URL of the openSenseMap API #' @param endpoint The URL of the openSenseMap API
#' @param progress Whether to print download progress information #' @param progress Whether to print download progress information
#' @param cache Whether to cache the result, defaults to false.
#' If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.
#' #'
#' @return An \code{osem_measurements data.frame} containing the #' @return An \code{osem_measurements data.frame} containing the
#' requested measurements #' requested measurements
#' #'
#' @export #' @export
#' @seealso \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)} #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)}
#' @seealso \code{\link{osem_box}}
#' @seealso \code{\link{osem_boxes}} #' @seealso \code{\link{osem_boxes}}
#' @seealso \code{\link{osem_clear_cache}}
osem_measurements = function (x, ...) UseMethod('osem_measurements') osem_measurements = function (x, ...) UseMethod('osem_measurements')
# ============================================================================== # ==============================================================================
@ -62,7 +66,8 @@ osem_measurements.bbox = function (x, phenomenon, exposure = NA,
from = NA, to = NA, columns = NA, from = NA, to = NA, columns = NA,
..., ...,
endpoint = osem_endpoint(), endpoint = osem_endpoint(),
progress = T) { progress = T,
cache = NA) {
bbox = x bbox = x
environment() %>% environment() %>%
as.list() %>% as.list() %>%
@ -88,7 +93,8 @@ osem_measurements.sensebox = function (x, phenomenon, exposure = NA,
from = NA, to = NA, columns = NA, from = NA, to = NA, columns = NA,
..., ...,
endpoint = osem_endpoint(), endpoint = osem_endpoint(),
progress = T) { progress = T,
cache = NA) {
boxes = x boxes = x
environment() %>% environment() %>%
as.list() %>% as.list() %>%
@ -122,7 +128,8 @@ parse_get_measurements_params = function (params) {
query = list( query = list(
endpoint = params$endpoint, endpoint = params$endpoint,
phenomenon = params$phenomenon, phenomenon = params$phenomenon,
progress = params$progress progress = params$progress,
cache = params$cache
) )
if (!is.null(params$boxes)) query$boxId = paste(params$boxes$X_id, collapse = ',') if (!is.null(params$boxes)) query$boxId = paste(params$boxes$X_id, collapse = ',')

View file

@ -50,3 +50,30 @@ test_that("summary.sensebox outputs all metrics for a single box", {
expect_true(any(grepl('boxes by exposure:', msg))) expect_true(any(grepl('boxes by exposure:', msg)))
expect_true(any(grepl('boxes total: 1', msg))) expect_true(any(grepl('boxes total: 1', msg)))
}) })
test_that('requests can be cached', {
check_api()
osem_clear_cache(tempdir())
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir())
cacheFile = paste(
tempdir(),
opensensmapr:::osem_cache_filename('/boxes/57000b8745fd40c8196ad04c'),
sep = '/'
)
expect_true(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
# no download output (works only in interactive mode..)
out = capture.output({
b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir())
})
expect_length(out, 0)
expect_length(length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')), 1)
osem_clear_cache(tempdir())
expect_false(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

View file

@ -20,7 +20,7 @@ test_that('both from and to are required when requesting boxes, error otherwise'
test_that('a list of boxes with phenomenon filter returns only the requested phenomenon', { test_that('a list of boxes with phenomenon filter returns only the requested phenomenon', {
check_api() check_api()
boxes = osem_boxes(phenomenon='Temperatur', date=Sys.time()) boxes = osem_boxes(phenomenon = 'Temperatur', date=Sys.time())
expect_true(all(grep('Temperatur', boxes$phenomena))) expect_true(all(grep('Temperatur', boxes$phenomena)))
}) })
@ -139,3 +139,29 @@ test_that('summary.sensebox outputs all metrics for a set of boxes', {
expect_true(any(grepl('boxes by exposure:', msg))) expect_true(any(grepl('boxes by exposure:', msg)))
expect_true(any(grepl('boxes total:', msg))) expect_true(any(grepl('boxes total:', msg)))
}) })
test_that('requests can be cached', {
check_api()
osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
b = osem_boxes(cache = tempdir())
cacheFile = paste(
tempdir(),
opensensmapr:::osem_cache_filename('/boxes'),
sep = '/'
)
expect_true(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
# no download output (works only in interactive mode..)
out = capture.output({
b = osem_boxes(cache = tempdir())
})
expect_length(out, 0)
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

View file

@ -10,3 +10,29 @@ test_that('counts can be retrieved as a list of numbers', {
expect_true(is.numeric(unlist(counts))) expect_true(is.numeric(unlist(counts)))
expect_length(counts, 3) expect_length(counts, 3)
}) })
test_that('requests can be cached', {
check_api()
osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
c = osem_counts(cache = tempdir())
cacheFile = paste(
tempdir(),
opensensmapr:::osem_cache_filename('/stats'),
sep = '/'
)
expect_true(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
# no download output (works only in interactive mode..)
out = capture.output({
c = osem_counts(cache = tempdir())
})
expect_length(out, 0)
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

View file

@ -105,6 +105,17 @@ test_that('both from and to are required when requesting measurements, error oth
expect_error(osem_measurements(x = 'Temperature', to = as.POSIXct('2017-01-01')), 'only together with') expect_error(osem_measurements(x = 'Temperature', to = as.POSIXct('2017-01-01')), 'only together with')
}) })
test_that('phenomenon is required when requesting measurements, error otherwise', {
check_api()
expect_error(osem_measurements(), 'missing, with no default')
expect_error(osem_measurements(boxes), 'Parameter "phenomenon" is required')
sfc = sf::st_sfc(sf::st_linestring(x = matrix(data = c(7, 8, 50, 51), ncol = 2)), crs = 4326)
bbox = sf::st_bbox(sfc)
expect_error(osem_measurements(bbox), 'Parameter "phenomenon" is required')
})
test_that('[.osem_measurements maintains attributes', { test_that('[.osem_measurements maintains attributes', {
check_api() check_api()
@ -114,3 +125,23 @@ test_that('[.osem_measurements maintains attributes', {
expect_true(all(attributes(m[1:nrow(m), ]) %in% attributes(m))) expect_true(all(attributes(m[1:nrow(m), ]) %in% attributes(m)))
}) })
test_that('requests can be cached', {
check_api()
osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
osem_measurements('Windrichtung', cache = tempdir())
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
# no download output (works only in interactive mode..)
out = capture.output({
m = osem_measurements('Windrichtung', cache = tempdir())
})
expect_length(out, 0)
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

View file

@ -20,8 +20,50 @@ This avoids..
- risk of API changes / API unavailability, - risk of API changes / API unavailability,
- stress on the openSenseMap-server. - stress on the openSenseMap-server.
This vignette shows how to use this built in `opensensmapr` feature, and
how to do it yourself, if you want to store to other data formats.
## Using openSensMapr Caching Feature
All data retrieval functions of `opensensmapr` have a built in caching feature,
which serializes an API response to disk.
Subsequent identical requests will then return the serialized data instead of making
another request.
To do so, each request is given a unique ID based on its parameters.
To use this feature, just add a path to a directory to the `cache` parameter:
```{r cache}
b = osem_boxes(cache = tempdir())
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
# the next identical request will hit the cache only!
b = osem_boxes(cache = tempdir())
# requests without the cache parameter will still be performed normally
b = osem_boxes()
```
You can maintain multiple caches simultaneously which allows to store only
serialized data related to a script in its directory:
```{r cache_custom}
cacheDir = getwd() # current working directory
b = osem_boxes(cache = cacheDir)
# the next identical request will hit the cache only!
b = osem_boxes(cache = cacheDir)
```
To get fresh results again, just call `osem_clear_cache()` for the respective cache:
```{r clearcache}
osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache
```
## Custom (De-) Serialization
If you want to roll your own serialization method to support custom data formats,
here's how:
```{r setup, results='hide'} ```{r setup, results='hide'}
# this vignette requires: # this section requires:
library(opensensmapr) library(opensensmapr)
library(jsonlite) library(jsonlite)
library(readr) library(readr)
@ -31,16 +73,7 @@ boxes = osem_boxes(grouptag = 'ifgi')
measurements = osem_measurements(boxes, phenomenon = 'PM10') measurements = osem_measurements(boxes, phenomenon = 'PM10')
``` ```
## (De-) Serializing Data If you are paranoid and worry about `.rds` files not being decodable anymore
The standard way of serialization in R is through the custom binary `.rds` (single object)
or `.RData` (full environment) formats:
```{r serialize_rds}
# serializing measurements to RDS, and loading it from the file again:
saveRDS(measurements, 'measurements.rds')
measurements_from_file = readRDS('measurements.rds')
```
Or, if you are paranoid and worry about `.rds` files not being decodable anymore
in the (distant) future, you could serialize to a plain text format such as JSON. in the (distant) future, you could serialize to a plain text format such as JSON.
This of course comes at the cost of storage space and performance. This of course comes at the cost of storage space and performance.
```{r serialize_json} ```{r serialize_json}