From dd6d8c85398c9ea7decdfeac2ef3542b3780574d Mon Sep 17 00:00:00 2001 From: noerw Date: Fri, 25 May 2018 01:34:47 +0200 Subject: [PATCH] add caching feature --- DESCRIPTION | 1 + NAMESPACE | 1 + R/api.R | 68 ++++++++++++++++++++++++++---- R/box.R | 20 ++++++--- R/counts.R | 4 +- R/measurement.R | 13 ++++-- tests/testthat/test_box.R | 27 ++++++++++++ tests/testthat/test_boxes.R | 28 +++++++++++- tests/testthat/test_counts.R | 26 ++++++++++++ tests/testthat/test_measurements.R | 31 ++++++++++++++ vignettes/osem-serialization.Rmd | 55 +++++++++++++++++++----- 11 files changed, 244 insertions(+), 30 deletions(-) diff --git a/DESCRIPTION b/DESCRIPTION index 10bc591..0f8ca5a 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -7,6 +7,7 @@ BugReports: http://github.com/noerw/opensensmapR/issues Imports: dplyr, httr, + digest, magrittr Suggests: maps, diff --git a/NAMESPACE b/NAMESPACE index cfff8db..14c7cab 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -19,6 +19,7 @@ export(osem_as_measurements) export(osem_as_sensebox) export(osem_box) export(osem_boxes) +export(osem_clear_cache) export(osem_counts) export(osem_endpoint) export(osem_measurements) diff --git a/R/api.R b/R/api.R index aabeee2..7f10223 100644 --- a/R/api.R +++ b/R/api.R @@ -12,7 +12,7 @@ osem_endpoint = function() { } get_boxes_ = function (..., endpoint) { - response = osem_request_(endpoint, path = c('boxes'), ...) + response = osem_get_resource(endpoint, path = c('boxes'), ...) if (length(response) == 0) { warning('no senseBoxes found for this query') @@ -29,13 +29,13 @@ get_boxes_ = function (..., endpoint) { df } -get_box_ = function (boxId, endpoint) { - osem_request_(endpoint, path = c('boxes', boxId), progress = F) %>% +get_box_ = function (boxId, endpoint, ...) { + osem_get_resource(endpoint, path = c('boxes', boxId), ..., progress = FALSE) %>% parse_senseboxdata() } get_measurements_ = function (..., endpoint) { - result = osem_request_(endpoint, c('boxes', 'data'), ..., type = 'text') + result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text') # parse the CSV response manually & mute readr suppressWarnings({ @@ -53,15 +53,67 @@ get_measurements_ = function (..., endpoint) { osem_as_measurements(result) } -get_stats_ = function (endpoint) { - result = osem_request_(endpoint, path = c('stats'), progress = F) +get_stats_ = function (endpoint, cache) { + result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache) names(result) = c('boxes', 'measurements', 'measurements_per_minute') result } -osem_request_ = function (host, path, ..., type = 'parsed', progress) { +#' Get any resource from openSenseMap API, possibly cache the response +#' +#' @param host API host +#' @param path resource URL +#' @param ... All other parameters interpreted as request query parameters +#' @param type Passed to httr; 'parsed' to return an R object from the response, 'text for a raw response +#' @param progress Boolean whether to print download progress information +#' @param cache Optional path to a directory were responses will be cached. If not NA, no requests will be made when a request for the given is already cached. +#' @return Result of a Request to openSenseMap API +#' @noRd +osem_get_resource = function (host, path, ..., type = 'parsed', progress = T, cache = NA) { + query = list(...) + if (!is.na(cache)) { + filename = osem_cache_filename(path, query, host) %>% paste(cache, ., sep = '/') + if (file.exists(filename)) + return(readRDS(filename)) + } + + res = osem_request_(host, path, query, type, progress) + if (!is.na(cache)) saveRDS(res, filename) + res +} + +osem_cache_filename = function (path, query = list(), host = osem_endpoint()) { + httr::modify_url(url = host, path = path, query = query) %>% + digest::digest(algo = 'sha1') %>% + paste('osemcache', ., 'rds', sep = '.') +} + +#' Purge cached responses from the given cache directory +#' +#' @param location A path to the cache directory, defaults to the +#' sessions' \code{tempdir()} +#' @return Boolean whether the deletion was successful +#' +#' @export +#' @examples +#' \donttest{ +#' osem_boxes(cache = tempdir()) +#' osem_clear_cache() +#' +#' cachedir = paste(getwd(), 'osemcache', sep = '/') +#' osem_boxes(cache = cachedir) +#' osem_clear_cache(cachedir) +#' } +osem_clear_cache = function (location = tempdir()) { + list.files(location, pattern = 'osemcache\\..*\\.rds') %>% + lapply(function (f) file.remove(paste(location, f, sep = '/'))) %>% + unlist() %>% + all() +} + +osem_request_ = function (host, path, query = list(), type = 'parsed', progress = TRUE) { progress = if (progress && !is_non_interactive()) httr::progress() else NULL - res = httr::GET(host, progress, path = path, query = list(...)) + res = httr::GET(host, progress, path = path, query = query) if (httr::http_error(res)) { content = httr::content(res, 'parsed', encoding = 'UTF-8') diff --git a/R/box.R b/R/box.R index 26bf9f3..b40e45e 100644 --- a/R/box.R +++ b/R/box.R @@ -19,14 +19,20 @@ #' @param phenomenon Only return boxes that measured the given phenomenon in the #' time interval as specified through \code{date} or \code{from / to} #' @param endpoint The URL of the openSenseMap API instance -#' @param progress Whether to print download progress information defaults to \code{TRUE} +#' @param progress Whether to print download progress information, defaults to \code{TRUE} +#' @param cache Whether to cache the result, defaults to false. +#' If a valid path to a directory is given, the response will be cached there. +#' Subsequent identical requests will return the cached data instead. #' @return A \code{sensebox data.frame} containing a box in each row #' #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)} #' @seealso \code{\link{osem_phenomena}} #' @seealso \code{\link{osem_box}} +#' @seealso \code{\link{osem_clear_cache}} +#' #' @export #' @examples +#' #' \donttest{ #' # get *all* boxes available on the API #' b = osem_boxes() @@ -40,7 +46,8 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA, date = NA, from = NA, to = NA, phenomenon = NA, endpoint = osem_endpoint(), - progress = TRUE) { + progress = TRUE, + cache = NA) { # error, if phenomenon, but no time given if (!is.na(phenomenon) && is.na(date) && is.na(to) && is.na(from)) @@ -58,7 +65,7 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA, stop('Parameter "from"/"to" must be used together') } - query = list(endpoint = endpoint, progress = progress) + query = list(endpoint = endpoint, progress = progress, cache = cache) if (!is.na(exposure)) query$exposure = exposure if (!is.na(model)) query$model = model if (!is.na(grouptag)) query$grouptag = grouptag @@ -78,18 +85,21 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA, #' #' @param boxId A string containing a senseBox ID #' @param endpoint The URL of the openSenseMap API instance +#' @param cache Whether to cache the result, defaults to false. +#' If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead. #' @return A \code{sensebox data.frame} containing a box in each row #' #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)} #' @seealso \code{\link{osem_phenomena}} #' @seealso \code{\link{osem_boxes}} +#' @seealso \code{\link{osem_clear_cache}} #' @export #' @examples #' # get a specific box by ID #' b = osem_box('57000b8745fd40c8196ad04c') #' -osem_box = function (boxId, endpoint = osem_endpoint()) { - get_box_(boxId, endpoint = endpoint) +osem_box = function (boxId, endpoint = osem_endpoint(), cache = NA) { + get_box_(boxId, endpoint = endpoint, cache = cache) } # ============================================================================== diff --git a/R/counts.R b/R/counts.R index 854b002..fa0a1b0 100644 --- a/R/counts.R +++ b/R/counts.R @@ -11,6 +11,6 @@ #' #' @export #' @seealso \href{https://docs.opensensemap.org/#api-Misc-getStatistics}{openSenseMap API documentation (web)} -osem_counts = function(endpoint = osem_endpoint()) { - get_stats_(endpoint) +osem_counts = function(endpoint = osem_endpoint(), cache = NA) { + get_stats_(endpoint, cache) } diff --git a/R/measurement.R b/R/measurement.R index f345bbe..0fe1cce 100644 --- a/R/measurement.R +++ b/R/measurement.R @@ -21,13 +21,17 @@ #' @param columns Select specific column in the output (see openSenseMap API documentation) #' @param endpoint The URL of the openSenseMap API #' @param progress Whether to print download progress information +#' @param cache Whether to cache the result, defaults to false. +#' If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead. #' #' @return An \code{osem_measurements data.frame} containing the #' requested measurements #' #' @export #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)} +#' @seealso \code{\link{osem_box}} #' @seealso \code{\link{osem_boxes}} +#' @seealso \code{\link{osem_clear_cache}} osem_measurements = function (x, ...) UseMethod('osem_measurements') # ============================================================================== @@ -62,7 +66,8 @@ osem_measurements.bbox = function (x, phenomenon, exposure = NA, from = NA, to = NA, columns = NA, ..., endpoint = osem_endpoint(), - progress = T) { + progress = T, + cache = NA) { bbox = x environment() %>% as.list() %>% @@ -88,7 +93,8 @@ osem_measurements.sensebox = function (x, phenomenon, exposure = NA, from = NA, to = NA, columns = NA, ..., endpoint = osem_endpoint(), - progress = T) { + progress = T, + cache = NA) { boxes = x environment() %>% as.list() %>% @@ -122,7 +128,8 @@ parse_get_measurements_params = function (params) { query = list( endpoint = params$endpoint, phenomenon = params$phenomenon, - progress = params$progress + progress = params$progress, + cache = params$cache ) if (!is.null(params$boxes)) query$boxId = paste(params$boxes$X_id, collapse = ',') diff --git a/tests/testthat/test_box.R b/tests/testthat/test_box.R index 9a05b63..bbf52e2 100644 --- a/tests/testthat/test_box.R +++ b/tests/testthat/test_box.R @@ -50,3 +50,30 @@ test_that("summary.sensebox outputs all metrics for a single box", { expect_true(any(grepl('boxes by exposure:', msg))) expect_true(any(grepl('boxes total: 1', msg))) }) + +test_that('requests can be cached', { + check_api() + + osem_clear_cache(tempdir()) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) + b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir()) + + cacheFile = paste( + tempdir(), + opensensmapr:::osem_cache_filename('/boxes/57000b8745fd40c8196ad04c'), + sep = '/' + ) + expect_true(file.exists(cacheFile)) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + # no download output (works only in interactive mode..) + out = capture.output({ + b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir()) + }) + expect_length(out, 0) + expect_length(length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')), 1) + + osem_clear_cache(tempdir()) + expect_false(file.exists(cacheFile)) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) +}) diff --git a/tests/testthat/test_boxes.R b/tests/testthat/test_boxes.R index 271a466..03b43d6 100644 --- a/tests/testthat/test_boxes.R +++ b/tests/testthat/test_boxes.R @@ -20,7 +20,7 @@ test_that('both from and to are required when requesting boxes, error otherwise' test_that('a list of boxes with phenomenon filter returns only the requested phenomenon', { check_api() - boxes = osem_boxes(phenomenon='Temperatur', date=Sys.time()) + boxes = osem_boxes(phenomenon = 'Temperatur', date=Sys.time()) expect_true(all(grep('Temperatur', boxes$phenomena))) }) @@ -139,3 +139,29 @@ test_that('summary.sensebox outputs all metrics for a set of boxes', { expect_true(any(grepl('boxes by exposure:', msg))) expect_true(any(grepl('boxes total:', msg))) }) + +test_that('requests can be cached', { + check_api() + + osem_clear_cache() + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) + b = osem_boxes(cache = tempdir()) + + cacheFile = paste( + tempdir(), + opensensmapr:::osem_cache_filename('/boxes'), + sep = '/' + ) + expect_true(file.exists(cacheFile)) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + # no download output (works only in interactive mode..) + out = capture.output({ + b = osem_boxes(cache = tempdir()) + }) + expect_length(out, 0) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + osem_clear_cache() + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) +}) diff --git a/tests/testthat/test_counts.R b/tests/testthat/test_counts.R index 5678ac2..b18e4aa 100644 --- a/tests/testthat/test_counts.R +++ b/tests/testthat/test_counts.R @@ -10,3 +10,29 @@ test_that('counts can be retrieved as a list of numbers', { expect_true(is.numeric(unlist(counts))) expect_length(counts, 3) }) + +test_that('requests can be cached', { + check_api() + + osem_clear_cache() + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) + c = osem_counts(cache = tempdir()) + + cacheFile = paste( + tempdir(), + opensensmapr:::osem_cache_filename('/stats'), + sep = '/' + ) + expect_true(file.exists(cacheFile)) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + # no download output (works only in interactive mode..) + out = capture.output({ + c = osem_counts(cache = tempdir()) + }) + expect_length(out, 0) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + osem_clear_cache() + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) +}) diff --git a/tests/testthat/test_measurements.R b/tests/testthat/test_measurements.R index 9d181cc..fab5425 100644 --- a/tests/testthat/test_measurements.R +++ b/tests/testthat/test_measurements.R @@ -105,6 +105,17 @@ test_that('both from and to are required when requesting measurements, error oth expect_error(osem_measurements(x = 'Temperature', to = as.POSIXct('2017-01-01')), 'only together with') }) +test_that('phenomenon is required when requesting measurements, error otherwise', { + check_api() + + expect_error(osem_measurements(), 'missing, with no default') + expect_error(osem_measurements(boxes), 'Parameter "phenomenon" is required') + + sfc = sf::st_sfc(sf::st_linestring(x = matrix(data = c(7, 8, 50, 51), ncol = 2)), crs = 4326) + bbox = sf::st_bbox(sfc) + expect_error(osem_measurements(bbox), 'Parameter "phenomenon" is required') +}) + test_that('[.osem_measurements maintains attributes', { check_api() @@ -114,3 +125,23 @@ test_that('[.osem_measurements maintains attributes', { expect_true(all(attributes(m[1:nrow(m), ]) %in% attributes(m))) }) + +test_that('requests can be cached', { + check_api() + + osem_clear_cache() + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) + osem_measurements('Windrichtung', cache = tempdir()) + + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + # no download output (works only in interactive mode..) + out = capture.output({ + m = osem_measurements('Windrichtung', cache = tempdir()) + }) + expect_length(out, 0) + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1) + + osem_clear_cache() + expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0) +}) diff --git a/vignettes/osem-serialization.Rmd b/vignettes/osem-serialization.Rmd index a56d196..f7812af 100644 --- a/vignettes/osem-serialization.Rmd +++ b/vignettes/osem-serialization.Rmd @@ -20,8 +20,50 @@ This avoids.. - risk of API changes / API unavailability, - stress on the openSenseMap-server. +This vignette shows how to use this built in `opensensmapr` feature, and +how to do it yourself, if you want to store to other data formats. + +## Using openSensMapr Caching Feature +All data retrieval functions of `opensensmapr` have a built in caching feature, +which serializes an API response to disk. +Subsequent identical requests will then return the serialized data instead of making +another request. +To do so, each request is given a unique ID based on its parameters. + +To use this feature, just add a path to a directory to the `cache` parameter: +```{r cache} +b = osem_boxes(cache = tempdir()) +list.files(tempdir(), pattern = 'osemcache\\..*\\.rds') + +# the next identical request will hit the cache only! +b = osem_boxes(cache = tempdir()) + +# requests without the cache parameter will still be performed normally +b = osem_boxes() +``` + +You can maintain multiple caches simultaneously which allows to store only +serialized data related to a script in its directory: +```{r cache_custom} +cacheDir = getwd() # current working directory +b = osem_boxes(cache = cacheDir) + +# the next identical request will hit the cache only! +b = osem_boxes(cache = cacheDir) +``` + +To get fresh results again, just call `osem_clear_cache()` for the respective cache: +```{r clearcache} +osem_clear_cache() # clears default cache +osem_clear_cache(getwd()) # clears a custom cache +``` + +## Custom (De-) Serialization +If you want to roll your own serialization method to support custom data formats, +here's how: + ```{r setup, results='hide'} -# this vignette requires: +# this section requires: library(opensensmapr) library(jsonlite) library(readr) @@ -31,16 +73,7 @@ boxes = osem_boxes(grouptag = 'ifgi') measurements = osem_measurements(boxes, phenomenon = 'PM10') ``` -## (De-) Serializing Data -The standard way of serialization in R is through the custom binary `.rds` (single object) -or `.RData` (full environment) formats: -```{r serialize_rds} -# serializing measurements to RDS, and loading it from the file again: -saveRDS(measurements, 'measurements.rds') -measurements_from_file = readRDS('measurements.rds') -``` - -Or, if you are paranoid and worry about `.rds` files not being decodable anymore +If you are paranoid and worry about `.rds` files not being decodable anymore in the (distant) future, you could serialize to a plain text format such as JSON. This of course comes at the cost of storage space and performance. ```{r serialize_json}