Browse Source

add caching feature

tags/v0.4.0
noerw 1 year ago
parent
commit
dd6d8c8539

+ 1
- 0
DESCRIPTION View File

@@ -7,6 +7,7 @@ BugReports: http://github.com/noerw/opensensmapR/issues
Imports:
dplyr,
httr,
digest,
magrittr
Suggests:
maps,

+ 1
- 0
NAMESPACE View File

@@ -19,6 +19,7 @@ export(osem_as_measurements)
export(osem_as_sensebox)
export(osem_box)
export(osem_boxes)
export(osem_clear_cache)
export(osem_counts)
export(osem_endpoint)
export(osem_measurements)

+ 60
- 8
R/api.R View File

@@ -12,7 +12,7 @@ osem_endpoint = function() {
}

get_boxes_ = function (..., endpoint) {
response = osem_request_(endpoint, path = c('boxes'), ...)
response = osem_get_resource(endpoint, path = c('boxes'), ...)

if (length(response) == 0) {
warning('no senseBoxes found for this query')
@@ -29,13 +29,13 @@ get_boxes_ = function (..., endpoint) {
df
}

get_box_ = function (boxId, endpoint) {
osem_request_(endpoint, path = c('boxes', boxId), progress = F) %>%
get_box_ = function (boxId, endpoint, ...) {
osem_get_resource(endpoint, path = c('boxes', boxId), ..., progress = FALSE) %>%
parse_senseboxdata()
}

get_measurements_ = function (..., endpoint) {
result = osem_request_(endpoint, c('boxes', 'data'), ..., type = 'text')
result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')

# parse the CSV response manually & mute readr
suppressWarnings({
@@ -53,15 +53,67 @@ get_measurements_ = function (..., endpoint) {
osem_as_measurements(result)
}

get_stats_ = function (endpoint) {
result = osem_request_(endpoint, path = c('stats'), progress = F)
get_stats_ = function (endpoint, cache) {
result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache)
names(result) = c('boxes', 'measurements', 'measurements_per_minute')
result
}

osem_request_ = function (host, path, ..., type = 'parsed', progress) {
#' Get any resource from openSenseMap API, possibly cache the response
#'
#' @param host API host
#' @param path resource URL
#' @param ... All other parameters interpreted as request query parameters
#' @param type Passed to httr; 'parsed' to return an R object from the response, 'text for a raw response
#' @param progress Boolean whether to print download progress information
#' @param cache Optional path to a directory were responses will be cached. If not NA, no requests will be made when a request for the given is already cached.
#' @return Result of a Request to openSenseMap API
#' @noRd
osem_get_resource = function (host, path, ..., type = 'parsed', progress = T, cache = NA) {
query = list(...)
if (!is.na(cache)) {
filename = osem_cache_filename(path, query, host) %>% paste(cache, ., sep = '/')
if (file.exists(filename))
return(readRDS(filename))
}

res = osem_request_(host, path, query, type, progress)
if (!is.na(cache)) saveRDS(res, filename)
res
}

osem_cache_filename = function (path, query = list(), host = osem_endpoint()) {
httr::modify_url(url = host, path = path, query = query) %>%
digest::digest(algo = 'sha1') %>%
paste('osemcache', ., 'rds', sep = '.')
}

#' Purge cached responses from the given cache directory
#'
#' @param location A path to the cache directory, defaults to the
#' sessions' \code{tempdir()}
#' @return Boolean whether the deletion was successful
#'
#' @export
#' @examples
#' \donttest{
#' osem_boxes(cache = tempdir())
#' osem_clear_cache()
#'
#' cachedir = paste(getwd(), 'osemcache', sep = '/')
#' osem_boxes(cache = cachedir)
#' osem_clear_cache(cachedir)
#' }
osem_clear_cache = function (location = tempdir()) {
list.files(location, pattern = 'osemcache\\..*\\.rds') %>%
lapply(function (f) file.remove(paste(location, f, sep = '/'))) %>%
unlist() %>%
all()
}

osem_request_ = function (host, path, query = list(), type = 'parsed', progress = TRUE) {
progress = if (progress && !is_non_interactive()) httr::progress() else NULL
res = httr::GET(host, progress, path = path, query = list(...))
res = httr::GET(host, progress, path = path, query = query)

if (httr::http_error(res)) {
content = httr::content(res, 'parsed', encoding = 'UTF-8')

+ 15
- 5
R/box.R View File

@@ -19,14 +19,20 @@
#' @param phenomenon Only return boxes that measured the given phenomenon in the
#' time interval as specified through \code{date} or \code{from / to}
#' @param endpoint The URL of the openSenseMap API instance
#' @param progress Whether to print download progress information defaults to \code{TRUE}
#' @param progress Whether to print download progress information, defaults to \code{TRUE}
#' @param cache Whether to cache the result, defaults to false.
#' If a valid path to a directory is given, the response will be cached there.
#' Subsequent identical requests will return the cached data instead.
#' @return A \code{sensebox data.frame} containing a box in each row
#'
#' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
#' @seealso \code{\link{osem_phenomena}}
#' @seealso \code{\link{osem_box}}
#' @seealso \code{\link{osem_clear_cache}}
#'
#' @export
#' @examples
#'
#' \donttest{
#' # get *all* boxes available on the API
#' b = osem_boxes()
@@ -40,7 +46,8 @@
osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
date = NA, from = NA, to = NA, phenomenon = NA,
endpoint = osem_endpoint(),
progress = TRUE) {
progress = TRUE,
cache = NA) {

# error, if phenomenon, but no time given
if (!is.na(phenomenon) && is.na(date) && is.na(to) && is.na(from))
@@ -58,7 +65,7 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
stop('Parameter "from"/"to" must be used together')
}

query = list(endpoint = endpoint, progress = progress)
query = list(endpoint = endpoint, progress = progress, cache = cache)
if (!is.na(exposure)) query$exposure = exposure
if (!is.na(model)) query$model = model
if (!is.na(grouptag)) query$grouptag = grouptag
@@ -78,18 +85,21 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
#'
#' @param boxId A string containing a senseBox ID
#' @param endpoint The URL of the openSenseMap API instance
#' @param cache Whether to cache the result, defaults to false.
#' If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.
#' @return A \code{sensebox data.frame} containing a box in each row
#'
#' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
#' @seealso \code{\link{osem_phenomena}}
#' @seealso \code{\link{osem_boxes}}
#' @seealso \code{\link{osem_clear_cache}}
#' @export
#' @examples
#' # get a specific box by ID
#' b = osem_box('57000b8745fd40c8196ad04c')
#'
osem_box = function (boxId, endpoint = osem_endpoint()) {
get_box_(boxId, endpoint = endpoint)
osem_box = function (boxId, endpoint = osem_endpoint(), cache = NA) {
get_box_(boxId, endpoint = endpoint, cache = cache)
}

# ==============================================================================

+ 2
- 2
R/counts.R View File

@@ -11,6 +11,6 @@
#'
#' @export
#' @seealso \href{https://docs.opensensemap.org/#api-Misc-getStatistics}{openSenseMap API documentation (web)}
osem_counts = function(endpoint = osem_endpoint()) {
get_stats_(endpoint)
osem_counts = function(endpoint = osem_endpoint(), cache = NA) {
get_stats_(endpoint, cache)
}

+ 10
- 3
R/measurement.R View File

@@ -21,13 +21,17 @@
#' @param columns Select specific column in the output (see openSenseMap API documentation)
#' @param endpoint The URL of the openSenseMap API
#' @param progress Whether to print download progress information
#' @param cache Whether to cache the result, defaults to false.
#' If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.
#'
#' @return An \code{osem_measurements data.frame} containing the
#' requested measurements
#'
#' @export
#' @seealso \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)}
#' @seealso \code{\link{osem_box}}
#' @seealso \code{\link{osem_boxes}}
#' @seealso \code{\link{osem_clear_cache}}
osem_measurements = function (x, ...) UseMethod('osem_measurements')

# ==============================================================================
@@ -62,7 +66,8 @@ osem_measurements.bbox = function (x, phenomenon, exposure = NA,
from = NA, to = NA, columns = NA,
...,
endpoint = osem_endpoint(),
progress = T) {
progress = T,
cache = NA) {
bbox = x
environment() %>%
as.list() %>%
@@ -88,7 +93,8 @@ osem_measurements.sensebox = function (x, phenomenon, exposure = NA,
from = NA, to = NA, columns = NA,
...,
endpoint = osem_endpoint(),
progress = T) {
progress = T,
cache = NA) {
boxes = x
environment() %>%
as.list() %>%
@@ -122,7 +128,8 @@ parse_get_measurements_params = function (params) {
query = list(
endpoint = params$endpoint,
phenomenon = params$phenomenon,
progress = params$progress
progress = params$progress,
cache = params$cache
)

if (!is.null(params$boxes)) query$boxId = paste(params$boxes$X_id, collapse = ',')

+ 27
- 0
tests/testthat/test_box.R View File

@@ -50,3 +50,30 @@ test_that("summary.sensebox outputs all metrics for a single box", {
expect_true(any(grepl('boxes by exposure:', msg)))
expect_true(any(grepl('boxes total: 1', msg)))
})

test_that('requests can be cached', {
check_api()

osem_clear_cache(tempdir())
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir())

cacheFile = paste(
tempdir(),
opensensmapr:::osem_cache_filename('/boxes/57000b8745fd40c8196ad04c'),
sep = '/'
)
expect_true(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

# no download output (works only in interactive mode..)
out = capture.output({
b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir())
})
expect_length(out, 0)
expect_length(length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')), 1)

osem_clear_cache(tempdir())
expect_false(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

+ 27
- 1
tests/testthat/test_boxes.R View File

@@ -20,7 +20,7 @@ test_that('both from and to are required when requesting boxes, error otherwise'
test_that('a list of boxes with phenomenon filter returns only the requested phenomenon', {
check_api()

boxes = osem_boxes(phenomenon='Temperatur', date=Sys.time())
boxes = osem_boxes(phenomenon = 'Temperatur', date=Sys.time())
expect_true(all(grep('Temperatur', boxes$phenomena)))
})

@@ -139,3 +139,29 @@ test_that('summary.sensebox outputs all metrics for a set of boxes', {
expect_true(any(grepl('boxes by exposure:', msg)))
expect_true(any(grepl('boxes total:', msg)))
})

test_that('requests can be cached', {
check_api()

osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
b = osem_boxes(cache = tempdir())

cacheFile = paste(
tempdir(),
opensensmapr:::osem_cache_filename('/boxes'),
sep = '/'
)
expect_true(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

# no download output (works only in interactive mode..)
out = capture.output({
b = osem_boxes(cache = tempdir())
})
expect_length(out, 0)
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

+ 26
- 0
tests/testthat/test_counts.R View File

@@ -10,3 +10,29 @@ test_that('counts can be retrieved as a list of numbers', {
expect_true(is.numeric(unlist(counts)))
expect_length(counts, 3)
})

test_that('requests can be cached', {
check_api()

osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
c = osem_counts(cache = tempdir())

cacheFile = paste(
tempdir(),
opensensmapr:::osem_cache_filename('/stats'),
sep = '/'
)
expect_true(file.exists(cacheFile))
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

# no download output (works only in interactive mode..)
out = capture.output({
c = osem_counts(cache = tempdir())
})
expect_length(out, 0)
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

+ 31
- 0
tests/testthat/test_measurements.R View File

@@ -105,6 +105,17 @@ test_that('both from and to are required when requesting measurements, error oth
expect_error(osem_measurements(x = 'Temperature', to = as.POSIXct('2017-01-01')), 'only together with')
})

test_that('phenomenon is required when requesting measurements, error otherwise', {
check_api()

expect_error(osem_measurements(), 'missing, with no default')
expect_error(osem_measurements(boxes), 'Parameter "phenomenon" is required')

sfc = sf::st_sfc(sf::st_linestring(x = matrix(data = c(7, 8, 50, 51), ncol = 2)), crs = 4326)
bbox = sf::st_bbox(sfc)
expect_error(osem_measurements(bbox), 'Parameter "phenomenon" is required')
})

test_that('[.osem_measurements maintains attributes', {
check_api()

@@ -114,3 +125,23 @@ test_that('[.osem_measurements maintains attributes', {

expect_true(all(attributes(m[1:nrow(m), ]) %in% attributes(m)))
})

test_that('requests can be cached', {
check_api()

osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
osem_measurements('Windrichtung', cache = tempdir())

expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

# no download output (works only in interactive mode..)
out = capture.output({
m = osem_measurements('Windrichtung', cache = tempdir())
})
expect_length(out, 0)
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)

osem_clear_cache()
expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
})

+ 44
- 11
vignettes/osem-serialization.Rmd View File

@@ -20,8 +20,50 @@ This avoids..
- risk of API changes / API unavailability,
- stress on the openSenseMap-server.

This vignette shows how to use this built in `opensensmapr` feature, and
how to do it yourself, if you want to store to other data formats.

## Using openSensMapr Caching Feature
All data retrieval functions of `opensensmapr` have a built in caching feature,
which serializes an API response to disk.
Subsequent identical requests will then return the serialized data instead of making
another request.
To do so, each request is given a unique ID based on its parameters.

To use this feature, just add a path to a directory to the `cache` parameter:
```{r cache}
b = osem_boxes(cache = tempdir())
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')

# the next identical request will hit the cache only!
b = osem_boxes(cache = tempdir())

# requests without the cache parameter will still be performed normally
b = osem_boxes()
```

You can maintain multiple caches simultaneously which allows to store only
serialized data related to a script in its directory:
```{r cache_custom}
cacheDir = getwd() # current working directory
b = osem_boxes(cache = cacheDir)

# the next identical request will hit the cache only!
b = osem_boxes(cache = cacheDir)
```

To get fresh results again, just call `osem_clear_cache()` for the respective cache:
```{r clearcache}
osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache
```

## Custom (De-) Serialization
If you want to roll your own serialization method to support custom data formats,
here's how:

```{r setup, results='hide'}
# this vignette requires:
# this section requires:
library(opensensmapr)
library(jsonlite)
library(readr)
@@ -31,16 +73,7 @@ boxes = osem_boxes(grouptag = 'ifgi')
measurements = osem_measurements(boxes, phenomenon = 'PM10')
```

## (De-) Serializing Data
The standard way of serialization in R is through the custom binary `.rds` (single object)
or `.RData` (full environment) formats:
```{r serialize_rds}
# serializing measurements to RDS, and loading it from the file again:
saveRDS(measurements, 'measurements.rds')
measurements_from_file = readRDS('measurements.rds')
```

Or, if you are paranoid and worry about `.rds` files not being decodable anymore
If you are paranoid and worry about `.rds` files not being decodable anymore
in the (distant) future, you could serialize to a plain text format such as JSON.
This of course comes at the cost of storage space and performance.
```{r serialize_json}

Loading…
Cancel
Save