Browse Source

add caching feature

tags/v0.4.0
noerw 1 year ago
parent
commit
dd6d8c8539

+ 1
- 0
DESCRIPTION View File

@@ -7,6 +7,7 @@ BugReports: http://github.com/noerw/opensensmapR/issues
7 7
 Imports:
8 8
     dplyr,
9 9
     httr,
10
+    digest,
10 11
     magrittr
11 12
 Suggests:
12 13
     maps,

+ 1
- 0
NAMESPACE View File

@@ -19,6 +19,7 @@ export(osem_as_measurements)
19 19
 export(osem_as_sensebox)
20 20
 export(osem_box)
21 21
 export(osem_boxes)
22
+export(osem_clear_cache)
22 23
 export(osem_counts)
23 24
 export(osem_endpoint)
24 25
 export(osem_measurements)

+ 60
- 8
R/api.R View File

@@ -12,7 +12,7 @@ osem_endpoint = function() {
12 12
 }
13 13
 
14 14
 get_boxes_ = function (..., endpoint) {
15
-  response = osem_request_(endpoint, path = c('boxes'), ...)
15
+  response = osem_get_resource(endpoint, path = c('boxes'), ...)
16 16
 
17 17
   if (length(response) == 0) {
18 18
     warning('no senseBoxes found for this query')
@@ -29,13 +29,13 @@ get_boxes_ = function (..., endpoint) {
29 29
   df
30 30
 }
31 31
 
32
-get_box_ = function (boxId, endpoint) {
33
-  osem_request_(endpoint, path = c('boxes', boxId), progress = F) %>%
32
+get_box_ = function (boxId, endpoint, ...) {
33
+  osem_get_resource(endpoint, path = c('boxes', boxId), ..., progress = FALSE) %>%
34 34
     parse_senseboxdata()
35 35
 }
36 36
 
37 37
 get_measurements_ = function (..., endpoint) {
38
-  result = osem_request_(endpoint, c('boxes', 'data'), ..., type = 'text')
38
+  result = osem_get_resource(endpoint, c('boxes', 'data'), ..., type = 'text')
39 39
 
40 40
   # parse the CSV response manually & mute readr
41 41
   suppressWarnings({
@@ -53,15 +53,67 @@ get_measurements_ = function (..., endpoint) {
53 53
   osem_as_measurements(result)
54 54
 }
55 55
 
56
-get_stats_ = function (endpoint) {
57
-  result = osem_request_(endpoint, path = c('stats'), progress = F)
56
+get_stats_ = function (endpoint, cache) {
57
+  result = osem_get_resource(endpoint, path = c('stats'), progress = FALSE, cache = cache)
58 58
   names(result) = c('boxes', 'measurements', 'measurements_per_minute')
59 59
   result
60 60
 }
61 61
 
62
-osem_request_ = function (host, path, ..., type = 'parsed', progress) {
62
+#' Get any resource from openSenseMap API, possibly cache the response
63
+#'
64
+#' @param host API host
65
+#' @param path resource URL
66
+#' @param ... All other parameters interpreted as request query parameters
67
+#' @param type Passed to httr; 'parsed' to return an R object from the response, 'text' for a raw response
68
+#' @param progress Boolean whether to print download progress information
69
+#' @param cache Optional path to a directory where responses will be cached. If not NA, no request will be made when a response for the given request is already cached.
70
+#' @return Result of a Request to openSenseMap API
71
+#' @noRd
72
+osem_get_resource = function (host, path, ..., type = 'parsed', progress = T, cache = NA) {
73
+  query = list(...)
74
+  if (!is.na(cache)) {
75
+    filename = osem_cache_filename(path, query, host) %>% paste(cache, ., sep = '/')
76
+    if (file.exists(filename))
77
+      return(readRDS(filename))
78
+  }
79
+
80
+  res = osem_request_(host, path, query, type, progress)
81
+  if (!is.na(cache)) saveRDS(res, filename)
82
+  res
83
+}
84
+
85
+osem_cache_filename = function (path, query = list(), host = osem_endpoint()) {
86
+  httr::modify_url(url = host, path = path, query = query) %>%
87
+    digest::digest(algo = 'sha1') %>%
88
+    paste('osemcache', ., 'rds', sep = '.')
89
+}
90
+
91
+#' Purge cached responses from the given cache directory
92
+#'
93
+#' @param location A path to the cache directory, defaults to the
94
+#'   session's \code{tempdir()}
95
+#' @return Boolean whether the deletion was successful
96
+#'
97
+#' @export
98
+#' @examples
99
+#' \donttest{
100
+#'   osem_boxes(cache = tempdir())
101
+#'   osem_clear_cache()
102
+#'
103
+#'   cachedir = paste(getwd(), 'osemcache', sep = '/')
104
+#'   osem_boxes(cache = cachedir)
105
+#'   osem_clear_cache(cachedir)
106
+#' }
107
+osem_clear_cache = function (location = tempdir()) {
108
+  list.files(location, pattern = 'osemcache\\..*\\.rds') %>%
109
+    lapply(function (f) file.remove(paste(location, f, sep = '/'))) %>%
110
+    unlist() %>%
111
+    all()
112
+}
113
+
114
+osem_request_ = function (host, path, query = list(), type = 'parsed', progress = TRUE) {
63 115
   progress = if (progress && !is_non_interactive()) httr::progress() else NULL
64
-  res = httr::GET(host, progress, path = path, query = list(...))
116
+  res = httr::GET(host, progress, path = path, query = query)
65 117
 
66 118
   if (httr::http_error(res)) {
67 119
     content = httr::content(res, 'parsed', encoding = 'UTF-8')

+ 15
- 5
R/box.R View File

@@ -19,14 +19,20 @@
19 19
 #' @param phenomenon Only return boxes that measured the given phenomenon in the
20 20
 #'   time interval as specified through \code{date} or \code{from / to}
21 21
 #' @param endpoint The URL of the openSenseMap API instance
22
-#' @param progress Whether to print download progress information defaults to \code{TRUE}
22
+#' @param progress Whether to print download progress information, defaults to \code{TRUE}
23
+#' @param cache Whether to cache the result, defaults to false.
24
+#'   If a valid path to a directory is given, the response will be cached there.
25
+#'   Subsequent identical requests will return the cached data instead.
23 26
 #' @return A \code{sensebox data.frame} containing a box in each row
24 27
 #'
25 28
 #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
26 29
 #' @seealso \code{\link{osem_phenomena}}
27 30
 #' @seealso \code{\link{osem_box}}
31
+#' @seealso \code{\link{osem_clear_cache}}
32
+#'
28 33
 #' @export
29 34
 #' @examples
35
+#'
30 36
 #' \donttest{
31 37
 #'   # get *all* boxes available on the API
32 38
 #'   b = osem_boxes()
@@ -40,7 +46,8 @@
40 46
 osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
41 47
                       date = NA, from = NA, to = NA, phenomenon = NA,
42 48
                       endpoint = osem_endpoint(),
43
-                      progress = TRUE) {
49
+                      progress = TRUE,
50
+                      cache = NA) {
44 51
 
45 52
   # error, if phenomenon, but no time given
46 53
   if (!is.na(phenomenon) && is.na(date) && is.na(to) && is.na(from))
@@ -58,7 +65,7 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
58 65
    stop('Parameter "from"/"to" must be used together')
59 66
   }
60 67
 
61
-  query = list(endpoint = endpoint, progress = progress)
68
+  query = list(endpoint = endpoint, progress = progress, cache = cache)
62 69
   if (!is.na(exposure)) query$exposure = exposure
63 70
   if (!is.na(model)) query$model = model
64 71
   if (!is.na(grouptag)) query$grouptag = grouptag
@@ -78,18 +85,21 @@ osem_boxes = function (exposure = NA, model = NA, grouptag = NA,
78 85
 #'
79 86
 #' @param boxId A string containing a senseBox ID
80 87
 #' @param endpoint The URL of the openSenseMap API instance
88
+#' @param cache Whether to cache the result, defaults to false.
89
+#'   If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.
81 90
 #' @return A \code{sensebox data.frame} containing a box in each row
82 91
 #'
83 92
 #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
84 93
 #' @seealso \code{\link{osem_phenomena}}
85 94
 #' @seealso \code{\link{osem_boxes}}
95
+#' @seealso \code{\link{osem_clear_cache}}
86 96
 #' @export
87 97
 #' @examples
88 98
 #' # get a specific box by ID
89 99
 #' b = osem_box('57000b8745fd40c8196ad04c')
90 100
 #'
91
-osem_box = function (boxId, endpoint = osem_endpoint()) {
92
-  get_box_(boxId, endpoint = endpoint)
101
+osem_box = function (boxId, endpoint = osem_endpoint(), cache = NA) {
102
+  get_box_(boxId, endpoint = endpoint, cache = cache)
93 103
 }
94 104
 
95 105
 # ==============================================================================

+ 2
- 2
R/counts.R View File

@@ -11,6 +11,6 @@
11 11
 #'
12 12
 #' @export
13 13
 #' @seealso \href{https://docs.opensensemap.org/#api-Misc-getStatistics}{openSenseMap API documentation (web)}
14
-osem_counts = function(endpoint = osem_endpoint()) {
15
-  get_stats_(endpoint)
14
+osem_counts = function(endpoint = osem_endpoint(), cache = NA) {
15
+  get_stats_(endpoint, cache)
16 16
 }

+ 10
- 3
R/measurement.R View File

@@ -21,13 +21,17 @@
21 21
 #' @param columns Select specific column in the output (see openSenseMap API documentation)
22 22
 #' @param endpoint The URL of the openSenseMap API
23 23
 #' @param progress Whether to print download progress information
24
+#' @param cache Whether to cache the result, defaults to false.
25
+#'   If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.
24 26
 #'
25 27
 #' @return An \code{osem_measurements data.frame} containing the
26 28
 #'   requested measurements
27 29
 #'
28 30
 #' @export
29 31
 #' @seealso \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)}
32
+#' @seealso \code{\link{osem_box}}
30 33
 #' @seealso \code{\link{osem_boxes}}
34
+#' @seealso \code{\link{osem_clear_cache}}
31 35
 osem_measurements = function (x, ...) UseMethod('osem_measurements')
32 36
 
33 37
 # ==============================================================================
@@ -62,7 +66,8 @@ osem_measurements.bbox = function (x, phenomenon, exposure = NA,
62 66
                                    from = NA, to = NA, columns = NA,
63 67
                                    ...,
64 68
                                    endpoint = osem_endpoint(),
65
-                                   progress = T) {
69
+                                   progress = T,
70
+                                   cache = NA) {
66 71
   bbox = x
67 72
   environment() %>%
68 73
     as.list() %>%
@@ -88,7 +93,8 @@ osem_measurements.sensebox = function (x, phenomenon, exposure = NA,
88 93
                                        from = NA, to = NA, columns = NA,
89 94
                                        ...,
90 95
                                        endpoint = osem_endpoint(),
91
-                                       progress = T) {
96
+                                       progress = T,
97
+                                       cache = NA) {
92 98
   boxes = x
93 99
   environment() %>%
94 100
     as.list() %>%
@@ -122,7 +128,8 @@ parse_get_measurements_params = function (params) {
122 128
   query = list(
123 129
     endpoint = params$endpoint,
124 130
     phenomenon = params$phenomenon,
125
-    progress = params$progress
131
+    progress = params$progress,
132
+    cache = params$cache
126 133
   )
127 134
 
128 135
   if (!is.null(params$boxes))  query$boxId = paste(params$boxes$X_id, collapse = ',')

+ 27
- 0
tests/testthat/test_box.R View File

@@ -50,3 +50,30 @@ test_that("summary.sensebox outputs all metrics for a single box", {
50 50
   expect_true(any(grepl('boxes by exposure:', msg)))
51 51
   expect_true(any(grepl('boxes total: 1', msg)))
52 52
 })
53
+
54
+test_that('requests can be cached', {
55
+  check_api()
56
+
57
+  osem_clear_cache(tempdir())
58
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
59
+  b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir())
60
+
61
+  cacheFile = paste(
62
+    tempdir(),
63
+    opensensmapr:::osem_cache_filename('/boxes/57000b8745fd40c8196ad04c'),
64
+    sep = '/'
65
+  )
66
+  expect_true(file.exists(cacheFile))
67
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
68
+
69
+  # no download output (works only in interactive mode..)
70
+  out = capture.output({
71
+    b = osem_box('57000b8745fd40c8196ad04c', cache = tempdir())
72
+  })
73
+  expect_length(out, 0)
74
+  expect_length(length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')), 1)
75
+
76
+  osem_clear_cache(tempdir())
77
+  expect_false(file.exists(cacheFile))
78
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
79
+})

+ 27
- 1
tests/testthat/test_boxes.R View File

@@ -20,7 +20,7 @@ test_that('both from and to are required when requesting boxes, error otherwise'
20 20
 test_that('a list of boxes with phenomenon filter returns only the requested phenomenon', {
21 21
   check_api()
22 22
 
23
-  boxes = osem_boxes(phenomenon='Temperatur', date=Sys.time())
23
+  boxes = osem_boxes(phenomenon = 'Temperatur', date=Sys.time())
24 24
   expect_true(all(grep('Temperatur', boxes$phenomena)))
25 25
 })
26 26
 
@@ -139,3 +139,29 @@ test_that('summary.sensebox outputs all metrics for a set of boxes', {
139 139
   expect_true(any(grepl('boxes by exposure:', msg)))
140 140
   expect_true(any(grepl('boxes total:', msg)))
141 141
 })
142
+
143
+test_that('requests can be cached', {
144
+  check_api()
145
+
146
+  osem_clear_cache()
147
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
148
+  b = osem_boxes(cache = tempdir())
149
+
150
+  cacheFile = paste(
151
+    tempdir(),
152
+    opensensmapr:::osem_cache_filename('/boxes'),
153
+    sep = '/'
154
+  )
155
+  expect_true(file.exists(cacheFile))
156
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
157
+
158
+  # no download output (works only in interactive mode..)
159
+  out = capture.output({
160
+    b = osem_boxes(cache = tempdir())
161
+  })
162
+  expect_length(out, 0)
163
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
164
+
165
+  osem_clear_cache()
166
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
167
+})

+ 26
- 0
tests/testthat/test_counts.R View File

@@ -10,3 +10,29 @@ test_that('counts can be retrieved as a list of numbers', {
10 10
   expect_true(is.numeric(unlist(counts)))
11 11
   expect_length(counts, 3)
12 12
 })
13
+
14
+test_that('requests can be cached', {
15
+  check_api()
16
+
17
+  osem_clear_cache()
18
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
19
+  c = osem_counts(cache = tempdir())
20
+
21
+  cacheFile = paste(
22
+    tempdir(),
23
+    opensensmapr:::osem_cache_filename('/stats'),
24
+    sep = '/'
25
+  )
26
+  expect_true(file.exists(cacheFile))
27
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
28
+
29
+  # no download output (works only in interactive mode..)
30
+  out = capture.output({
31
+    c = osem_counts(cache = tempdir())
32
+  })
33
+  expect_length(out, 0)
34
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
35
+
36
+  osem_clear_cache()
37
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
38
+})

+ 31
- 0
tests/testthat/test_measurements.R View File

@@ -105,6 +105,17 @@ test_that('both from and to are required when requesting measurements, error oth
105 105
   expect_error(osem_measurements(x = 'Temperature', to   = as.POSIXct('2017-01-01')), 'only together with')
106 106
 })
107 107
 
108
+test_that('phenomenon is required when requesting measurements, error otherwise', {
109
+  check_api()
110
+
111
+  expect_error(osem_measurements(), 'missing, with no default')
112
+  expect_error(osem_measurements(boxes), 'Parameter "phenomenon" is required')
113
+
114
+  sfc = sf::st_sfc(sf::st_linestring(x = matrix(data = c(7, 8, 50, 51), ncol = 2)), crs = 4326)
115
+  bbox = sf::st_bbox(sfc)
116
+  expect_error(osem_measurements(bbox), 'Parameter "phenomenon" is required')
117
+})
118
+
108 119
 test_that('[.osem_measurements maintains attributes', {
109 120
   check_api()
110 121
 
@@ -114,3 +125,23 @@ test_that('[.osem_measurements maintains attributes', {
114 125
 
115 126
   expect_true(all(attributes(m[1:nrow(m), ]) %in% attributes(m)))
116 127
 })
128
+
129
+test_that('requests can be cached', {
130
+  check_api()
131
+
132
+  osem_clear_cache()
133
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
134
+  osem_measurements('Windrichtung', cache = tempdir())
135
+
136
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
137
+
138
+  # no download output (works only in interactive mode..)
139
+  out = capture.output({
140
+    m = osem_measurements('Windrichtung', cache = tempdir())
141
+  })
142
+  expect_length(out, 0)
143
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 1)
144
+
145
+  osem_clear_cache()
146
+  expect_length(list.files(tempdir(), pattern = 'osemcache\\..*\\.rds'), 0)
147
+})

+ 44
- 11
vignettes/osem-serialization.Rmd View File

@@ -20,8 +20,50 @@ This avoids..
20 20
 - risk of API changes / API unavailability,
21 21
 - stress on the openSenseMap-server.
22 22
 
23
+This vignette shows how to use this built-in `opensensmapr` feature, and
24
+how to do it yourself, if you want to store to other data formats.
25
+
26
+## Using openSensMapr Caching Feature
27
+All data retrieval functions of `opensensmapr` have a built-in caching feature,
28
+which serializes an API response to disk.
29
+Subsequent identical requests will then return the serialized data instead of making
30
+another request.
31
+To do so, each request is given a unique ID based on its parameters.
32
+
33
+To use this feature, just add a path to a directory to the `cache` parameter:
34
+```{r cache}
35
+b = osem_boxes(cache = tempdir())
36
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
37
+
38
+# the next identical request will hit the cache only!
39
+b = osem_boxes(cache = tempdir())
40
+
41
+# requests without the cache parameter will still be performed normally
42
+b = osem_boxes()
43
+```
44
+
45
+You can maintain multiple caches simultaneously, which allows you to store only
46
+serialized data related to a script in its directory:
47
+```{r cache_custom}
48
+cacheDir = getwd() # current working directory
49
+b = osem_boxes(cache = cacheDir)
50
+
51
+# the next identical request will hit the cache only!
52
+b = osem_boxes(cache = cacheDir)
53
+```
54
+
55
+To get fresh results again, just call `osem_clear_cache()` for the respective cache:
56
+```{r clearcache}
57
+osem_clear_cache() # clears default cache
58
+osem_clear_cache(getwd()) # clears a custom cache
59
+```
60
+
61
+## Custom (De-) Serialization
62
+If you want to roll your own serialization method to support custom data formats,
63
+here's how:
64
+
23 65
 ```{r setup, results='hide'}
24
-# this vignette requires:
66
+# this section requires:
25 67
 library(opensensmapr)
26 68
 library(jsonlite)
27 69
 library(readr)
@@ -31,16 +73,7 @@ boxes = osem_boxes(grouptag = 'ifgi')
31 73
 measurements = osem_measurements(boxes, phenomenon = 'PM10')
32 74
 ```
33 75
 
34
-## (De-) Serializing Data
35
-The standard way of serialization in R is through the custom binary `.rds` (single object)
36
-or `.RData` (full environment) formats:
37
-```{r serialize_rds}
38
-# serializing measurements to RDS, and loading it from the file again:
39
-saveRDS(measurements, 'measurements.rds')
40
-measurements_from_file = readRDS('measurements.rds')
41
-```
42
-
43
-Or, if you are paranoid and worry about `.rds` files not being decodable anymore
76
+If you are paranoid and worry about `.rds` files not being decodable anymore
44 77
 in the (distant) future, you could serialize to a plain text format such as JSON.
45 78
 This of course comes at the cost of storage space and performance.
46 79
 ```{r serialize_json}

Loading…
Cancel
Save