Browse Source

update docs + vignette builds

tags/v0.4.0
noerw 1 year ago
parent
commit
553772d209

+ 3
- 0
R/counts.R View File

@@ -7,6 +7,9 @@
7 7
 #' @details Note that the API caches these values for 5 minutes.
8 8
 #'
9 9
 #' @param endpoint The URL of the openSenseMap API
10
+#' @param cache Whether to cache the result, defaults to false.
11
+#'   If a valid path to a directory is given, the response will be cached there.
12
+#'   Subsequent identical requests will return the cached data instead.
10 13
 #' @return A named \code{list} containing the counts
11 14
 #'
12 15
 #' @export

+ 1
- 0
inst/doc/osem-intro.R View File

@@ -70,3 +70,4 @@ st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
70 70
 
71 71
 ## ------------------------------------------------------------------------
72 72
 pm25 %>% filter(invalid == FALSE) %>% plot()
73
+

+ 504
- 263
inst/doc/osem-intro.html
File diff suppressed because it is too large
View File


+ 96
- 0
inst/doc/osem-serialization.R View File

@@ -0,0 +1,96 @@
1
+## ----cache---------------------------------------------------------------
2
+b = osem_boxes(cache = tempdir())
3
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
4
+
5
+# the next identical request will hit the cache only!
6
+b = osem_boxes(cache = tempdir())
7
+
8
+# requests without the cache parameter will still be performed normally
9
+b = osem_boxes()
10
+
11
+## ----cache_custom--------------------------------------------------------
12
+cacheDir = getwd() # current working directory
13
+b = osem_boxes(cache = cacheDir)
14
+
15
+# the next identical request will hit the cache only!
16
+b = osem_boxes(cache = cacheDir)
17
+
18
+## ----clearcache----------------------------------------------------------
19
+osem_clear_cache() # clears default cache
20
+osem_clear_cache(getwd()) # clears a custom cache
21
+
22
+## ----setup, results='hide'-----------------------------------------------
23
+# this section requires:
24
+library(opensensmapr)
25
+library(jsonlite)
26
+library(readr)
27
+
28
+# first get our example data:
29
+boxes = osem_boxes(grouptag = 'ifgi')
30
+measurements = osem_measurements(boxes, phenomenon = 'PM10')
31
+
32
+## ----serialize_json------------------------------------------------------
33
+# serializing senseBoxes to JSON, and loading from file again:
34
+write(jsonlite::serializeJSON(measurements), 'boxes.json')
35
+boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
36
+
37
+## ----serialize_attrs-----------------------------------------------------
38
+# note the toJSON call
39
+write(jsonlite::toJSON(measurements), 'boxes_bad.json')
40
+boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
41
+
42
+boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
43
+class(boxes_with_attrs)
44
+
45
+## ----osem_offline--------------------------------------------------------
46
+# offline logic
47
+osem_offline = function (func, file, format='rds', ...) {
48
+  # deserialize if file exists, otherwise download and serialize
49
+  if (file.exists(file)) {
50
+    if (format == 'json')
51
+      jsonlite::unserializeJSON(readr::read_file(file))
52
+    else
53
+      readRDS(file)
54
+  } else {
55
+    data = func(...)
56
+    if (format == 'json')
57
+      write(jsonlite::serializeJSON(data), file = file)
58
+    else
59
+      saveRDS(data, file)
60
+    data
61
+  }
62
+}
63
+
64
+# wrappers for each download function
65
+osem_measurements_offline = function (file, ...) {
66
+  osem_offline(opensensmapr::osem_measurements, file, ...)
67
+}
68
+osem_boxes_offline = function (file, ...) {
69
+  osem_offline(opensensmapr::osem_boxes, file, ...)
70
+}
71
+osem_box_offline = function (file, ...) {
72
+  osem_offline(opensensmapr::osem_box, file, ...)
73
+}
74
+osem_counts_offline = function (file, ...) {
75
+  osem_offline(opensensmapr::osem_counts, file, ...)
76
+}
77
+
78
+## ----test----------------------------------------------------------------
79
+# first run; will download and save to disk
80
+b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
81
+
82
+# consecutive runs; will read from disk
83
+b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
84
+class(b1) == class(b2)
85
+
86
+# we can even omit the arguments now (though that's not really the point here)
87
+b3 = osem_boxes_offline('mobileboxes.rds')
88
+nrow(b1) == nrow(b3)
89
+
90
+# verify that the custom sensebox methods are still working
91
+summary(b2)
92
+plot(b3)
93
+
94
+## ----cleanup, results='hide'---------------------------------------------
95
+file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
96
+

+ 169
- 0
inst/doc/osem-serialization.Rmd View File

@@ -0,0 +1,169 @@
1
+---
2
+title: "opensensmapr reproducibility: Loading openSenseMap Data from Files"
3
+author: "Norwin Roosen"
4
+date: "`r Sys.Date()`"
5
+output: rmarkdown::html_vignette
6
+vignette: >
7
+  %\VignetteIndexEntry{opensensmapr reproducibility: Loading openSenseMap Data from Files}
8
+  %\VignetteEngine{knitr::rmarkdown}
9
+  %\VignetteEncoding{UTF-8}
10
+---
11
+
12
+It may be useful to download data from openSenseMap only once.
13
+For reproducible results, the data could be saved to disk, and reloaded at a
14
+later point.
15
+
16
+This avoids..
17
+
18
+- changed results for queries without date parameters,
19
+- unnecessary wait times,
20
+- risk of API changes / API unavailability,
21
+- stress on the openSenseMap-server.
22
+
23
+This vignette shows how to use this built-in `opensensmapr` feature, and
24
+how to do it yourself, if you want to store to other data formats.
25
+
26
+## Using the opensensmapr Caching Feature
27
+All data retrieval functions of `opensensmapr` have a built-in caching feature,
28
+which serializes an API response to disk.
29
+Subsequent identical requests will then return the serialized data instead of making
30
+another request.
31
+To do so, each request is given a unique ID based on its parameters.
32
+
33
+To use this feature, just add a path to a directory to the `cache` parameter:
34
+```{r cache}
35
+b = osem_boxes(cache = tempdir())
36
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
37
+
38
+# the next identical request will hit the cache only!
39
+b = osem_boxes(cache = tempdir())
40
+
41
+# requests without the cache parameter will still be performed normally
42
+b = osem_boxes()
43
+```
44
+
45
+You can maintain multiple caches simultaneously, which allows you to store only
46
+serialized data related to a script in its directory:
47
+```{r cache_custom}
48
+cacheDir = getwd() # current working directory
49
+b = osem_boxes(cache = cacheDir)
50
+
51
+# the next identical request will hit the cache only!
52
+b = osem_boxes(cache = cacheDir)
53
+```
54
+
55
+To get fresh results again, just call `osem_clear_cache()` for the respective cache:
56
+```{r clearcache}
57
+osem_clear_cache() # clears default cache
58
+osem_clear_cache(getwd()) # clears a custom cache
59
+```
60
+
61
+## Custom (De-) Serialization
62
+If you want to roll your own serialization method to support custom data formats,
63
+here's how:
64
+
65
+```{r setup, results='hide'}
66
+# this section requires:
67
+library(opensensmapr)
68
+library(jsonlite)
69
+library(readr)
70
+
71
+# first get our example data:
72
+boxes = osem_boxes(grouptag = 'ifgi')
73
+measurements = osem_measurements(boxes, phenomenon = 'PM10')
74
+```
75
+
76
+If you are paranoid and worry about `.rds` files not being decodable anymore
77
+in the (distant) future, you could serialize to a plain text format such as JSON.
78
+This of course comes at the cost of storage space and performance.
79
+```{r serialize_json}
80
+# serializing senseBoxes to JSON, and loading from file again:
81
+write(jsonlite::serializeJSON(measurements), 'boxes.json')
82
+boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
83
+```
84
+
85
+Both methods also persist the R object metadata (classes, attributes).
86
+If you were to use a serialization method that can't persist object metadata, you
87
+could re-apply it with the following functions:
88
+
89
+```{r serialize_attrs}
90
+# note the toJSON call
91
+write(jsonlite::toJSON(measurements), 'boxes_bad.json')
92
+boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
93
+
94
+boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
95
+class(boxes_with_attrs)
96
+```
97
+The same goes for measurements via `osem_as_measurements()`.
98
+
99
+## Workflow for reproducible code
100
+For truly reproducible code you want it to work and return the same results --
101
+no matter if you run it the first time or a consecutive time, and without making
102
+changes to it.
103
+
104
+Therefore we need a wrapper around the save-to-file & load-from-file logic.
105
+The following examples show a way to do just that, and were inspired by
106
+[this reproducible analysis by Daniel Nuest](https://github.com/nuest/sensebox-binder).
107
+
108
+```{r osem_offline}
109
+# offline logic
110
+osem_offline = function (func, file, format='rds', ...) {
111
+  # deserialize if file exists, otherwise download and serialize
112
+  if (file.exists(file)) {
113
+    if (format == 'json')
114
+      jsonlite::unserializeJSON(readr::read_file(file))
115
+    else
116
+      readRDS(file)
117
+  } else {
118
+    data = func(...)
119
+    if (format == 'json')
120
+      write(jsonlite::serializeJSON(data), file = file)
121
+    else
122
+      saveRDS(data, file)
123
+    data
124
+  }
125
+}
126
+
127
+# wrappers for each download function
128
+osem_measurements_offline = function (file, ...) {
129
+  osem_offline(opensensmapr::osem_measurements, file, ...)
130
+}
131
+osem_boxes_offline = function (file, ...) {
132
+  osem_offline(opensensmapr::osem_boxes, file, ...)
133
+}
134
+osem_box_offline = function (file, ...) {
135
+  osem_offline(opensensmapr::osem_box, file, ...)
136
+}
137
+osem_counts_offline = function (file, ...) {
138
+  osem_offline(opensensmapr::osem_counts, file, ...)
139
+}
140
+```
141
+
142
+That's it! Now let's try it out:
143
+
144
+```{r test}
145
+# first run; will download and save to disk
146
+b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
147
+
148
+# consecutive runs; will read from disk
149
+b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
150
+class(b1) == class(b2)
151
+
152
+# we can even omit the arguments now (though that's not really the point here)
153
+b3 = osem_boxes_offline('mobileboxes.rds')
154
+nrow(b1) == nrow(b3)
155
+
156
+# verify that the custom sensebox methods are still working
157
+summary(b2)
158
+plot(b3)
159
+```
160
+
161
+To re-download the data, just clear the files that were created in the process:
162
+```{r cleanup, results='hide'}
163
+file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
164
+```
165
+
166
+A possible extension to this scheme comes to mind: Omit the specification of a
167
+filename, and assign a unique ID to the request instead.
168
+For example, one could calculate the SHA-1 hash of the parameters, and use it
169
+as filename.

+ 424
- 0
inst/doc/osem-serialization.html
File diff suppressed because it is too large
View File


+ 16
- 3
man/osem_box.Rd View File

@@ -4,12 +4,15 @@
4 4
 \alias{osem_box}
5 5
 \title{Get a single senseBox by its ID}
6 6
 \usage{
7
-osem_box(boxId, endpoint = osem_endpoint())
7
+osem_box(boxId, endpoint = osem_endpoint(), cache = NA)
8 8
 }
9 9
 \arguments{
10 10
 \item{boxId}{A string containing a senseBox ID}
11 11
 
12 12
 \item{endpoint}{The URL of the openSenseMap API instance}
13
+
14
+\item{cache}{Whether to cache the result, defaults to false.
15
+If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.}
13 16
 }
14 17
 \value{
15 18
 A \code{sensebox data.frame} containing a box in each row
@@ -18,9 +21,17 @@ A \code{sensebox data.frame} containing a box in each row
18 21
 Get a single senseBox by its ID
19 22
 }
20 23
 \examples{
21
-# get a specific box by ID
22
-b = osem_box('57000b8745fd40c8196ad04c')
24
+\donttest{
25
+  # get a specific box by ID
26
+  b = osem_box('57000b8745fd40c8196ad04c')
27
+
28
+  # get a specific box by ID from a custom (selfhosted) openSenseMap API
29
+  b = osem_box('51030b8725fd30c2196277da', 'http://api.my-custom-osem.com')
23 30
 
31
+  # get a specific box by ID and cache the response, in order to provide
32
+  # reproducible results in the future.
33
+  b = osem_box('51030b8725fd30c2196277da', cache = tempdir())
34
+}
24 35
 }
25 36
 \seealso{
26 37
 \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)}
@@ -28,4 +39,6 @@ b = osem_box('57000b8745fd40c8196ad04c')
28 39
 \code{\link{osem_phenomena}}
29 40
 
30 41
 \code{\link{osem_boxes}}
42
+
43
+\code{\link{osem_clear_cache}}
31 44
 }

+ 32
- 2
man/osem_boxes.Rd View File

@@ -6,7 +6,7 @@
6 6
 \usage{
7 7
 osem_boxes(exposure = NA, model = NA, grouptag = NA, date = NA,
8 8
   from = NA, to = NA, phenomenon = NA, endpoint = osem_endpoint(),
9
-  progress = TRUE)
9
+  progress = TRUE, cache = NA)
10 10
 }
11 11
 \arguments{
12 12
 \item{exposure}{Only return boxes with the given exposure ('indoor', 'outdoor', 'mobile')}
@@ -26,7 +26,11 @@ time interval as specified through \code{date} or \code{from / to}}
26 26
 
27 27
 \item{endpoint}{The URL of the openSenseMap API instance}
28 28
 
29
-\item{progress}{Whether to print download progress information defaults to \code{TRUE}}
29
+\item{progress}{Whether to print download progress information, defaults to \code{TRUE}}
30
+
31
+\item{cache}{Whether to cache the result, defaults to false.
32
+If a valid path to a directory is given, the response will be cached there.
33
+Subsequent identical requests will return the cached data instead.}
30 34
 }
31 35
 \value{
32 36
 A \code{sensebox data.frame} containing a box in each row
@@ -41,6 +45,7 @@ Note that some filters do not work together:
41 45
 }
42 46
 }
43 47
 \examples{
48
+
44 49
 \donttest{
45 50
   # get *all* boxes available on the API
46 51
   b = osem_boxes()
@@ -48,8 +53,31 @@ Note that some filters do not work together:
48 53
   # get all boxes with grouptag 'ifgi' that are placed outdoors
49 54
   b = osem_boxes(grouptag = 'ifgi', exposure = 'outdoor')
50 55
 
56
+  # get all boxes with model 'luftdaten_sds011_dht22'
57
+  b = osem_boxes(model = 'luftdaten_sds011_dht22')
58
+
51 59
   # get all boxes that have measured PM2.5 in the last 4 hours
52 60
   b = osem_boxes(date = Sys.time(), phenomenon = 'PM2.5')
61
+
62
+  # get all boxes that have measured PM2.5 between Jan & Feb 2018
63
+  library(lubridate)
64
+  b = osem_boxes(
65
+    from = date('2018-01-01'),
66
+    to = date('2018-02-01'),
67
+    phenomenon = 'PM2.5'
68
+  )
69
+
70
+  # get all boxes from a custom (selfhosted) openSenseMap API
71
+  b = osem_boxes(endpoint = 'http://api.my-custom-osem.com')
72
+
73
+  # get all boxes and cache the response, in order to provide
74
+  # reproducible results in the future. Also useful for development
75
+  # to avoid repeated loading times!
76
+  b = osem_boxes(cache = getwd())
77
+  b = osem_boxes(cache = getwd())
78
+
79
+  # get *all* boxes available on the API, without showing download progress
80
+  b = osem_boxes(progress = FALSE)
53 81
 }
54 82
 }
55 83
 \seealso{
@@ -58,4 +86,6 @@ Note that some filters do not work together:
58 86
 \code{\link{osem_phenomena}}
59 87
 
60 88
 \code{\link{osem_box}}
89
+
90
+\code{\link{osem_clear_cache}}
61 91
 }

+ 28
- 0
man/osem_clear_cache.Rd View File

@@ -0,0 +1,28 @@
1
+% Generated by roxygen2: do not edit by hand
2
+% Please edit documentation in R/api.R
3
+\name{osem_clear_cache}
4
+\alias{osem_clear_cache}
5
+\title{Purge cached responses from the given cache directory}
6
+\usage{
7
+osem_clear_cache(location = tempdir())
8
+}
9
+\arguments{
10
+\item{location}{A path to the cache directory, defaults to the
11
+session's \code{tempdir()}}
12
+}
13
+\value{
14
+Boolean whether the deletion was successful
15
+}
16
+\description{
17
+Purge cached responses from the given cache directory
18
+}
19
+\examples{
20
+\donttest{
21
+  osem_boxes(cache = tempdir())
22
+  osem_clear_cache()
23
+
24
+  cachedir = paste(getwd(), 'osemcache', sep = '/')
25
+  osem_boxes(cache = cachedir)
26
+  osem_clear_cache(cachedir)
27
+}
28
+}

+ 5
- 1
man/osem_counts.Rd View File

@@ -4,10 +4,14 @@
4 4
 \alias{osem_counts}
5 5
 \title{Get count statistics of the openSenseMap Instance}
6 6
 \usage{
7
-osem_counts(endpoint = osem_endpoint())
7
+osem_counts(endpoint = osem_endpoint(), cache = NA)
8 8
 }
9 9
 \arguments{
10 10
 \item{endpoint}{The URL of the openSenseMap API}
11
+
12
+\item{cache}{Whether to cache the result, defaults to false.
13
+If a valid path to a directory is given, the response will be cached there.
14
+Subsequent identical requests will return the cached data instead.}
11 15
 }
12 16
 \value{
13 17
 A named \code{list} containing the counts

+ 61
- 10
man/osem_measurements.Rd View File

@@ -12,11 +12,12 @@ osem_measurements(x, ...)
12 12
 \method{osem_measurements}{default}(x, ...)
13 13
 
14 14
 \method{osem_measurements}{bbox}(x, phenomenon, exposure = NA, from = NA,
15
-  to = NA, columns = NA, ..., endpoint = osem_endpoint(), progress = T)
15
+  to = NA, columns = NA, ..., endpoint = osem_endpoint(), progress = T,
16
+  cache = NA)
16 17
 
17 18
 \method{osem_measurements}{sensebox}(x, phenomenon, exposure = NA,
18 19
   from = NA, to = NA, columns = NA, ..., endpoint = osem_endpoint(),
19
-  progress = T)
20
+  progress = T, cache = NA)
20 21
 }
21 22
 \arguments{
22 23
 \item{x}{Depending on the method, either
@@ -42,6 +43,9 @@ osem_measurements(x, ...)
42 43
 \item{endpoint}{The URL of the openSenseMap API}
43 44
 
44 45
 \item{progress}{Whether to print download progress information}
46
+
47
+\item{cache}{Whether to cache the result, defaults to false.
48
+If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.}
45 49
 }
46 50
 \value{
47 51
 An \code{osem_measurements data.frame} containing the
@@ -63,17 +67,49 @@ a bounding box spanning the whole world.
63 67
 
64 68
 \examples{
65 69
 \donttest{
66
-  # get measurements from all boxes
67
-  m1 = osem_measurements('Windrichtung')
70
+  # get measurements from all boxes on the phenomenon 'PM10' from the last 48h
71
+  m = osem_measurements('PM10')
72
+
73
+  # get measurements from all mobile boxes on the phenomenon 'PM10' from the last 48h
74
+  m = osem_measurements('PM10', exposure = 'mobile')
75
+
76
+  # get measurements and cache them locally in the working directory.
77
+  # subsequent identical requests will load from the cache instead, ensuring
78
+  # reproducibility and saving time and bandwidth!
79
+  m = osem_measurements('PM10', exposure = 'mobile', cache = getwd())
80
+  m = osem_measurements('PM10', exposure = 'mobile', cache = getwd())
81
+
82
+  # get measurements returning a custom selection of columns
83
+  m = osem_measurements('PM10', exposure = 'mobile', columns = c(
84
+    'value',
85
+    'boxId',
86
+    'sensorType',
87
+    'lat',
88
+    'lon',
89
+    'height'
90
+  ))
68 91
 }
69 92
 \donttest{
70
-  # get measurements from sensors within a bounding box
93
+  # get measurements from sensors within a custom WGS84 bounding box
71 94
   bbox = structure(c(7, 51, 8, 52), class = 'bbox')
72
-  m2 = osem_measurements(bbox, 'Temperatur')
73
-
74
-  points = sf::st_multipoint(matrix(c(7.5, 7.8, 51.7, 52), 2, 2))
75
-  bbox2 = sf::st_bbox(points)
76
-  m3 = osem_measurements(bbox2, 'Temperatur', exposure = 'outdoor')
95
+  m = osem_measurements(bbox, 'Temperatur')
96
+
97
+  # construct a bounding box 12km around berlin using the sf package,
98
+  # and get measurements from stations within that box
99
+  library(sf)
100
+  bbox2 = st_point(c(13.4034, 52.5120)) \%>\%
101
+    st_sfc(crs = 4326) \%>\%
102
+    st_transform(3857) \%>\% # allow setting a buffer in meters
103
+    st_buffer(set_units(12, km)) \%>\%
104
+    st_transform(4326) \%>\% # the opensensemap expects WGS 84
105
+    st_bbox()
106
+  m = osem_measurements(bbox2, 'Temperatur', exposure = 'outdoor')
107
+
108
+  # construct a bounding box from two points,
109
+  # and get measurements from stations within that box
110
+  points = st_multipoint(matrix(c(7.5, 7.8, 51.7, 52), 2, 2))
111
+  bbox3 = st_bbox(points)
112
+  m = osem_measurements(bbox3, 'Temperatur', exposure = 'outdoor')
77 113
 }
78 114
 \donttest{
79 115
   # get measurements from a set of boxes
@@ -83,10 +119,25 @@ a bounding box spanning the whole world.
83 119
   # ...or a single box
84 120
   b = osem_box('57000b8745fd40c8196ad04c')
85 121
   m5 = osem_measurements(b, phenomenon = 'Temperatur')
122
+
123
+  # get measurements from a single box from the last 40 days.
124
+  # requests are paged for long time frames, so the API's limitation
125
+  # does not apply!
126
+  library(lubridate)
127
+  m1 = osem_measurements(
128
+    b,
129
+    'Temperatur',
130
+    to = now(),
131
+    from = now() - days(40)
132
+  )
86 133
 }
87 134
 }
88 135
 \seealso{
89 136
 \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)}
90 137
 
138
+\code{\link{osem_box}}
139
+
91 140
 \code{\link{osem_boxes}}
141
+
142
+\code{\link{osem_clear_cache}}
92 143
 }

Loading…
Cancel
Save