Browse Source

update vignette to workaround #22 ...again

development
noerw 11 months ago
parent
commit
f7cbb1bc26

+ 27
- 72
inst/doc/osem-serialization.R View File

@@ -1,96 +1,51 @@
1
+## ----setup, results='hide'-----------------------------------------------
2
+# this vignette requires:
3
+library(opensensmapr)
4
+library(jsonlite)
5
+library(readr)
6
+
1 7
 ## ----cache---------------------------------------------------------------
2
-b = osem_boxes(cache = tempdir())
3
-list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
8
+b = osem_boxes(grouptag = 'ifgi', cache = tempdir())
4 9
 
5 10
 # the next identical request will hit the cache only!
6
-b = osem_boxes(cache = tempdir())
11
+b = osem_boxes(grouptag = 'ifgi', cache = tempdir())
7 12
 
8 13
 # requests without the cache parameter will still be performed normally
9
-b = osem_boxes()
14
+b = osem_boxes(grouptag = 'ifgi')
15
+
16
+## ----cachelisting--------------------------------------------------------
17
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
10 18
 
11 19
 ## ----cache_custom--------------------------------------------------------
12 20
 cacheDir = getwd() # current working directory
13
-b = osem_boxes(cache = cacheDir)
21
+b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
14 22
 
15 23
 # the next identical request will hit the cache only!
16
-b = osem_boxes(cache = cacheDir)
24
+b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
17 25
 
18 26
 ## ----clearcache----------------------------------------------------------
19 27
 osem_clear_cache() # clears default cache
20 28
 osem_clear_cache(getwd()) # clears a custom cache
21 29
 
22
-## ----setup, results='hide'-----------------------------------------------
23
-# this section requires:
24
-library(opensensmapr)
25
-library(jsonlite)
26
-library(readr)
27
-
30
+## ----data, results='hide'------------------------------------------------
28 31
 # first get our example data:
29
-boxes = osem_boxes(grouptag = 'ifgi')
30
-measurements = osem_measurements(boxes, phenomenon = 'PM10')
32
+measurements = osem_measurements('Windrichtung')
31 33
 
32 34
 ## ----serialize_json------------------------------------------------------
33 35
 # serializing measurements to JSON, and loading from file again:
34
-write(jsonlite::serializeJSON(measurements), 'boxes.json')
35
-boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
36
+write(jsonlite::serializeJSON(measurements), 'measurements.json')
37
+measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
38
+class(measurements_from_file)
36 39
 
37 40
 ## ----serialize_attrs-----------------------------------------------------
38
-# note the toJSON call
39
-write(jsonlite::toJSON(measurements), 'boxes_bad.json')
40
-boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
41
-
42
-boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
43
-class(boxes_with_attrs)
44
-
45
-## ----osem_offline--------------------------------------------------------
46
-# offline logic
47
-osem_offline = function (func, file, format='rds', ...) {
48
-  # deserialize if file exists, otherwise download and serialize
49
-  if (file.exists(file)) {
50
-    if (format == 'json')
51
-      jsonlite::unserializeJSON(readr::read_file(file))
52
-    else
53
-      readRDS(file)
54
-  } else {
55
-    data = func(...)
56
-    if (format == 'json')
57
-      write(jsonlite::serializeJSON(data), file = file)
58
-    else
59
-      saveRDS(data, file)
60
-    data
61
-  }
62
-}
63
-
64
-# wrappers for each download function
65
-osem_measurements_offline = function (file, ...) {
66
-  osem_offline(opensensmapr::osem_measurements, file, ...)
67
-}
68
-osem_boxes_offline = function (file, ...) {
69
-  osem_offline(opensensmapr::osem_boxes, file, ...)
70
-}
71
-osem_box_offline = function (file, ...) {
72
-  osem_offline(opensensmapr::osem_box, file, ...)
73
-}
74
-osem_counts_offline = function (file, ...) {
75
-  osem_offline(opensensmapr::osem_counts, file, ...)
76
-}
77
-
78
-## ----test----------------------------------------------------------------
79
-# first run; will download and save to disk
80
-b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
81
-
82
-# consecutive runs; will read from disk
83
-b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
84
-class(b1) == class(b2)
85
-
86
-# we can even omit the arguments now (though thats not really the point here)
87
-b3 = osem_boxes_offline('mobileboxes.rds')
88
-nrow(b1) == nrow(b3)
41
+# note the toJSON call instead of serializeJSON
42
+write(jsonlite::toJSON(measurements), 'measurements_bad.json')
43
+measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
44
+class(measurements_without_attrs)
89 45
 
90
-# verify that the custom sensebox methods are still working
91
-summary(b2)
92
-plot(b3)
46
+measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
47
+class(measurements_with_attrs)
93 48
 
94
-## ----cleanup, results='hide'---------------------------------------------
95
-file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
49
+## ----cleanup, include=FALSE----------------------------------------------
50
+file.remove('measurements.json', 'measurements_bad.json')
96 51
 

+ 38
- 101
inst/doc/osem-serialization.Rmd View File

@@ -10,7 +10,7 @@ vignette: >
10 10
 ---
11 11
 
12 12
 It may be useful to download data from openSenseMap only once.
13
-For reproducible results, the data could be saved to disk, and reloaded at a
13
+For reproducible results, the data should be saved to disk, and reloaded at a
14 14
 later point.
15 15
 
16 16
 This avoids..
@@ -21,40 +21,49 @@ This avoids..
21 21
 - stress on the openSenseMap-server.
22 22
 
23 23
 This vignette shows how to use this built in `opensensmapr` feature, and
24
-how to do it yourself, if you want to store to other data formats.
24
+how to do it yourself in case you want to save to other data formats.
25 25
 
26
-## Using openSensMapr Caching Feature
26
+```{r setup, results='hide'}
27
+# this vignette requires:
28
+library(opensensmapr)
29
+library(jsonlite)
30
+library(readr)
31
+```
32
+
33
+## Using the opensensmapr Caching Feature
27 34
 All data retrieval functions of `opensensmapr` have a built in caching feature,
28 35
 which serializes an API response to disk.
29 36
 Subsequent identical requests will then return the serialized data instead of making
30 37
 another request.
31
-To do so, each request is given a unique ID based on its parameters.
32 38
 
33 39
 To use this feature, just add a path to a directory to the `cache` parameter:
34 40
 ```{r cache}
35
-b = osem_boxes(cache = tempdir())
36
-list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
41
+b = osem_boxes(grouptag = 'ifgi', cache = tempdir())
37 42
 
38 43
 # the next identical request will hit the cache only!
39
-b = osem_boxes(cache = tempdir())
44
+b = osem_boxes(grouptag = 'ifgi', cache = tempdir())
40 45
 
41 46
 # requests without the cache parameter will still be performed normally
42
-b = osem_boxes()
47
+b = osem_boxes(grouptag = 'ifgi')
48
+```
49
+
50
+Looking at the cache directory we can see one file for each request, which is identified through a hash of the request URL:
51
+```{r cachelisting}
52
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
43 53
 ```
44 54
 
45
-You can maintain multiple caches simultaneously which allows to store only
46
-serialized data related to a script in its directory:
55
+You can maintain multiple caches simultaneously, which allows you to store only the data related to a script in the same directory:
47 56
 ```{r cache_custom}
48 57
 cacheDir = getwd() # current working directory
49
-b = osem_boxes(cache = cacheDir)
58
+b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
50 59
 
51 60
 # the next identical request will hit the cache only!
52
-b = osem_boxes(cache = cacheDir)
61
+b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
53 62
 ```
54 63
 
55 64
 To get fresh results again, just call `osem_clear_cache()` for the respective cache:
56
-```{r clearcache}
57
-osem_clear_cache() # clears default cache
65
+```{r clearcache, results='hide'}
66
+osem_clear_cache()        # clears default cache
58 67
 osem_clear_cache(getwd()) # clears a custom cache
59 68
 ```
60 69
 
@@ -62,15 +71,9 @@ osem_clear_cache(getwd()) # clears a custom cache
62 71
 If you want to roll your own serialization method to support custom data formats,
63 72
 here's how:
64 73
 
65
-```{r setup, results='hide'}
66
-# this section requires:
67
-library(opensensmapr)
68
-library(jsonlite)
69
-library(readr)
70
-
74
+```{r data, results='hide'}
71 75
 # first get our example data:
72
-boxes = osem_boxes(grouptag = 'ifgi')
73
-measurements = osem_measurements(boxes, phenomenon = 'PM10')
76
+measurements = osem_measurements('Windrichtung')
74 77
 ```
75 78
 
76 79
 If you are paranoid and worry about `.rds` files not being decodable anymore
@@ -78,92 +81,26 @@ in the (distant) future, you could serialize to a plain text format such as JSON
78 81
 This of course comes at the cost of storage space and performance.
79 82
 ```{r serialize_json}
80 83
 # serializing measurements to JSON, and loading from file again:
81
-write(jsonlite::serializeJSON(measurements), 'boxes.json')
82
-boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
84
+write(jsonlite::serializeJSON(measurements), 'measurements.json')
85
+measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
86
+class(measurements_from_file)
83 87
 ```
84 88
 
85
-Both methods also persist the R object metadata (classes, attributes).
89
+This method also persists the R object metadata (classes, attributes).
86 90
 If you were to use a serialization method that can't persist object metadata, you
87 91
 could re-apply it with the following functions:
88 92
 
89 93
 ```{r serialize_attrs}
90
-# note the toJSON call
91
-write(jsonlite::toJSON(measurements), 'boxes_bad.json')
92
-boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
94
+# note the toJSON call instead of serializeJSON
95
+write(jsonlite::toJSON(measurements), 'measurements_bad.json')
96
+measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
97
+class(measurements_without_attrs)
93 98
 
94
-boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
95
-class(boxes_with_attrs)
99
+measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
100
+class(measurements_with_attrs)
96 101
 ```
97
-The same goes for measurements via `osem_as_measurements()`.
98
-
99
-## Workflow for reproducible code
100
-For truly reproducible code you want it to work and return the same results --
101
-no matter if you run it the first time or a consecutive time, and without making
102
-changes to it.
103
-
104
-Therefore we need a wrapper around the save-to-file & load-from-file logic.
105
-The following examples show a way to do just that, and where inspired by
106
-[this reproducible analysis by Daniel Nuest](https://github.com/nuest/sensebox-binder).
107
-
108
-```{r osem_offline}
109
-# offline logic
110
-osem_offline = function (func, file, format='rds', ...) {
111
-  # deserialize if file exists, otherwise download and serialize
112
-  if (file.exists(file)) {
113
-    if (format == 'json')
114
-      jsonlite::unserializeJSON(readr::read_file(file))
115
-    else
116
-      readRDS(file)
117
-  } else {
118
-    data = func(...)
119
-    if (format == 'json')
120
-      write(jsonlite::serializeJSON(data), file = file)
121
-    else
122
-      saveRDS(data, file)
123
-    data
124
-  }
125
-}
126
-
127
-# wrappers for each download function
128
-osem_measurements_offline = function (file, ...) {
129
-  osem_offline(opensensmapr::osem_measurements, file, ...)
130
-}
131
-osem_boxes_offline = function (file, ...) {
132
-  osem_offline(opensensmapr::osem_boxes, file, ...)
133
-}
134
-osem_box_offline = function (file, ...) {
135
-  osem_offline(opensensmapr::osem_box, file, ...)
136
-}
137
-osem_counts_offline = function (file, ...) {
138
-  osem_offline(opensensmapr::osem_counts, file, ...)
139
-}
140
-```
141
-
142
-Thats it! Now let's try it out:
143
-
144
-```{r test}
145
-# first run; will download and save to disk
146
-b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
102
+The same goes for boxes via `osem_as_sensebox()`.
147 103
 
148
-# consecutive runs; will read from disk
149
-b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
150
-class(b1) == class(b2)
151
-
152
-# we can even omit the arguments now (though thats not really the point here)
153
-b3 = osem_boxes_offline('mobileboxes.rds')
154
-nrow(b1) == nrow(b3)
155
-
156
-# verify that the custom sensebox methods are still working
157
-summary(b2)
158
-plot(b3)
104
+```{r cleanup, include=FALSE}
105
+file.remove('measurements.json', 'measurements_bad.json')
159 106
 ```
160
-
161
-To re-download the data, just clear the files that were created in the process:
162
-```{r cleanup, results='hide'}
163
-file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
164
-```
165
-
166
-A possible extension to this scheme comes to mind: Omit the specification of a
167
-filename, and assign a unique ID to the request instead.
168
-For example, one could calculate the SHA-1 hash of the parameters, and use it
169
-as filename.

+ 42
- 120
inst/doc/osem-serialization.html
File diff suppressed because it is too large
View File


+ 23
- 19
vignettes/osem-serialization.Rmd View File

@@ -10,7 +10,7 @@ vignette: >
10 10
 ---
11 11
 
12 12
 It may be useful to download data from openSenseMap only once.
13
-For reproducible results, the data could be saved to disk, and reloaded at a
13
+For reproducible results, the data should be saved to disk, and reloaded at a
14 14
 later point.
15 15
 
16 16
 This avoids..
@@ -21,7 +21,7 @@ This avoids..
21 21
 - stress on the openSenseMap-server.
22 22
 
23 23
 This vignette shows how to use this built in `opensensmapr` feature, and
24
-how to do it yourself, if you want to save to other data formats.
24
+how to do it yourself in case you want to save to other data formats.
25 25
 
26 26
 ```{r setup, results='hide'}
27 27
 # this vignette requires:
@@ -38,13 +38,13 @@ another request.
38 38
 
39 39
 To use this feature, just add a path to a directory to the `cache` parameter:
40 40
 ```{r cache}
41
-b = osem_boxes(cache = tempdir())
41
+b = osem_boxes(grouptag = 'ifgi', cache = tempdir())
42 42
 
43 43
 # the next identical request will hit the cache only!
44
-b = osem_boxes(cache = tempdir())
44
+b = osem_boxes(grouptag = 'ifgi', cache = tempdir())
45 45
 
46 46
 # requests without the cache parameter will still be performed normally
47
-b = osem_boxes()
47
+b = osem_boxes(grouptag = 'ifgi')
48 48
 ```
49 49
 
50 50
 Looking at the cache directory we can see one file for each request, which is identified through a hash of the request URL:
@@ -55,15 +55,15 @@ list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
55 55
 You can maintain multiple caches simultaneously, which allows you to store only the data related to a script in the same directory:
56 56
 ```{r cache_custom}
57 57
 cacheDir = getwd() # current working directory
58
-b = osem_boxes(cache = cacheDir)
58
+b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
59 59
 
60 60
 # the next identical request will hit the cache only!
61
-b = osem_boxes(cache = cacheDir)
61
+b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
62 62
 ```
63 63
 
64 64
 To get fresh results again, just call `osem_clear_cache()` for the respective cache:
65
-```{r clearcache}
66
-osem_clear_cache() # clears default cache
65
+```{r clearcache, results='hide'}
66
+osem_clear_cache()        # clears default cache
67 67
 osem_clear_cache(getwd()) # clears a custom cache
68 68
 ```
69 69
 
@@ -73,7 +73,7 @@ here's how:
73 73
 
74 74
 ```{r data, results='hide'}
75 75
 # first get our example data:
76
-boxes = osem_boxes(grouptag = 'ifgi')
76
+measurements = osem_measurements('Windrichtung')
77 77
 ```
78 78
 
79 79
 If you are paranoid and worry about `.rds` files not being decodable anymore
@@ -81,9 +81,9 @@ in the (distant) future, you could serialize to a plain text format such as JSON
81 81
 This of course comes at the cost of storage space and performance.
82 82
 ```{r serialize_json}
83 83
 # serializing measurements to JSON, and loading from file again:
84
-write(jsonlite::serializeJSON(boxes), 'boxes.json')
85
-boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
86
-class(boxes_from_file)
84
+write(jsonlite::serializeJSON(measurements), 'measurements.json')
85
+measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
86
+class(measurements_from_file)
87 87
 ```
88 88
 
89 89
 This method also persists the R object metadata (classes, attributes).
@@ -92,11 +92,15 @@ could re-apply it with the following functions:
92 92
 
93 93
 ```{r serialize_attrs}
94 94
 # note the toJSON call instead of serializeJSON
95
-write(jsonlite::toJSON(boxes), 'boxes_bad.json')
96
-boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
97
-class(boxes_without_attrs)
95
+write(jsonlite::toJSON(measurements), 'measurements_bad.json')
96
+measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
97
+class(measurements_without_attrs)
98 98
 
99
-boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
100
-class(boxes_with_attrs)
99
+measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
100
+class(measurements_with_attrs)
101
+```
102
+The same goes for boxes via `osem_as_sensebox()`.
103
+
104
+```{r cleanup, include=FALSE}
105
+file.remove('measurements.json', 'measurements_bad.json')
101 106
 ```
102
-The same goes for measurements via `osem_as_measurements()`.

Loading…
Cancel
Save