update docs + vignette builds
父節點
e49ae4bb50
當前提交
553772d209
檔案差異因為一行或多行太長而無法顯示
@ -0,0 +1,96 @@
|
||||
## ----cache---------------------------------------------------------------
|
||||
b = osem_boxes(cache = tempdir())
|
||||
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
|
||||
|
||||
# the next identical request will hit the cache only!
|
||||
b = osem_boxes(cache = tempdir())
|
||||
|
||||
# requests without the cache parameter will still be performed normally
|
||||
b = osem_boxes()
|
||||
|
||||
## ----cache_custom--------------------------------------------------------
|
||||
cacheDir = getwd() # current working directory
|
||||
b = osem_boxes(cache = cacheDir)
|
||||
|
||||
# the next identical request will hit the cache only!
|
||||
b = osem_boxes(cache = cacheDir)
|
||||
|
||||
## ----clearcache----------------------------------------------------------
|
||||
osem_clear_cache() # clears default cache
|
||||
osem_clear_cache(getwd()) # clears a custom cache
|
||||
|
||||
## ----setup, results='hide'-----------------------------------------------
|
||||
# this section requires:
|
||||
library(opensensmapr)
|
||||
library(jsonlite)
|
||||
library(readr)
|
||||
|
||||
# first get our example data:
|
||||
boxes = osem_boxes(grouptag = 'ifgi')
|
||||
measurements = osem_measurements(boxes, phenomenon = 'PM10')
|
||||
|
||||
## ----serialize_json------------------------------------------------------
|
||||
# serializing senseBoxes to JSON, and loading from file again:
|
||||
write(jsonlite::serializeJSON(measurements), 'boxes.json')
|
||||
boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
|
||||
|
||||
## ----serialize_attrs-----------------------------------------------------
|
||||
# note the toJSON call
|
||||
write(jsonlite::toJSON(measurements), 'boxes_bad.json')
|
||||
boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
|
||||
|
||||
boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
|
||||
class(boxes_with_attrs)
|
||||
|
||||
## ----osem_offline--------------------------------------------------------
|
||||
# offline logic
|
||||
osem_offline = function (func, file, format='rds', ...) {
|
||||
# deserialize if file exists, otherwise download and serialize
|
||||
if (file.exists(file)) {
|
||||
if (format == 'json')
|
||||
jsonlite::unserializeJSON(readr::read_file(file))
|
||||
else
|
||||
readRDS(file)
|
||||
} else {
|
||||
data = func(...)
|
||||
if (format == 'json')
|
||||
write(jsonlite::serializeJSON(data), file = file)
|
||||
else
|
||||
saveRDS(data, file)
|
||||
data
|
||||
}
|
||||
}
|
||||
|
||||
# wrappers for each download function
|
||||
osem_measurements_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_measurements, file, ...)
|
||||
}
|
||||
osem_boxes_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_boxes, file, ...)
|
||||
}
|
||||
osem_box_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_box, file, ...)
|
||||
}
|
||||
osem_counts_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_counts, file, ...)
|
||||
}
|
||||
|
||||
## ----test----------------------------------------------------------------
|
||||
# first run; will download and save to disk
|
||||
b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
|
||||
|
||||
# consecutive runs; will read from disk
|
||||
b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
|
||||
class(b1) == class(b2)
|
||||
|
||||
# we can even omit the arguments now (though thats not really the point here)
|
||||
b3 = osem_boxes_offline('mobileboxes.rds')
|
||||
nrow(b1) == nrow(b3)
|
||||
|
||||
# verify that the custom sensebox methods are still working
|
||||
summary(b2)
|
||||
plot(b3)
|
||||
|
||||
## ----cleanup, results='hide'---------------------------------------------
|
||||
file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
|
||||
|
@ -0,0 +1,169 @@
|
||||
---
|
||||
title: "opensensmapr reproducibility: Loading openSenseMap Data from Files"
|
||||
author: "Norwin Roosen"
|
||||
date: "`r Sys.Date()`"
|
||||
output: rmarkdown::html_vignette
|
||||
vignette: >
|
||||
%\VignetteIndexEntry{opensensmapr reproducibility: Loading openSenseMap Data from Files}
|
||||
%\VignetteEngine{knitr::rmarkdown}
|
||||
%\VignetteEncoding{UTF-8}
|
||||
---
|
||||
|
||||
It may be useful to download data from openSenseMap only once.
|
||||
For reproducible results, the data could be saved to disk, and reloaded at a
|
||||
later point.
|
||||
|
||||
This avoids..
|
||||
|
||||
- changed results for queries without date parameters,
|
||||
- unnecessary wait times,
|
||||
- risk of API changes / API unavailability,
|
||||
- stress on the openSenseMap-server.
|
||||
|
||||
This vignette shows how to use this built in `opensensmapr` feature, and
|
||||
how to do it yourself, if you want to store to other data formats.
|
||||
|
||||
## Using openSensMapr Caching Feature
|
||||
All data retrieval functions of `opensensmapr` have a built in caching feature,
|
||||
which serializes an API response to disk.
|
||||
Subsequent identical requests will then return the serialized data instead of making
|
||||
another request.
|
||||
To do so, each request is given a unique ID based on its parameters.
|
||||
|
||||
To use this feature, just add a path to a directory to the `cache` parameter:
|
||||
```{r cache}
|
||||
b = osem_boxes(cache = tempdir())
|
||||
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
|
||||
|
||||
# the next identical request will hit the cache only!
|
||||
b = osem_boxes(cache = tempdir())
|
||||
|
||||
# requests without the cache parameter will still be performed normally
|
||||
b = osem_boxes()
|
||||
```
|
||||
|
||||
You can maintain multiple caches simultaneously which allows to store only
|
||||
serialized data related to a script in its directory:
|
||||
```{r cache_custom}
|
||||
cacheDir = getwd() # current working directory
|
||||
b = osem_boxes(cache = cacheDir)
|
||||
|
||||
# the next identical request will hit the cache only!
|
||||
b = osem_boxes(cache = cacheDir)
|
||||
```
|
||||
|
||||
To get fresh results again, just call `osem_clear_cache()` for the respective cache:
|
||||
```{r clearcache}
|
||||
osem_clear_cache() # clears default cache
|
||||
osem_clear_cache(getwd()) # clears a custom cache
|
||||
```
|
||||
|
||||
## Custom (De-) Serialization
|
||||
If you want to roll your own serialization method to support custom data formats,
|
||||
here's how:
|
||||
|
||||
```{r setup, results='hide'}
|
||||
# this section requires:
|
||||
library(opensensmapr)
|
||||
library(jsonlite)
|
||||
library(readr)
|
||||
|
||||
# first get our example data:
|
||||
boxes = osem_boxes(grouptag = 'ifgi')
|
||||
measurements = osem_measurements(boxes, phenomenon = 'PM10')
|
||||
```
|
||||
|
||||
If you are paranoid and worry about `.rds` files not being decodable anymore
|
||||
in the (distant) future, you could serialize to a plain text format such as JSON.
|
||||
This of course comes at the cost of storage space and performance.
|
||||
```{r serialize_json}
|
||||
# serializing senseBoxes to JSON, and loading from file again:
|
||||
write(jsonlite::serializeJSON(measurements), 'boxes.json')
|
||||
boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
|
||||
```
|
||||
|
||||
Both methods also persist the R object metadata (classes, attributes).
|
||||
If you were to use a serialization method that can't persist object metadata, you
|
||||
could re-apply it with the following functions:
|
||||
|
||||
```{r serialize_attrs}
|
||||
# note the toJSON call
|
||||
write(jsonlite::toJSON(measurements), 'boxes_bad.json')
|
||||
boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
|
||||
|
||||
boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
|
||||
class(boxes_with_attrs)
|
||||
```
|
||||
The same goes for measurements via `osem_as_measurements()`.
|
||||
|
||||
## Workflow for reproducible code
|
||||
For truly reproducible code you want it to work and return the same results --
|
||||
no matter if you run it the first time or a consecutive time, and without making
|
||||
changes to it.
|
||||
|
||||
Therefore we need a wrapper around the save-to-file & load-from-file logic.
|
||||
The following examples show a way to do just that, and where inspired by
|
||||
[this reproducible analysis by Daniel Nuest](https://github.com/nuest/sensebox-binder).
|
||||
|
||||
```{r osem_offline}
|
||||
# offline logic
|
||||
osem_offline = function (func, file, format='rds', ...) {
|
||||
# deserialize if file exists, otherwise download and serialize
|
||||
if (file.exists(file)) {
|
||||
if (format == 'json')
|
||||
jsonlite::unserializeJSON(readr::read_file(file))
|
||||
else
|
||||
readRDS(file)
|
||||
} else {
|
||||
data = func(...)
|
||||
if (format == 'json')
|
||||
write(jsonlite::serializeJSON(data), file = file)
|
||||
else
|
||||
saveRDS(data, file)
|
||||
data
|
||||
}
|
||||
}
|
||||
|
||||
# wrappers for each download function
|
||||
osem_measurements_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_measurements, file, ...)
|
||||
}
|
||||
osem_boxes_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_boxes, file, ...)
|
||||
}
|
||||
osem_box_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_box, file, ...)
|
||||
}
|
||||
osem_counts_offline = function (file, ...) {
|
||||
osem_offline(opensensmapr::osem_counts, file, ...)
|
||||
}
|
||||
```
|
||||
|
||||
Thats it! Now let's try it out:
|
||||
|
||||
```{r test}
|
||||
# first run; will download and save to disk
|
||||
b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
|
||||
|
||||
# consecutive runs; will read from disk
|
||||
b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
|
||||
class(b1) == class(b2)
|
||||
|
||||
# we can even omit the arguments now (though thats not really the point here)
|
||||
b3 = osem_boxes_offline('mobileboxes.rds')
|
||||
nrow(b1) == nrow(b3)
|
||||
|
||||
# verify that the custom sensebox methods are still working
|
||||
summary(b2)
|
||||
plot(b3)
|
||||
```
|
||||
|
||||
To re-download the data, just clear the files that were created in the process:
|
||||
```{r cleanup, results='hide'}
|
||||
file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
|
||||
```
|
||||
|
||||
A possible extension to this scheme comes to mind: Omit the specification of a
|
||||
filename, and assign a unique ID to the request instead.
|
||||
For example, one could calculate the SHA-1 hash of the parameters, and use it
|
||||
as filename.
|
檔案差異因為一行或多行太長而無法顯示
@ -0,0 +1,28 @@
|
||||
% Generated by roxygen2: do not edit by hand
|
||||
% Please edit documentation in R/api.R
|
||||
\name{osem_clear_cache}
|
||||
\alias{osem_clear_cache}
|
||||
\title{Purge cached responses from the given cache directory}
|
||||
\usage{
|
||||
osem_clear_cache(location = tempdir())
|
||||
}
|
||||
\arguments{
|
||||
\item{location}{A path to the cache directory, defaults to the
|
||||
sessions' \code{tempdir()}}
|
||||
}
|
||||
\value{
|
||||
Boolean whether the deletion was successful
|
||||
}
|
||||
\description{
|
||||
Purge cached responses from the given cache directory
|
||||
}
|
||||
\examples{
|
||||
\donttest{
|
||||
osem_boxes(cache = tempdir())
|
||||
osem_clear_cache()
|
||||
|
||||
cachedir = paste(getwd(), 'osemcache', sep = '/')
|
||||
osem_boxes(cache = cachedir)
|
||||
osem_clear_cache(cachedir)
|
||||
}
|
||||
}
|
載入中…
新增問題並參考