mirror of
https://github.com/sensebox/opensensmapr
synced 2025-02-23 07:53:58 +01:00
425 lines
70 KiB
HTML
425 lines
70 KiB
HTML
![]() |
<!DOCTYPE html>
|
||
|
<html>
|
||
|
<head>
|
||
|
<meta http-equiv="Content-Type" content="text/html; charset=utf-8"/>
|
||
|
|
||
|
<title>Using openSensMapr Caching Feature</title>
|
||
|
|
||
|
<script type="text/javascript">
|
||
|
// Page-load handler: walks over every <img> element in the document and,
// when the image is the sole child element of its parent, centers it by
// setting `text-align: center` on that parent.
window.onload = function() {
|
||
|
var imgs = document.getElementsByTagName('img'), i, img;
|
||
|
for (i = 0; i < imgs.length; i++) {
|
||
|
img = imgs[i];
|
||
|
// center an image if it is the only element of its parent
// (childElementCount counts element children only, so surrounding text nodes do not prevent centering)
|
||
|
if (img.parentElement.childElementCount === 1)
|
||
|
img.parentElement.style.textAlign = 'center';
|
||
|
}
|
||
|
};
|
||
|
</script>
|
||
|
|
||
|
<!-- Styles for R syntax highlighter -->
|
||
|
<style type="text/css">
|
||
|
pre .operator,
|
||
|
pre .paren {
|
||
|
color: rgb(104, 118, 135)
|
||
|
}
|
||
|
|
||
|
pre .literal {
|
||
|
color: #990073
|
||
|
}
|
||
|
|
||
|
pre .number {
|
||
|
color: #099;
|
||
|
}
|
||
|
|
||
|
pre .comment {
|
||
|
color: #998;
|
||
|
font-style: italic
|
||
|
}
|
||
|
|
||
|
pre .keyword {
|
||
|
color: #900;
|
||
|
font-weight: bold
|
||
|
}
|
||
|
|
||
|
pre .identifier {
|
||
|
color: rgb(0, 0, 0);
|
||
|
}
|
||
|
|
||
|
pre .string {
|
||
|
color: #d14;
|
||
|
}
|
||
|
</style>
|
||
|
|
||
|
<!-- R syntax highlighter -->
|
||
|
<script type="text/javascript">
|
||
|
var hljs=new function(){function m(p){return p.replace(/&/gm,"&").replace(/</gm,"<")}function f(r,q,p){return RegExp(q,"m"+(r.cI?"i":"")+(p?"g":""))}function b(r){for(var p=0;p<r.childNodes.length;p++){var q=r.childNodes[p];if(q.nodeName=="CODE"){return q}if(!(q.nodeType==3&&q.nodeValue.match(/\s+/))){break}}}function h(t,s){var p="";for(var r=0;r<t.childNodes.length;r++){if(t.childNodes[r].nodeType==3){var q=t.childNodes[r].nodeValue;if(s){q=q.replace(/\n/g,"")}p+=q}else{if(t.childNodes[r].nodeName=="BR"){p+="\n"}else{p+=h(t.childNodes[r])}}}if(/MSIE [678]/.test(navigator.userAgent)){p=p.replace(/\r/g,"\n")}return p}function a(s){var r=s.className.split(/\s+/);r=r.concat(s.parentNode.className.split(/\s+/));for(var q=0;q<r.length;q++){var p=r[q].replace(/^language-/,"");if(e[p]){return p}}}function c(q){var p=[];(function(s,t){for(var r=0;r<s.childNodes.length;r++){if(s.childNodes[r].nodeType==3){t+=s.childNodes[r].nodeValue.length}else{if(s.childNodes[r].nodeName=="BR"){t+=1}else{if(s.childNodes[r].nodeType==1){p.push({event:"start",offset:t,node:s.childNodes[r]});t=arguments.callee(s.childNodes[r],t);p.push({event:"stop",offset:t,node:s.childNodes[r]})}}}}return t})(q,0);return p}function k(y,w,x){var q=0;var z="";var s=[];function u(){if(y.length&&w.length){if(y[0].offset!=w[0].offset){return(y[0].offset<w[0].offset)?y:w}else{return w[0].event=="start"?y:w}}else{return y.length?y:w}}function t(D){var A="<"+D.nodeName.toLowerCase();for(var B=0;B<D.attributes.length;B++){var C=D.attributes[B];A+=" "+C.nodeName.toLowerCase();if(C.value!==undefined&&C.value!==false&&C.value!==null){A+='="'+m(C.value)+'"'}}return A+">"}while(y.length||w.length){var v=u().splice(0,1)[0];z+=m(x.substr(q,v.offset-q));q=v.offset;if(v.event=="start"){z+=t(v.node);s.push(v.node)}else{if(v.event=="stop"){var p,r=s.length;do{r--;p=s[r];z+=("</"+p.nodeName.toLowerCase()+">")}while(p!=v.node);s.splice(r,1);while(r<s.length){z+=t(s[r]);r++}}}}return z+m(x.substr(q))}function j(){function 
q(x,y,v){if(x.compiled){return}var u;var s=[];if(x.k){x.lR=f(y,x.l||hljs.IR,true);for(var w in x.k){if(!x.k.hasOwnProperty(w)){continue}if(x.k[w] instanceof Object){u=x.k[w]}else{u=x.k;w="keyword"}for(var r in u){if(!u.hasOwnProperty(r)){continue}x.k[r]=[w,u[r]];s.push(r)}}}if(!v){if(x.bWK){x.b="\\b("+s.join("|")+")\\s"}x.bR=f(y,x.b?x.b:"\\B|\\b");if(!x.e&&!x.eW){x.e="\\B|\\b"}if(x.e){x.eR=f(y,x.e)}}if(x.i){x.iR=f(y,x.i)}if(x.r===undefined){x.r=1}if(!x.c){x.c=[]}x.compiled=true;for(var t=0;t<x.c.length;t++){if(x.c[t]=="self"){x.c[t]=x}q(x.c[t],y,false)}if(x.starts){q(x.starts,y,false)}}for(var p in e){if(!e.hasOwnProperty(p)){continue}q(e[p].dM,e[p],true)}}function d(B,C){if(!j.called){j();j.called=true}function q(r,M){for(var L=0;L<M.c.length;L++){if((M.c[L].bR.exec(r)||[null])[0]==r){return M.c[L]}}}function v(L,r){if(D[L].e&&D[L].eR.test(r)){return 1}if(D[L].eW){var M=v(L-1,r);return M?M+1:0}return 0}function w(r,L){return L.i&&L.iR.test(r)}function K(N,O){var M=[];for(var L=0;L<N.c.length;L++){M.push(N.c[L].b)}var r=D.length-1;do{if(D[r].e){M.push(D[r].e)}r--}while(D[r+1].eW);if(N.i){M.push(N.i)}return f(O,M.join("|"),true)}function p(M,L){var N=D[D.length-1];if(!N.t){N.t=K(N,E)}N.t.lastIndex=L;var r=N.t.exec(M);return r?[M.substr(L,r.index-L),r[0],false]:[M.substr(L),"",true]}function z(N,r){var L=E.cI?r[0].toLowerCase():r[0];var M=N.k[L];if(M&&M instanceof Array){return M}return false}function F(L,P){L=m(L);if(!P.k){return L}var r="";var O=0;P.lR.lastIndex=0;var M=P.lR.exec(L);while(M){r+=L.substr(O,M.index-O);var N=z(P,M);if(N){x+=N[1];r+='<span class="'+N[0]+'">'+M[0]+"</span>"}else{r+=M[0]}O=P.lR.lastIndex;M=P.lR.exec(L)}return r+L.substr(O,L.length-O)}function J(L,M){if(M.sL&&e[M.sL]){var r=d(M.sL,L);x+=r.keyword_count;return r.value}else{return F(L,M)}}function I(M,r){var L=M.cN?'<span class="'+M.cN+'">':"";if(M.rB){y+=L;M.buffer=""}else{if(M.eB){y+=m(r)+L;M.buffer=""}else{y+=L;M.buffer=r}}D.push(M);A+=M.r}function G(N,M,Q){var 
R=D[D.length-1];if(Q){y+=J(R.buffer+N,R);return false}var P=q(M,R);if(P){y+=J(R.buffer+N,R);I(P,M);return P.rB}var L=v(D.
|
||
|
hljs.initHighlightingOnLoad();
|
||
|
</script>
|
||
|
|
||
|
|
||
|
|
||
|
<style type="text/css">
|
||
|
body, td {
|
||
|
font-family: sans-serif;
|
||
|
background-color: white;
|
||
|
font-size: 13px;
|
||
|
}
|
||
|
|
||
|
body {
|
||
|
max-width: 800px;
|
||
|
margin: auto;
|
||
|
padding: 1em;
|
||
|
line-height: 20px;
|
||
|
}
|
||
|
|
||
|
tt, code, pre {
|
||
|
font-family: 'DejaVu Sans Mono', 'Droid Sans Mono', 'Lucida Console', Consolas, Monaco, monospace;
|
||
|
}
|
||
|
|
||
|
h1 {
|
||
|
font-size:2.2em;
|
||
|
}
|
||
|
|
||
|
h2 {
|
||
|
font-size:1.8em;
|
||
|
}
|
||
|
|
||
|
h3 {
|
||
|
font-size:1.4em;
|
||
|
}
|
||
|
|
||
|
h4 {
|
||
|
font-size:1.0em;
|
||
|
}
|
||
|
|
||
|
h5 {
|
||
|
font-size:0.9em;
|
||
|
}
|
||
|
|
||
|
h6 {
|
||
|
font-size:0.8em;
|
||
|
}
|
||
|
|
||
|
a:visited {
|
||
|
color: rgb(50%, 0%, 50%);
|
||
|
}
|
||
|
|
||
|
pre, img {
|
||
|
max-width: 100%;
|
||
|
}
|
||
|
pre {
|
||
|
overflow-x: auto;
|
||
|
}
|
||
|
pre code {
|
||
|
display: block; padding: 0.5em;
|
||
|
}
|
||
|
|
||
|
code {
|
||
|
font-size: 92%;
|
||
|
border: 1px solid #ccc;
|
||
|
}
|
||
|
|
||
|
code[class] {
|
||
|
background-color: #F8F8F8;
|
||
|
}
|
||
|
|
||
|
table, td, th {
|
||
|
border: none;
|
||
|
}
|
||
|
|
||
|
blockquote {
|
||
|
color:#666666;
|
||
|
margin:0;
|
||
|
padding-left: 1em;
|
||
|
border-left: 0.5em #EEE solid;
|
||
|
}
|
||
|
|
||
|
hr {
|
||
|
height: 0px;
|
||
|
border-bottom: none;
|
||
|
border-top-width: thin;
|
||
|
border-top-style: dotted;
|
||
|
border-top-color: #999999;
|
||
|
}
|
||
|
|
||
|
@media print {
|
||
|
* {
|
||
|
background: transparent !important;
|
||
|
color: black !important;
|
||
|
filter:none !important;
|
||
|
-ms-filter: none !important;
|
||
|
}
|
||
|
|
||
|
body {
|
||
|
font-size:12pt;
|
||
|
max-width:100%;
|
||
|
}
|
||
|
|
||
|
a, a:visited {
|
||
|
text-decoration: underline;
|
||
|
}
|
||
|
|
||
|
hr {
|
||
|
visibility: hidden;
|
||
|
page-break-before: always;
|
||
|
}
|
||
|
|
||
|
pre, blockquote {
|
||
|
padding-right: 1em;
|
||
|
page-break-inside: avoid;
|
||
|
}
|
||
|
|
||
|
tr, img {
|
||
|
page-break-inside: avoid;
|
||
|
}
|
||
|
|
||
|
img {
|
||
|
max-width: 100% !important;
|
||
|
}
|
||
|
|
||
|
@page :left {
|
||
|
margin: 15mm 20mm 15mm 10mm;
|
||
|
}
|
||
|
|
||
|
@page :right {
|
||
|
margin: 15mm 10mm 15mm 20mm;
|
||
|
}
|
||
|
|
||
|
p, h2, h3 {
|
||
|
orphans: 3; widows: 3;
|
||
|
}
|
||
|
|
||
|
h2, h3 {
|
||
|
page-break-after: avoid;
|
||
|
}
|
||
|
}
|
||
|
</style>
|
||
|
|
||
|
|
||
|
|
||
|
</head>
|
||
|
|
||
|
<body>
|
||
|
<p>It may be useful to download data from openSenseMap only once.
|
||
|
For reproducible results, the data could be saved to disk, and reloaded at a
|
||
|
later point.</p>
|
||
|
|
||
|
<p>This avoids:</p>
|
||
|
|
||
|
<ul>
|
||
|
<li>changed results for queries without date parameters,</li>
|
||
|
<li>unnecessary wait times,</li>
|
||
|
<li>risk of API changes / API unavailability,</li>
|
||
|
<li>stress on the openSenseMap-server.</li>
|
||
|
</ul>
|
||
|
|
||
|
<p>This vignette shows how to use this built-in <code>opensensmapr</code> feature, and
|
||
|
how to do it yourself, if you want to store to other data formats.</p>
|
||
|
|
||
|
<h2>Using openSensMapr Caching Feature</h2>
|
||
|
|
||
|
<p>All data retrieval functions of <code>opensensmapr</code> have a built-in caching feature,
|
||
|
which serializes an API response to disk.
|
||
|
Subsequent identical requests will then return the serialized data instead of making
|
||
|
another request.
|
||
|
To do so, each request is given a unique ID based on its parameters.</p>
|
||
|
|
||
|
<p>To use this feature, just add a path to a directory to the <code>cache</code> parameter:</p>
|
||
|
|
||
|
<pre><code class="r">b = osem_boxes(cache = tempdir())
|
||
|
list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## [1] "osemcache.c54710f66b662e29dd86b089962b0f598e47eddb.rds"
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code class="r"># the next identical request will hit the cache only!
|
||
|
b = osem_boxes(cache = tempdir())
|
||
|
|
||
|
# requests without the cache parameter will still be performed normally
|
||
|
b = osem_boxes()
|
||
|
</code></pre>
|
||
|
|
||
|
<p>You can maintain multiple caches simultaneously, which allows you to store only
|
||
|
serialized data related to a script in its directory:</p>
|
||
|
|
||
|
<pre><code class="r">cacheDir = getwd() # current working directory
|
||
|
b = osem_boxes(cache = cacheDir)
|
||
|
|
||
|
# the next identical request will hit the cache only!
|
||
|
b = osem_boxes(cache = cacheDir)
|
||
|
</code></pre>
|
||
|
|
||
|
<p>To get fresh results again, just call <code>osem_clear_cache()</code> for the respective cache:</p>
|
||
|
|
||
|
<pre><code class="r">osem_clear_cache() # clears default cache
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## [1] TRUE
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code class="r">osem_clear_cache(getwd()) # clears a custom cache
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## [1] TRUE
|
||
|
</code></pre>
|
||
|
|
||
|
<h2>Custom (De-) Serialization</h2>
|
||
|
|
||
|
<p>If you want to roll your own serialization method to support custom data formats,
|
||
|
here's how:</p>
|
||
|
|
||
|
<pre><code class="r"># this section requires:
|
||
|
library(opensensmapr)
|
||
|
library(jsonlite)
|
||
|
library(readr)
|
||
|
|
||
|
# first get our example data:
|
||
|
boxes = osem_boxes(grouptag = 'ifgi')
|
||
|
measurements = osem_measurements(boxes, phenomenon = 'PM10')
|
||
|
</code></pre>
|
||
|
|
||
|
<p>If you are paranoid and worry about <code>.rds</code> files not being decodable anymore
|
||
|
in the (distant) future, you could serialize to a plain text format such as JSON.
|
||
|
This of course comes at the cost of storage space and performance.</p>
|
||
|
|
||
|
<pre><code class="r"># serializing senseBoxes to JSON, and loading from file again:
|
||
|
write(jsonlite::serializeJSON(measurements), 'boxes.json')
|
||
|
boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
|
||
|
</code></pre>
|
||
|
|
||
|
<p>Both methods also persist the R object metadata (classes, attributes).
|
||
|
If you were to use a serialization method that can't persist object metadata, you
|
||
|
could re-apply it with the following functions:</p>
|
||
|
|
||
|
<pre><code class="r"># note the toJSON call
|
||
|
write(jsonlite::toJSON(measurements), 'boxes_bad.json')
|
||
|
boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
|
||
|
|
||
|
boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
|
||
|
class(boxes_with_attrs)
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## [1] "sensebox" "data.frame"
|
||
|
</code></pre>
|
||
|
|
||
|
<p>The same goes for measurements via <code>osem_as_measurements()</code>.</p>
|
||
|
|
||
|
<h2>Workflow for reproducible code</h2>
|
||
|
|
||
|
<p>For truly reproducible code you want it to work and return the same results –
|
||
|
no matter if you run it the first time or a consecutive time, and without making
|
||
|
changes to it.</p>
|
||
|
|
||
|
<p>Therefore we need a wrapper around the save-to-file &amp; load-from-file logic.
|
||
|
The following examples show a way to do just that, and were inspired by
|
||
|
<a href="https://github.com/nuest/sensebox-binder">this reproducible analysis by Daniel Nuest</a>.</p>
|
||
|
|
||
|
<pre><code class="r"># offline logic
|
||
|
osem_offline = function (func, file, format='rds', ...) {
|
||
|
# deserialize if file exists, otherwise download and serialize
|
||
|
if (file.exists(file)) {
|
||
|
if (format == 'json')
|
||
|
jsonlite::unserializeJSON(readr::read_file(file))
|
||
|
else
|
||
|
readRDS(file)
|
||
|
} else {
|
||
|
data = func(...)
|
||
|
if (format == 'json')
|
||
|
write(jsonlite::serializeJSON(data), file = file)
|
||
|
else
|
||
|
saveRDS(data, file)
|
||
|
data
|
||
|
}
|
||
|
}
|
||
|
|
||
|
# wrappers for each download function
|
||
|
osem_measurements_offline = function (file, ...) {
|
||
|
osem_offline(opensensmapr::osem_measurements, file, ...)
|
||
|
}
|
||
|
osem_boxes_offline = function (file, ...) {
|
||
|
osem_offline(opensensmapr::osem_boxes, file, ...)
|
||
|
}
|
||
|
osem_box_offline = function (file, ...) {
|
||
|
osem_offline(opensensmapr::osem_box, file, ...)
|
||
|
}
|
||
|
osem_counts_offline = function (file, ...) {
|
||
|
osem_offline(opensensmapr::osem_counts, file, ...)
|
||
|
}
|
||
|
</code></pre>
|
||
|
|
||
|
<p>That's it! Now let's try it out:</p>
|
||
|
|
||
|
<pre><code class="r"># first run; will download and save to disk
|
||
|
b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
|
||
|
|
||
|
# consecutive runs; will read from disk
|
||
|
b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
|
||
|
class(b1) == class(b2)
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## [1] TRUE TRUE
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code class="r"># we can even omit the arguments now (though that's not really the point here)
|
||
|
b3 = osem_boxes_offline('mobileboxes.rds')
|
||
|
nrow(b1) == nrow(b3)
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## [1] TRUE
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code class="r"># verify that the custom sensebox methods are still working
|
||
|
summary(b2)
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## boxes total: 55
|
||
|
##
|
||
|
## boxes by exposure:
|
||
|
## mobile
|
||
|
## 55
|
||
|
##
|
||
|
## boxes by model:
|
||
|
## custom homeEthernet homeWifi
|
||
|
## 7 2 8
|
||
|
## homeWifiFeinstaub luftdaten_pms5003_bme280 luftdaten_sds011_bme280
|
||
|
## 6 2 9
|
||
|
## luftdaten_sds011_dht11 luftdaten_sds011_dht22
|
||
|
## 1 20
|
||
|
##
|
||
|
## $last_measurement_within
|
||
|
## 1h 1d 30d 365d never
|
||
|
## 16 18 24 43 12
|
||
|
##
|
||
|
## oldest box: 2017-05-24 08:16:36 (Feinstaub Hauptstrasse Steampunk-Design)
|
||
|
## newest box: 2018-05-24 07:08:32 (Josi Test)
|
||
|
##
|
||
|
## sensors per box:
|
||
|
## Min. 1st Qu. Median Mean 3rd Qu. Max.
|
||
|
## 1.000 4.000 4.000 4.618 5.000 22.000
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code class="r">plot(b3)
|
||
|
</code></pre>
|
||
|
|
||
|
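<!-- NOTE(review): the embedded image data for this plot is missing from this copy; restore the original src. -->
<p><img src="" alt="Output of plot(b3)"/></p>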
|
||
|
|
||
|
<p>To re-download the data, just clear the files that were created in the process:</p>
|
||
|
|
||
|
<pre><code class="r">file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
|
||
|
</code></pre>
|
||
|
|
||
|
<pre><code>## Warning in file.remove("mobileboxes.rds", "boxes_bad.json", "boxes.json", :
|
||
|
## cannot remove file 'measurements.rds', reason 'No such file or directory'
|
||
|
</code></pre>
|
||
|
|
||
|
<p>A possible extension to this scheme comes to mind: Omit the specification of a
|
||
|
filename, and assign a unique ID to the request instead.
|
||
|
For example, one could calculate the SHA-1 hash of the parameters, and use it
|
||
|
as filename.</p>
|
||
|
|
||
|
</body>
|
||
|
|
||
|
</html>
|