<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="Norwin Roosen" />
<meta name="date" content="2023-03-08" />
<title>Caching openSenseMap Data for Reproducibility</title>
<script>
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
//
// Once the DOM is parsed, look at the first child of every
// div.section whose class contains "level"; if that child is a heading
// (h1-h6), strip all of its attributes.
document.addEventListener('DOMContentLoaded', function () {
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue; // it should be a header h1-h6
    a = h.attributes;
    // `attributes` is a live collection: removing the first entry shifts the
    // rest down, so keep removing index 0 until nothing is left.
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
</script>
<style type="text/css">
/* Base helper classes for inline code, small caps, underline, columns,
   hanging indents and task lists. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
/* Keep code whitespace verbatim and let source blocks overflow visibly. */
code {
  white-space: pre;
}
.sourceCode {
  overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
/* Syntax-highlighting stylesheet. The data-origin="pandoc" attribute is what
   the rule-rewriting script in this document matches on, so it must stay. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
div.sourceCode { overflow: auto; }
}
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbering for pre.numberSource blocks, driven by a CSS counter. */
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
div.sourceCode
  { }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Token colours, one rule per highlighting class. */
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
//
// Walks every stylesheet tagged data-origin="pandoc" and, for each
// `div.sourceCode` rule that sets a color or background-color, replaces it
// in place with an equivalent `pre.sourceCode` rule.
(function () {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    // ownerNode is null for sheets pulled in via @import; skip those instead
    // of throwing on `.dataset`.
    var owner = sheets[i].ownerNode;
    if (!owner || !owner.dataset || owner.dataset["origin"] !== "pandoc") continue;
    // Reading cssRules can throw (e.g. for cross-origin sheets); skip then.
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule; j is not advanced
      // here because the inserted rule no longer matches and is skipped by
      // the selector check on the next iteration.
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">
/* Page layout and typography for the vignette. */
body {
  background-color: #fff;
  margin: 1em auto;
  max-width: 700px;
  overflow: visible;
  padding-left: 2em;
  padding-right: 2em;
  font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.35;
}
/* Table of contents box. */
#TOC {
  clear: both;
  margin: 0 0 10px 10px;
  padding: 4px;
  width: 400px;
  border: 1px solid #CCCCCC;
  border-radius: 5px;
  background-color: #f6f6f6;
  font-size: 13px;
  line-height: 1.3;
}
#TOC .toctitle {
  font-weight: bold;
  font-size: 15px;
  margin-left: 5px;
}
#TOC ul {
  padding-left: 40px;
  margin-left: -1.5em;
  margin-top: 5px;
  margin-bottom: 5px;
}
#TOC ul ul {
  margin-left: -2em;
}
#TOC li {
  line-height: 16px;
}
/* Tables. */
table {
  margin: 1em auto;
  border-width: 1px;
  border-color: #DDDDDD;
  border-style: outset;
  border-collapse: collapse;
}
table th {
  border-width: 2px;
  padding: 5px;
  border-style: inset;
}
table td {
  border-width: 1px;
  border-style: inset;
  line-height: 18px;
  padding: 5px 5px;
}
table, table th, table td {
  border-left-style: none;
  border-right-style: none;
}
table thead, table tr.even {
  background-color: #f7f7f7;
}
/* Block-level text elements. */
p {
  margin: 0.5em 0;
}
blockquote {
  background-color: #f6f6f6;
  padding: 0.25em 0.75em;
}
hr {
  border-style: solid;
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
  margin-left: 0;
}
dl dd {
  margin-bottom: 13px;
  margin-left: 13px;
}
dl dt {
  font-weight: bold;
}
ul {
  margin-top: 0;
}
ul li {
  list-style: circle outside;
}
ul ul {
  margin-bottom: 0;
}
/* Code blocks and inline code. */
pre, code {
  background-color: #f7f7f7;
  border-radius: 3px;
  color: #333;
  white-space: pre-wrap;
}
pre {
  border-radius: 3px;
  margin: 5px 0px 10px 0px;
  padding: 10px;
}
pre:not([class]) {
  background-color: #f7f7f7;
}
code {
  font-family: Consolas, Monaco, 'Courier New', monospace;
  font-size: 85%;
}
p > code, li > code {
  padding: 2px 0px;
}
/* Figures and images. */
div.figure {
  text-align: center;
}
img {
  background-color: #FFFFFF;
  padding: 2px;
  border: 1px solid #DDDDDD;
  border-radius: 3px;
  border: 1px solid #CCCCCC;
  margin: 0 5px;
}
/* Headings. */
h1 {
  margin-top: 0;
  font-size: 35px;
  line-height: 40px;
}
h2 {
  border-bottom: 4px solid #f7f7f7;
  padding-top: 10px;
  padding-bottom: 2px;
  font-size: 145%;
}
h3 {
  border-bottom: 2px solid #f7f7f7;
  padding-top: 10px;
  font-size: 120%;
}
h4 {
  border-bottom: 1px solid #f7f7f7;
  margin-left: 8px;
  font-size: 105%;
}
h5, h6 {
  border-bottom: 1px solid #ccc;
  font-size: 105%;
}
/* Links. */
a {
  color: #0033dd;
  text-decoration: none;
}
a:hover {
  color: #6666ff; }
a:visited {
  color: #800080; }
a:visited:hover {
  color: #BB00BB; }
a[href^="http:"] {
  text-decoration: underline; }
a[href^="https:"] {
  text-decoration: underline; }
/* Token colours for direct span children of code. */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Caching openSenseMap Data for
Reproducibility</h1>
<h4 class="author">Norwin Roosen</h4>
<h4 class="date">2023-03-08</h4>
<p>It may be useful to download data from openSenseMap only once. For
reproducible results, the data should be saved to disk, and reloaded at
a later point.</p>
<p>This avoids:</p>
<ul>
<li>changed results for queries without date parameters,</li>
<li>unnecessary wait times,</li>
<li>risk of API changes / API unavailability,</li>
<li>stress on the openSenseMap-server.</li>
</ul>
<p>This vignette shows how to use this built-in
<code>opensensmapr</code> feature, and how to do it yourself in case you
want to save to other data formats.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="co"># this vignette requires:</span></span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(opensensmapr)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(jsonlite)</span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(readr)</span></code></pre></div>
<div id="using-the-opensensmapr-caching-feature" class="section level2">
<h2>Using the opensensmapr Caching Feature</h2>
<p>All data retrieval functions of <code>opensensmapr</code> have a
built-in caching feature, which serializes an API response to disk.
Subsequent identical requests will then return the serialized data
instead of making another request.</p>
<p>To use this feature, just add a path to a directory to the
<code>cache</code> parameter:</p>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a>b <span class="ot">=</span> <span class="fu">osem_boxes</span>(<span class="at">grouptag =</span> <span class="st">'ifgi'</span>, <span class="at">cache =</span> <span class="fu">tempdir</span>())</span>
<span id="cb2-2"><a href="#cb2-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-3"><a href="#cb2-3" aria-hidden="true" tabindex="-1"></a><span class="co"># the next identical request will hit the cache only!</span></span>
<span id="cb2-4"><a href="#cb2-4" aria-hidden="true" tabindex="-1"></a>b <span class="ot">=</span> <span class="fu">osem_boxes</span>(<span class="at">grouptag =</span> <span class="st">'ifgi'</span>, <span class="at">cache =</span> <span class="fu">tempdir</span>())</span>
<span id="cb2-5"><a href="#cb2-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb2-6"><a href="#cb2-6" aria-hidden="true" tabindex="-1"></a><span class="co"># requests without the cache parameter will still be performed normally</span></span>
<span id="cb2-7"><a href="#cb2-7" aria-hidden="true" tabindex="-1"></a>b <span class="ot">=</span> <span class="fu">osem_boxes</span>(<span class="at">grouptag =</span> <span class="st">'ifgi'</span>)</span></code></pre></div>
<p>Looking at the cache directory we can see one file for each request,
which is identified through a hash of the request URL:</p>
<div class="sourceCode" id="cb3"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb3-1"><a href="#cb3-1" aria-hidden="true" tabindex="-1"></a><span class="fu">list.files</span>(<span class="fu">tempdir</span>(), <span class="at">pattern =</span> <span class="st">'osemcache</span><span class="sc">\\</span><span class="st">..*</span><span class="sc">\\</span><span class="st">.rds'</span>)</span></code></pre></div>
<pre><code>## [1] "osemcache.17db5c57fc6fca4d836fa2cf30345ce8767cd61a.rds"</code></pre>
<p>You can maintain multiple caches simultaneously, which allows you to
store only the data related to a script in the same directory:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>cacheDir <span class="ot">=</span> <span class="fu">getwd</span>() <span class="co"># current working directory</span></span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a>b <span class="ot">=</span> <span class="fu">osem_boxes</span>(<span class="at">grouptag =</span> <span class="st">'ifgi'</span>, <span class="at">cache =</span> cacheDir)</span>
<span id="cb5-3"><a href="#cb5-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb5-4"><a href="#cb5-4" aria-hidden="true" tabindex="-1"></a><span class="co"># the next identical request will hit the cache only!</span></span>
<span id="cb5-5"><a href="#cb5-5" aria-hidden="true" tabindex="-1"></a>b <span class="ot">=</span> <span class="fu">osem_boxes</span>(<span class="at">grouptag =</span> <span class="st">'ifgi'</span>, <span class="at">cache =</span> cacheDir)</span></code></pre></div>
<p>To get fresh results again, just call <code>osem_clear_cache()</code>
for the respective cache:</p>
<div class="sourceCode" id="cb6"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb6-1"><a href="#cb6-1" aria-hidden="true" tabindex="-1"></a><span class="fu">osem_clear_cache</span>() <span class="co"># clears default cache</span></span>
<span id="cb6-2"><a href="#cb6-2" aria-hidden="true" tabindex="-1"></a><span class="fu">osem_clear_cache</span>(<span class="fu">getwd</span>()) <span class="co"># clears a custom cache</span></span></code></pre></div>
</div>
<div id="custom-de--serialization" class="section level2">
<h2>Custom (De-) Serialization</h2>
<p>If you want to roll your own serialization method to support custom
data formats, here’s how:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a><span class="co"># first get our example data:</span></span>
<span id="cb7-2"><a href="#cb7-2" aria-hidden="true" tabindex="-1"></a>measurements <span class="ot">=</span> <span class="fu">osem_measurements</span>(<span class="st">'Windgeschwindigkeit'</span>)</span></code></pre></div>
<p>If you are paranoid and worry about <code>.rds</code> files not being
decodable anymore in the (distant) future, you could serialize to a
plain text format such as JSON. This of course comes at the cost of
storage space and performance.</p>
<div class="sourceCode" id="cb8"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb8-1"><a href="#cb8-1" aria-hidden="true" tabindex="-1"></a><span class="co"># serializing senseBoxes to JSON, and loading from file again:</span></span>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">serializeJSON</span>(measurements), <span class="st">'measurements.json'</span>)</span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>measurements_from_file <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">unserializeJSON</span>(readr<span class="sc">::</span><span class="fu">read_file</span>(<span class="st">'measurements.json'</span>))</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_from_file)</span></code></pre></div>
<p>This method also persists the R object metadata (classes,
attributes). If you were to use a serialization method that can’t
persist object metadata, you could re-apply it with the following
functions:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">'measurements_bad.json'</span>)</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">'measurements_bad.json'</span>)</span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div>
<p>The same goes for boxes via <code>osem_as_sensebox()</code>.</p>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
// Build a <script> element pointing at the hosted MathJax bundle and append
// it to the document's <head>, so MathJax is fetched at runtime instead of
// being embedded in this file.
(function () {
  var script = document.createElement("script");
  script.type = "text/javascript";
  script.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  document.getElementsByTagName("head")[0].appendChild(script);
})();
</script>
</body>
</html>