1
0
Fork 0
mirror of https://github.com/sensebox/opensensmapr synced 2025-02-22 06:23:57 +01:00
opensensmapR/inst/doc/osem-intro.html

870 lines
110 KiB
HTML
Raw Permalink Normal View History

2023-02-23 15:12:46 +01:00
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="Norwin Roosen" />
2023-03-10 10:32:36 +01:00
<meta name="date" content="2023-03-08" />
2023-02-23 15:12:46 +01:00
<title>Exploring the openSenseMap Dataset</title>
<script>// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  // Matches the tag names h1 through h6, ignoring case.
  var headingTag = /^h[1-6]$/i;

  // Removes every attribute from the given element, one at a time.
  function stripAttributes(element) {
    var attrs = element.attributes;
    while (attrs.length > 0) element.removeAttribute(attrs[0].name);
  }

  // First child of every div.section whose class attribute contains "level".
  var candidates = document.querySelectorAll("div.section[class*='level'] > :first-child");
  for (var idx = 0; idx < candidates.length; idx++) {
    // Only headers are touched; any other first child is left as it is.
    if (headingTag.test(candidates[idx].tagName)) stripAttributes(candidates[idx]);
  }
});
</script>
<style type="text/css">
/* Small utility rules: wrapping for <code>, plus helper classes for small
   caps, underlining, half-width inline-block columns, hanging indents and
   task lists without bullets. */
code{white-space: pre-wrap;}
span.smallcaps{font-variant: small-caps;}
span.underline{text-decoration: underline;}
div.column{display: inline-block; vertical-align: top; width: 50%;}
div.hanging-indent{margin-left: 1.5em; text-indent: -1.5em;}
ul.task-list{list-style: none;}
</style>
<style type="text/css">
/* Preserve whitespace in <code> without wrapping, and let .sourceCode
   content overflow its box instead of being clipped. */
/* NOTE(review): a later `pre, code` rule in this file sets
   white-space: pre-wrap again; confirm which value is intended to win. */
code {
white-space: pre;
}
.sourceCode {
overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
/* Layout of highlighted code blocks: each direct <span> child of
   code.sourceCode is rendered as an inline-block with a fixed line height;
   empty spans get an explicit height. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
/* On screen, the wrapping div scrolls when its content is too wide. */
@media screen {
div.sourceCode { overflow: auto; }
}
/* In print, lines wrap; the negative text-indent with matching padding
   gives wrapped continuation lines a hanging indent. */
@media print {
pre > code.sourceCode { white-space: pre-wrap; }
pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Line numbering for pre.numberSource: a CSS counter is reset per <code>,
   incremented per span, and printed in front of the first <a> of each span. */
pre.numberSource code
{ counter-reset: source-line 0; }
pre.numberSource code > span
{ position: relative; left: -4em; counter-increment: source-line; }
/* The user-select declarations make the generated numbers non-selectable. */
pre.numberSource code > span > a:first-child::before
{ content: counter(source-line);
position: relative; left: -1em; text-align: right; vertical-align: baseline;
border: none; display: inline-block;
-webkit-touch-callout: none; -webkit-user-select: none;
-khtml-user-select: none; -moz-user-select: none;
-ms-user-select: none; user-select: none;
padding: 0 4px; width: 4em;
color: #aaaaaa;
}
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Empty top-level div.sourceCode rule. The inline script that follows this
   stylesheet looks for div.sourceCode rules that set color or
   background-color; this one sets neither. */
div.sourceCode
{ }
@media screen {
pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Syntax-highlighting palette: one rule per two-letter token class carried
   by <span> elements anywhere inside <code>. Classes such as fu, co, st,
   ot, dv, fl, sc, at and cn appear in this document's code listings. */
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
/* .ex intentionally carries no declarations. */
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
  // Walk every stylesheet attached to the document, handling only those
  // whose owning element carries data-origin="pandoc".
  var allSheets = document.styleSheets;
  for (var s = 0; s < allSheets.length; s++) {
    var sheet = allSheets[s];
    if (sheet.ownerNode.dataset["origin"] !== "pandoc") continue;

    // Reading cssRules may throw; such sheets are skipped.
    var ruleList;
    try { ruleList = sheet.cssRules; } catch (err) { continue; }

    for (var k = 0; k < ruleList.length; k++) {
      var current = ruleList[k];

      // Only top-level style rules whose selector is exactly div.sourceCode.
      var isDivSourceCode =
        current.type === current.STYLE_RULE &&
        current.selectorText === "div.sourceCode";
      if (!isDivSourceCode) continue;

      var declarations = current.style.cssText;

      // Rules that set neither color nor background-color stay untouched.
      if (current.style.color === '' && current.style.backgroundColor === '') continue;

      // Swap the rule in place for a pre.sourceCode rule with the same
      // declarations. The replacement sits at index k and can never match
      // the selector test above, so moving on to k + 1 is safe.
      sheet.deleteRule(k);
      sheet.insertRule('pre.sourceCode{' + declarations + '}', k);
    }
  }
})();
</script>
<style type="text/css">body {
/* Page container: white background, horizontally centred column at most
   700px wide with 2em side padding; sets the base font stack, size and
   line height. */
background-color: #fff;
margin: 1em auto;
max-width: 700px;
overflow: visible;
padding-left: 2em;
padding-right: 2em;
font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
font-size: 14px;
line-height: 1.35;
}
/* Box styling for an element with id "TOC" (presumably a table of
   contents; no element with that id is visible in this document's body). */
#TOC {
clear: both;
margin: 0 0 10px 10px;
padding: 4px;
width: 400px;
border: 1px solid #CCCCCC;
border-radius: 5px;
background-color: #f6f6f6;
font-size: 13px;
line-height: 1.3;
}
#TOC .toctitle {
font-weight: bold;
font-size: 15px;
margin-left: 5px;
}
/* List indentation inside #TOC; nested lists are pulled further left. */
#TOC ul {
padding-left: 40px;
margin-left: -1.5em;
margin-top: 5px;
margin-bottom: 5px;
}
#TOC ul ul {
margin-left: -2em;
}
#TOC li {
line-height: 16px;
}
/* Tables: centred, collapsed borders. */
table {
margin: 1em auto;
border-width: 1px;
border-color: #DDDDDD;
border-style: outset;
border-collapse: collapse;
}
table th {
border-width: 2px;
padding: 5px;
border-style: inset;
}
table td {
border-width: 1px;
border-style: inset;
line-height: 18px;
padding: 5px 5px;
}
/* Drop the left and right borders so only horizontal rules remain. */
table, table th, table td {
border-left-style: none;
border-right-style: none;
}
/* Light grey background for the header group and rows with class "even". */
table thead, table tr.even {
background-color: #f7f7f7;
}
p {
margin: 0.5em 0;
}
blockquote {
background-color: #f6f6f6;
padding: 0.25em 0.75em;
}
/* Horizontal rule: a single 1px grey line on top, no other borders.
   `border: none` resets every border side before `border-top` draws the
   line, so no separate border-style declaration is needed. */
hr {
border: none;
border-top: 1px solid #777;
margin: 28px 0;
}
/* Definition lists: flush-left list, indented descriptions, bold terms. */
dl {
margin-left: 0;
}
dl dd {
margin-bottom: 13px;
margin-left: 13px;
}
dl dt {
font-weight: bold;
}
/* Unordered lists: circle markers, no extra margin around nested lists. */
ul {
margin-top: 0;
}
ul li {
list-style: circle outside;
}
ul ul {
margin-bottom: 0;
}
/* Code, inline and block: light grey background, dark text, wrapping that
   preserves whitespace. */
pre, code {
background-color: #f7f7f7;
border-radius: 3px;
color: #333;
white-space: pre-wrap;
}
pre {
border-radius: 3px;
margin: 5px 0px 10px 0px;
padding: 10px;
}
/* <pre> without a class attribute (e.g. the "##" console-output blocks in
   this document) gets the same background as above. */
pre:not([class]) {
background-color: #f7f7f7;
}
code {
font-family: Consolas, Monaco, 'Courier New', monospace;
font-size: 85%;
}
/* Vertical padding for inline code directly inside paragraphs and list items. */
p > code, li > code {
padding: 2px 0px;
}
div.figure {
text-align: center;
}
/* Images: white mat with 2px padding inside a rounded 1px #CCCCCC frame.
   The earlier duplicate `border` declaration (#DDDDDD) was always
   overridden by this one and has been dropped. */
img {
background-color: #FFFFFF;
padding: 2px;
border-radius: 3px;
border: 1px solid #CCCCCC;
margin: 0 5px;
}
/* Headings: h1 uses fixed pixel sizes; h2-h6 use percentage sizes with a
   bottom border that gets thinner at deeper levels. */
h1 {
margin-top: 0;
font-size: 35px;
line-height: 40px;
}
h2 {
border-bottom: 4px solid #f7f7f7;
padding-top: 10px;
padding-bottom: 2px;
font-size: 145%;
}
h3 {
border-bottom: 2px solid #f7f7f7;
padding-top: 10px;
font-size: 120%;
}
h4 {
border-bottom: 1px solid #f7f7f7;
margin-left: 8px;
font-size: 105%;
}
h5, h6 {
border-bottom: 1px solid #ccc;
font-size: 105%;
}
/* Links: no underline by default, distinct colours for hover and visited. */
a {
color: #0033dd;
text-decoration: none;
}
a:hover {
color: #6666ff; }
a:visited {
color: #800080; }
a:visited:hover {
color: #BB00BB; }
/* Links whose href starts with "http:" or "https:" are underlined. */
a[href^="http:"] {
text-decoration: underline; }
a[href^="https:"] {
text-decoration: underline; }
/* Token colours for <span> elements that are DIRECT children of <code>. */
/* NOTE(review): in this document's highlighted listings the token spans
   (e.g. span.fu, span.co) sit inside a per-line <span id="cbN-M"> wrapper,
   so these child-combinator selectors do not match them and the earlier
   `code span.*` palette applies instead. Confirm whether that is intended. */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Exploring the openSenseMap Dataset</h1>
<h4 class="author">Norwin Roosen</h4>
2023-03-10 10:32:36 +01:00
<h4 class="date">2023-03-08</h4>
2023-02-23 15:12:46 +01:00
<p>This package provides data ingestion functions for almost any data
stored on the open data platform for environmental sensor data <a href="https://opensensemap.org" class="uri">https://opensensemap.org</a>. Its main goals are to provide
means for:</p>
<ul>
<li>big data analysis of the measurements stored on the platform</li>
<li>sensor metadata analysis (sensor counts, spatial distribution,
temporal trends)</li>
</ul>
<div id="exploring-the-dataset" class="section level3">
<h3>Exploring the dataset</h3>
<p>Before we look at actual observations, let's get a grasp of the
openSenseMap dataset's structure.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(magrittr)</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(opensensmapr)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span>
2023-03-10 10:32:36 +01:00
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="co"># all_sensors = osem_boxes(cache = &#39;.&#39;)</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>all_sensors <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">&#39;boxes_precomputed.rds&#39;</span>) <span class="co"># read precomputed file to save resources </span></span></code></pre></div>
2023-02-23 15:12:46 +01:00
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(all_sensors)</span></code></pre></div>
2023-03-10 10:32:36 +01:00
<pre><code>## boxes total: 11390
2023-02-23 15:12:46 +01:00
##
## boxes by exposure:
## indoor mobile outdoor unknown
2023-03-10 10:32:36 +01:00
## 2364 590 8417 19
2023-02-23 15:12:46 +01:00
##
## boxes by model:
## custom hackair_home_v2 homeEthernet
2023-03-10 10:32:36 +01:00
## 2800 73 73
2023-02-23 15:12:46 +01:00
## homeEthernetFeinstaub homeV2Ethernet homeV2EthernetFeinstaub
## 55 21 40
## homeV2Lora homeV2Wifi homeV2WifiFeinstaub
2023-03-10 10:32:36 +01:00
## 240 577 743
2023-02-23 15:12:46 +01:00
## homeWifi homeWifiFeinstaub luftdaten_pms1003
## 215 222 9
## luftdaten_pms1003_bme280 luftdaten_pms3003 luftdaten_pms3003_bme280
## 10 1 7
## luftdaten_pms5003 luftdaten_pms5003_bme280 luftdaten_pms7003
## 7 60 6
## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
2023-03-10 10:32:36 +01:00
## 78 286 3066
2023-02-23 15:12:46 +01:00
## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
2023-03-10 10:32:36 +01:00
## 114 135 2552
2023-02-23 15:12:46 +01:00
##
## $last_measurement_within
## 1h 1d 30d 365d never
2023-03-10 10:32:36 +01:00
## 0 0 4151 5909 2062
2023-02-23 15:12:46 +01:00
##
## oldest box: 2016-08-09 19:34:42 (OBS Bohmte UK_02)
2023-03-10 10:32:36 +01:00
## newest box: 2023-02-28 09:47:17 (bitburg)
2023-02-23 15:12:46 +01:00
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
2023-03-10 10:32:36 +01:00
## 1.000 4.000 5.000 4.994 5.000 76.000</code></pre>
2023-02-23 15:12:46 +01:00
<p>This gives a good overview already: As of writing this, there are
more than 11,000 sensor stations, of which about 36% submitted a
measurement within the last 30 days. Most of them are placed outdoors
and have around 5 sensors each. The oldest station is from August 2016,
while the newest station was registered at the end of February 2023.</p>
<p>Another feature of interest is the spatial distribution of the boxes:
<code>plot()</code> can help us out here. This function requires a bunch
of optional dependencies though.</p>
2023-03-10 10:32:36 +01:00
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(all_sensors)</span></code></pre></div>
<p><img src="
2023-02-23 15:12:46 +01:00
<p>It seems we have to reduce our area of interest to Germany.</p>
<p>But what do these sensor stations actually measure? Let's find out.
<code>osem_phenomena()</code> gives us a named list of the counts of
each observed phenomenon for the given set of sensor stations:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>phenoms <span class="ot">=</span> <span class="fu">osem_phenomena</span>(all_sensors)</span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="fu">str</span>(phenoms)</span></code></pre></div>
2023-03-10 10:32:36 +01:00
<pre><code>## List of 3298
## $ Temperatur : int 9405
## $ rel. Luftfeuchte : int 8315
## $ PM10 : int 8148
## $ PM2.5 : int 8136
## $ Luftdruck : int 5668
## $ Beleuchtungsstärke : int 1670
## $ UV-Intensität : int 1660
## $ Temperature : int 644
2023-02-23 15:12:46 +01:00
## $ Humidity : int 473
2023-03-10 10:32:36 +01:00
## $ VOC : int 423
## $ Luftfeuchte : int 363
## $ Lufttemperatur : int 357
## $ CO₂ : int 305
2023-02-23 15:12:46 +01:00
## $ Pressure : int 293
2023-03-10 10:32:36 +01:00
## $ Bodenfeuchte : int 283
2023-02-23 15:12:46 +01:00
## $ Luftfeuchtigkeit : int 272
2023-03-10 10:32:36 +01:00
## $ atm. Luftdruck : int 246
2023-02-23 15:12:46 +01:00
## $ Lautstärke : int 240
## $ PM01 : int 206
## $ IAQ : int 162
## $ Kalibrierungswert : int 156
## $ rel. Luftfeuchte SCD30 : int 156
2023-03-10 10:32:36 +01:00
## $ Bodentemperatur : int 154
2023-02-23 15:12:46 +01:00
## $ Temperatur SCD30 : int 154
## $ CO2eq : int 153
## $ Windgeschwindigkeit : int 152
2023-03-10 10:32:36 +01:00
## $ pH-Wert : int 143
## $ Gesamthärte : int 142
## $ Blei : int 140
## $ Eisen : int 140
## $ Gesamthärte 2 : int 140
## $ Kupfer C : int 140
## $ Kupfer D : int 140
## $ Kupfer1 : int 140
## $ Kupfer2 : int 140
## $ Nitrat : int 140
## $ Nitrit : int 140
2023-02-23 15:12:46 +01:00
## $ GesamthaerteLabor : int 120
2023-03-10 10:32:36 +01:00
## $ CO2 : int 113
2023-02-23 15:12:46 +01:00
## $ Feinstaub PM10 : int 98
## $ Windrichtung : int 82
2023-03-10 10:32:36 +01:00
## $ rel. Luftfeuchte (HECA) : int 75
## $ Temperatur (HECA) : int 73
2023-02-23 15:12:46 +01:00
## $ Temperatura : int 69
## $ Helligkeit : int 67
## $ Feinstaub PM2.5 : int 65
## $ Taupunkt : int 62
## $ Latitude : int 61
## $ Longtitude : int 58
## $ Durchschnitt Umgebungslautstärke : int 51
## $ Minimum Umgebungslautstärke : int 51
## $ UV-Index : int 49
2023-03-10 10:32:36 +01:00
## $ Batterie : int 46
2023-02-23 15:12:46 +01:00
## $ temperature : int 46
## $ Feinstaub PM1.0 : int 41
## $ Umgebungslautstärke : int 41
## $ UV : int 40
## $ humidity : int 38
## $ Abstand nach links : int 34
## $ Beschleunigung Z-Achse : int 34
## $ Beschleunigung X-Achse : int 33
## $ Beschleunigung Y-Achse : int 33
## $ Geschwindigkeit : int 33
## $ Niederschlag : int 33
## $ Feinstaub PM25 : int 32
## $ PM1 : int 32
## $ Abstand nach rechts : int 31
## $ PM1.0 : int 30
## $ rel. Luftfeuchtigkeit : int 30
## $ Relative Humidity : int 29
## $ Sonnenstrahlung : int 29
## $ Luftdruck relativ : int 28
## $ Luftdruck absolut : int 26
## $ Rain : int 26
## $ Regenrate : int 26
## $ CO2 Konzentration : int 25
## $ RSSI : int 22
## $ gefühlte Temperatur : int 22
## $ PM 2.5 : int 21
## $ Battery : int 20
## $ Ciśnienie : int 20
2023-03-10 10:32:36 +01:00
## $ EisenLabor : int 20
2023-02-23 15:12:46 +01:00
## $ Air Pressure : int 19
## $ Regen : int 19
## $ Schall : int 19
## $ Signal : int 19
## $ Ilmanpaine : int 18
## $ Lämpötila : int 18
## $ UV Index : int 18
## $ Wind speed : int 18
## $ PM 10 : int 17
## $ PM4 : int 17
## $ Air pressure : int 16
## $ Temperatur DHT22 : int 16
## $ Wind Direction : int 16
## $ Altitude : int 15
## $ Illuminance : int 15
## $ Speed : int 15
## $ Wind Speed : int 15
## [list output truncated]</code></pre>
<p>That's quite some noise there, with many phenomena being measured by a
single sensor only, or many duplicated phenomena due to slightly
different spellings. We should clean that up, but for now let's just
filter out the noise and find those phenomena with high sensor
numbers:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>phenoms[phenoms <span class="sc">&gt;</span> <span class="dv">20</span>]</span></code></pre></div>
<pre><code>## $Temperatur
2023-03-10 10:32:36 +01:00
## [1] 9405
2023-02-23 15:12:46 +01:00
##
## $`rel. Luftfeuchte`
2023-03-10 10:32:36 +01:00
## [1] 8315
2023-02-23 15:12:46 +01:00
##
## $PM10
2023-03-10 10:32:36 +01:00
## [1] 8148
2023-02-23 15:12:46 +01:00
##
## $PM2.5
2023-03-10 10:32:36 +01:00
## [1] 8136
2023-02-23 15:12:46 +01:00
##
## $Luftdruck
2023-03-10 10:32:36 +01:00
## [1] 5668
2023-02-23 15:12:46 +01:00
##
## $Beleuchtungsstärke
2023-03-10 10:32:36 +01:00
## [1] 1670
2023-02-23 15:12:46 +01:00
##
## $`UV-Intensität`
2023-03-10 10:32:36 +01:00
## [1] 1660
2023-02-23 15:12:46 +01:00
##
## $Temperature
2023-03-10 10:32:36 +01:00
## [1] 644
2023-02-23 15:12:46 +01:00
##
## $Humidity
## [1] 473
##
## $VOC
2023-03-10 10:32:36 +01:00
## [1] 423
2023-02-23 15:12:46 +01:00
##
## $Luftfeuchte
2023-03-10 10:32:36 +01:00
## [1] 363
2023-02-23 15:12:46 +01:00
##
## $Lufttemperatur
2023-03-10 10:32:36 +01:00
## [1] 357
2023-02-23 15:12:46 +01:00
##
## $`CO₂`
2023-03-10 10:32:36 +01:00
## [1] 305
2023-02-23 15:12:46 +01:00
##
## $Pressure
## [1] 293
##
## $Bodenfeuchte
2023-03-10 10:32:36 +01:00
## [1] 283
2023-02-23 15:12:46 +01:00
##
## $Luftfeuchtigkeit
## [1] 272
##
## $`atm. Luftdruck`
2023-03-10 10:32:36 +01:00
## [1] 246
2023-02-23 15:12:46 +01:00
##
## $Lautstärke
## [1] 240
##
## $PM01
## [1] 206
##
## $IAQ
## [1] 162
##
## $Kalibrierungswert
## [1] 156
##
## $`rel. Luftfeuchte SCD30`
## [1] 156
##
## $Bodentemperatur
2023-03-10 10:32:36 +01:00
## [1] 154
2023-02-23 15:12:46 +01:00
##
## $`Temperatur SCD30`
## [1] 154
##
## $CO2eq
## [1] 153
##
## $Windgeschwindigkeit
## [1] 152
##
## $`pH-Wert`
2023-03-10 10:32:36 +01:00
## [1] 143
2023-02-23 15:12:46 +01:00
##
## $Gesamthärte
2023-03-10 10:32:36 +01:00
## [1] 142
2023-02-23 15:12:46 +01:00
##
## $Blei
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $Eisen
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $`Gesamthärte 2`
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $`Kupfer C`
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $`Kupfer D`
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $Kupfer1
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $Kupfer2
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $Nitrat
2023-03-10 10:32:36 +01:00
## [1] 140
2023-02-23 15:12:46 +01:00
##
## $Nitrit
2023-03-10 10:32:36 +01:00
## [1] 140
##
## $GesamthaerteLabor
2023-02-23 15:12:46 +01:00
## [1] 120
##
## $CO2
2023-03-10 10:32:36 +01:00
## [1] 113
2023-02-23 15:12:46 +01:00
##
## $`Feinstaub PM10`
## [1] 98
##
## $Windrichtung
## [1] 82
##
## $`rel. Luftfeuchte (HECA)`
2023-03-10 10:32:36 +01:00
## [1] 75
2023-02-23 15:12:46 +01:00
##
## $`Temperatur (HECA)`
2023-03-10 10:32:36 +01:00
## [1] 73
2023-02-23 15:12:46 +01:00
##
## $Temperatura
## [1] 69
##
## $Helligkeit
## [1] 67
##
## $`Feinstaub PM2.5`
## [1] 65
##
## $Taupunkt
## [1] 62
##
## $Latitude
## [1] 61
##
## $Longtitude
## [1] 58
##
## $`Durchschnitt Umgebungslautstärke`
## [1] 51
##
## $`Minimum Umgebungslautstärke`
## [1] 51
##
## $`UV-Index`
## [1] 49
##
2023-03-10 10:32:36 +01:00
## $Batterie
2023-02-23 15:12:46 +01:00
## [1] 46
##
2023-03-10 10:32:36 +01:00
## $temperature
## [1] 46
2023-02-23 15:12:46 +01:00
##
## $`Feinstaub PM1.0`
## [1] 41
##
## $Umgebungslautstärke
## [1] 41
##
## $UV
## [1] 40
##
## $humidity
## [1] 38
##
## $`Abstand nach links`
## [1] 34
##
## $`Beschleunigung Z-Achse`
## [1] 34
##
## $`Beschleunigung X-Achse`
## [1] 33
##
## $`Beschleunigung Y-Achse`
## [1] 33
##
## $Geschwindigkeit
## [1] 33
##
## $Niederschlag
## [1] 33
##
## $`Feinstaub PM25`
## [1] 32
##
## $PM1
## [1] 32
##
## $`Abstand nach rechts`
## [1] 31
##
## $PM1.0
## [1] 30
##
## $`rel. Luftfeuchtigkeit`
## [1] 30
##
## $`Relative Humidity`
## [1] 29
##
## $Sonnenstrahlung
## [1] 29
##
## $`Luftdruck relativ`
## [1] 28
##
## $`Luftdruck absolut`
## [1] 26
##
## $Rain
## [1] 26
##
## $Regenrate
## [1] 26
##
## $`CO2 Konzentration`
## [1] 25
##
## $RSSI
## [1] 22
##
## $`gefühlte Temperatur`
## [1] 22
##
## $`PM 2.5`
## [1] 21</code></pre>
<p>Alright, temperature it is! Fine particulate matter (PM2.5) seems to
be more interesting to analyze though. We should check how many sensor
stations provide useful data: We want only those boxes with a PM2.5
sensor, that are placed outdoors and are currently submitting
measurements:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>pm25_sensors <span class="ot">=</span> <span class="fu">osem_boxes</span>(</span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a> <span class="at">exposure =</span> <span class="st">&#39;outdoor&#39;</span>,</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a> <span class="at">date =</span> <span class="fu">Sys.time</span>(), <span class="co"># ±4 hours</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a> <span class="at">phenomenon =</span> <span class="st">&#39;PM2.5&#39;</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
2023-03-10 10:32:36 +01:00
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>pm25_sensors <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">&#39;pm25_sensors.rds&#39;</span>) <span class="co"># read precomputed file to save resources </span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(pm25_sensors)</span></code></pre></div>
<pre><code>## boxes total: 3011
2023-02-23 15:12:46 +01:00
##
## boxes by exposure:
## outdoor
2023-03-10 10:32:36 +01:00
## 3011
2023-02-23 15:12:46 +01:00
##
## boxes by model:
## custom hackair_home_v2 homeEthernetFeinstaub
2023-03-10 10:32:36 +01:00
## 175 8 12
2023-02-23 15:12:46 +01:00
## homeV2EthernetFeinstaub homeV2Lora homeV2Wifi
2023-03-10 10:32:36 +01:00
## 9 22 2
2023-02-23 15:12:46 +01:00
## homeV2WifiFeinstaub homeWifi homeWifiFeinstaub
2023-03-10 10:32:36 +01:00
## 132 3 32
2023-02-23 15:12:46 +01:00
## luftdaten_pms1003 luftdaten_pms1003_bme280 luftdaten_pms5003
2023-03-10 10:32:36 +01:00
## 1 3 3
2023-02-23 15:12:46 +01:00
## luftdaten_pms5003_bme280 luftdaten_pms7003 luftdaten_pms7003_bme280
2023-03-10 10:32:36 +01:00
## 10 2 28
2023-02-23 15:12:46 +01:00
## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
2023-03-10 10:32:36 +01:00
## 117 1365 60
2023-02-23 15:12:46 +01:00
## luftdaten_sds011_dht11 luftdaten_sds011_dht22
2023-03-10 10:32:36 +01:00
## 44 983
2023-02-23 15:12:46 +01:00
##
## $last_measurement_within
## 1h 1d 30d 365d never
2023-03-10 10:32:36 +01:00
## 0 0 3011 3011 0
2023-02-23 15:12:46 +01:00
##
## oldest box: 2017-03-03 18:20:43 (Witten Heven Dorf)
2023-03-10 10:32:36 +01:00
## newest box: 2023-02-28 08:28:27 (eth0)
2023-02-23 15:12:46 +01:00
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
2023-03-10 10:32:36 +01:00
## 2.000 4.000 5.000 4.854 5.000 26.000</code></pre>
2023-02-23 15:12:46 +01:00
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pm25_sensors)</span></code></pre></div>
2023-03-10 10:32:36 +01:00
<p><img src="
2023-02-23 15:12:46 +01:00
<p>That's still more than 3000 measuring stations, we can work with
that.</p>
</div>
<div id="analyzing-sensor-data" class="section level3">
<h3>Analyzing sensor data</h3>
<p>Having analyzed the available data sources, let's finally get some
measurements. We could call <code>osem_measurements(pm25_sensors)</code>
now, however we are focusing on a restricted area of interest, the city
of Berlin. Luckily we can get the measurements filtered by a bounding
box:</p>
2023-03-10 10:32:36 +01:00
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(sf)</span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(units)</span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(lubridate)</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span></code></pre></div>
<p>Since the API takes quite long to respond with measurements, especially
when filtered on space and time, we do not run the following chunks for
publication of the package on CRAN.</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="co"># construct a bounding box: 12 kilometers around Berlin</span></span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>berlin <span class="ot">=</span> <span class="fu">st_point</span>(<span class="fu">c</span>(<span class="fl">13.4034</span>, <span class="fl">52.5120</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a> <span class="fu">st_sfc</span>(<span class="at">crs =</span> <span class="dv">4326</span>) <span class="sc">%&gt;%</span></span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a> <span class="fu">st_transform</span>(<span class="dv">3857</span>) <span class="sc">%&gt;%</span> <span class="co"># allow setting a buffer in meters</span></span>
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a> <span class="fu">st_buffer</span>(<span class="fu">set_units</span>(<span class="dv">12</span>, km)) <span class="sc">%&gt;%</span></span>
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a> <span class="fu">st_transform</span>(<span class="dv">4326</span>) <span class="sc">%&gt;%</span> <span class="co"># the opensensemap expects WGS 84</span></span>
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a> <span class="fu">st_bbox</span>()</span>
<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a>pm25 <span class="ot">=</span> <span class="fu">osem_measurements</span>(</span>
<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a> berlin,</span>
<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a> <span class="at">phenomenon =</span> <span class="st">&#39;PM2.5&#39;</span>,</span>
<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a> <span class="at">from =</span> <span class="fu">now</span>() <span class="sc">-</span> <span class="fu">days</span>(<span class="dv">3</span>), <span class="co"># defaults to 2 days</span></span>
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a> <span class="at">to =</span> <span class="fu">now</span>()</span>
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>pm25 <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">&#39;pm25_berlin.rds&#39;</span>) <span class="co"># read precomputed file to save resources </span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pm25)</span></code></pre></div>
<p><img src="
2023-02-23 15:12:46 +01:00
<p>Now we can get started with actual spatiotemporal data analysis.
First, let's mask the seemingly uncalibrated sensors:</p>
2023-03-10 10:32:36 +01:00
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>outliers <span class="ot">=</span> <span class="fu">filter</span>(pm25, value <span class="sc">&gt;</span> <span class="dv">100</span>)<span class="sc">$</span>sensorId</span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>bad_sensors <span class="ot">=</span> outliers[, drop <span class="ot">=</span> <span class="cn">TRUE</span>] <span class="sc">%&gt;%</span> <span class="fu">levels</span>()</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>pm25 <span class="ot">=</span> <span class="fu">mutate</span>(pm25, <span class="at">invalid =</span> sensorId <span class="sc">%in%</span> bad_sensors)</span></code></pre></div>
2023-02-23 15:12:46 +01:00
<p>Then plot the measuring locations, flagging the outliers:</p>
2023-03-10 10:32:36 +01:00
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">st_as_sf</span>(pm25) <span class="sc">%&gt;%</span> <span class="fu">st_geometry</span>() <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">col =</span> <span class="fu">factor</span>(pm25<span class="sc">$</span>invalid), <span class="at">axes =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
<p><img src="
2023-02-23 15:12:46 +01:00
<p>Removing these sensors yields a nicer time series plot:</p>
2023-03-10 10:32:36 +01:00
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>pm25 <span class="sc">%&gt;%</span> <span class="fu">filter</span>(invalid <span class="sc">==</span> <span class="cn">FALSE</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>()</span></code></pre></div>
<p><img src="
2023-02-23 15:12:46 +01:00
<p>Further analysis: comparison with LANUV data <code>TODO</code></p>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Create a <script> element that points at the hosted MathJax bundle.
  var mathjaxTag = document.createElement("script");
  mathjaxTag.type = "text/javascript";
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";

  // Appending it to the document's first <head> element starts the download.
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(mathjaxTag);
})();
</script>
</body>
</html>