<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="utf-8" />
<meta name="generator" content="pandoc" />
<meta http-equiv="X-UA-Compatible" content="IE=EDGE" />
<meta name="viewport" content="width=device-width, initial-scale=1" />
<meta name="author" content="Norwin Roosen" />
<meta name="date" content="2023-03-08" />
<title>Exploring the openSenseMap Dataset</title>
<script>
// Pandoc 2.9 adds attributes on both header and div. We remove the former (to
// be compatible with the behavior of Pandoc < 2.8).
document.addEventListener('DOMContentLoaded', function(e) {
  // First child of every pandoc section wrapper (div.section.levelN).
  var hs = document.querySelectorAll("div.section[class*='level'] > :first-child");
  var i, h, a;
  for (i = 0; i < hs.length; i++) {
    h = hs[i];
    if (!/^h[1-6]$/i.test(h.tagName)) continue;  // it should be a header h1-h6
    a = h.attributes;
    // `attributes` is a live collection: removing the first entry until it is
    // empty strips every attribute from the header.
    while (a.length > 0) h.removeAttribute(a[0].name);
  }
});
</script>
<style type="text/css">
/* Pandoc helper classes for inline spans, columns, hanging indents and task lists. */
code { white-space: pre-wrap; }
span.smallcaps { font-variant: small-caps; }
span.underline { text-decoration: underline; }
div.column { display: inline-block; vertical-align: top; width: 50%; }
div.hanging-indent { margin-left: 1.5em; text-indent: -1.5em; }
ul.task-list { list-style: none; }
</style>
<style type="text/css">
/* Keep code whitespace verbatim and let highlighted blocks overflow visibly. */
code {
  white-space: pre;
}
.sourceCode {
  overflow: visible;
}
</style>
<style type="text/css" data-origin="pandoc">
/* Pandoc syntax-highlighting sheet. The data-origin attribute is read by the
   inline script that follows, so it must stay on this element. */
pre > code.sourceCode { white-space: pre; position: relative; }
pre > code.sourceCode > span { display: inline-block; line-height: 1.25; }
pre > code.sourceCode > span:empty { height: 1.2em; }
.sourceCode { overflow: visible; }
code.sourceCode > span { color: inherit; text-decoration: inherit; }
div.sourceCode { margin: 1em 0; }
pre.sourceCode { margin: 0; }
@media screen {
  div.sourceCode { overflow: auto; }
}
@media print {
  pre > code.sourceCode { white-space: pre-wrap; }
  pre > code.sourceCode > span { text-indent: -5em; padding-left: 5em; }
}
/* Optional line numbering (pre.numberSource), driven by a CSS counter. */
pre.numberSource code
  { counter-reset: source-line 0; }
pre.numberSource code > span
  { position: relative; left: -4em; counter-increment: source-line; }
pre.numberSource code > span > a:first-child::before
  { content: counter(source-line);
    position: relative; left: -1em; text-align: right; vertical-align: baseline;
    border: none; display: inline-block;
    -webkit-touch-callout: none; -webkit-user-select: none;
    -khtml-user-select: none; -moz-user-select: none;
    -ms-user-select: none; user-select: none;
    padding: 0 4px; width: 4em;
    color: #aaaaaa;
  }
pre.numberSource { margin-left: 3em; border-left: 1px solid #aaaaaa; padding-left: 4px; }
/* Intentionally empty: the inline script that follows looks up rules with this
   exact selector and only rewrites them when they set color or background-color. */
div.sourceCode
  { }
@media screen {
  pre > code.sourceCode > span > a:first-child::before { text-decoration: underline; }
}
/* Token colours, one rule per pandoc token class. */
code span.al { color: #ff0000; font-weight: bold; }
code span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.at { color: #7d9029; }
code span.bn { color: #40a070; }
code span.bu { color: #008000; }
code span.cf { color: #007020; font-weight: bold; }
code span.ch { color: #4070a0; }
code span.cn { color: #880000; }
code span.co { color: #60a0b0; font-style: italic; }
code span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.do { color: #ba2121; font-style: italic; }
code span.dt { color: #902000; }
code span.dv { color: #40a070; }
code span.er { color: #ff0000; font-weight: bold; }
code span.ex { }
code span.fl { color: #40a070; }
code span.fu { color: #06287e; }
code span.im { color: #008000; font-weight: bold; }
code span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
code span.kw { color: #007020; font-weight: bold; }
code span.op { color: #666666; }
code span.ot { color: #007020; }
code span.pp { color: #bc7a00; }
code span.sc { color: #4070a0; }
code span.ss { color: #bb6688; }
code span.st { color: #4070a0; }
code span.va { color: #19177c; }
code span.vs { color: #4070a0; }
code span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<script>
// apply pandoc div.sourceCode style to pre.sourceCode instead
(function() {
  var sheets = document.styleSheets;
  for (var i = 0; i < sheets.length; i++) {
    // only touch the sheet whose <style> element is tagged data-origin="pandoc"
    if (sheets[i].ownerNode.dataset["origin"] !== "pandoc") continue;
    // reading cssRules can throw; skip such sheets
    try { var rules = sheets[i].cssRules; } catch (e) { continue; }
    var j = 0;
    while (j < rules.length) {
      var rule = rules[j];
      // check if there is a div.sourceCode rule
      if (rule.type !== rule.STYLE_RULE || rule.selectorText !== "div.sourceCode") {
        j++;
        continue;
      }
      var style = rule.style.cssText;
      // check if color or background-color is set
      if (rule.style.color === '' && rule.style.backgroundColor === '') {
        j++;
        continue;
      }
      // replace div.sourceCode by a pre.sourceCode rule; j is not advanced here,
      // the next pass sees the new pre.sourceCode rule and steps over it
      sheets[i].deleteRule(j);
      sheets[i].insertRule('pre.sourceCode{' + style + '}', j);
    }
  }
})();
</script>
<style type="text/css">
/* Page layout and base typography. */
body {
  background-color: #fff;
  margin: 1em auto;
  max-width: 700px;
  overflow: visible;
  padding-left: 2em;
  padding-right: 2em;
  font-family: "Open Sans", "Helvetica Neue", Helvetica, Arial, sans-serif;
  font-size: 14px;
  line-height: 1.35;
}

/* Table of contents box. */
#TOC {
  clear: both;
  margin: 0 0 10px 10px;
  padding: 4px;
  width: 400px;
  border: 1px solid #CCCCCC;
  border-radius: 5px;
  background-color: #f6f6f6;
  font-size: 13px;
  line-height: 1.3;
}
#TOC .toctitle {
  font-weight: bold;
  font-size: 15px;
  margin-left: 5px;
}
#TOC ul {
  padding-left: 40px;
  margin-left: -1.5em;
  margin-top: 5px;
  margin-bottom: 5px;
}
#TOC ul ul {
  margin-left: -2em;
}
#TOC li {
  line-height: 16px;
}

/* Tables. */
table {
  margin: 1em auto;
  border-width: 1px;
  border-color: #DDDDDD;
  border-style: outset;
  border-collapse: collapse;
}
table th {
  border-width: 2px;
  padding: 5px;
  border-style: inset;
}
table td {
  border-width: 1px;
  border-style: inset;
  line-height: 18px;
  padding: 5px 5px;
}
table, table th, table td {
  border-left-style: none;
  border-right-style: none;
}
table thead, table tr.even {
  background-color: #f7f7f7;
}

/* Block-level text elements. */
p {
  margin: 0.5em 0;
}
blockquote {
  background-color: #f6f6f6;
  padding: 0.25em 0.75em;
}
hr {
  /* The former leading `border-style: solid` was reset by `border: none` and is dropped. */
  border: none;
  border-top: 1px solid #777;
  margin: 28px 0;
}
dl {
  margin-left: 0;
}
dl dd {
  margin-bottom: 13px;
  margin-left: 13px;
}
dl dt {
  font-weight: bold;
}
ul {
  margin-top: 0;
}
ul li {
  list-style: circle outside;
}
ul ul {
  margin-bottom: 0;
}

/* Code and preformatted text. */
pre, code {
  background-color: #f7f7f7;
  border-radius: 3px;
  color: #333;
  white-space: pre-wrap;
}
pre {
  border-radius: 3px;
  margin: 5px 0px 10px 0px;
  padding: 10px;
}
pre:not([class]) {
  background-color: #f7f7f7;
}
code {
  font-family: Consolas, Monaco, 'Courier New', monospace;
  font-size: 85%;
}
p > code, li > code {
  padding: 2px 0px;
}

/* Figures and images. */
div.figure {
  text-align: center;
}
img {
  background-color: #FFFFFF;
  padding: 2px;
  border-radius: 3px;
  /* A preceding `border: 1px solid #DDDDDD` was overwritten by this one and is dropped. */
  border: 1px solid #CCCCCC;
  margin: 0 5px;
}

/* Headings. */
h1 {
  margin-top: 0;
  font-size: 35px;
  line-height: 40px;
}
h2 {
  border-bottom: 4px solid #f7f7f7;
  padding-top: 10px;
  padding-bottom: 2px;
  font-size: 145%;
}
h3 {
  border-bottom: 2px solid #f7f7f7;
  padding-top: 10px;
  font-size: 120%;
}
h4 {
  border-bottom: 1px solid #f7f7f7;
  margin-left: 8px;
  font-size: 105%;
}
h5, h6 {
  border-bottom: 1px solid #ccc;
  font-size: 105%;
}

/* Links: only absolute http(s) links are underlined. */
a {
  color: #0033dd;
  text-decoration: none;
}
a:hover {
  color: #6666ff;
}
a:visited {
  color: #800080;
}
a:visited:hover {
  color: #BB00BB;
}
a[href^="http:"] {
  text-decoration: underline;
}
a[href^="https:"] {
  text-decoration: underline;
}

/* Token colours for spans that are direct children of <code>. */
code > span.kw { color: #555; font-weight: bold; }
code > span.dt { color: #902000; }
code > span.dv { color: #40a070; }
code > span.bn { color: #d14; }
code > span.fl { color: #d14; }
code > span.ch { color: #d14; }
code > span.st { color: #d14; }
code > span.co { color: #888888; font-style: italic; }
code > span.ot { color: #007020; }
code > span.al { color: #ff0000; font-weight: bold; }
code > span.fu { color: #900; font-weight: bold; }
code > span.er { color: #a61717; background-color: #e3d2d2; }
</style>
</head>
<body>
<h1 class="title toc-ignore">Exploring the openSenseMap Dataset</h1>
<h4 class="author">Norwin Roosen</h4>
<h4 class="date">2023-03-08</h4>
<p>This package provides data ingestion functions for almost any data
stored on the open data platform for environmental sensor data <a href="https://opensensemap.org" class="uri">https://opensensemap.org</a>. Its main goals are to provide
means for:</p>
<ul>
<li>big data analysis of the measurements stored on the platform</li>
<li>sensor metadata analysis (sensor counts, spatial distribution,
temporal trends)</li>
</ul>
<div id="exploring-the-dataset" class="section level3">
<h3>Exploring the dataset</h3>
<p>Before we look at actual observations, let’s get a grasp of the
openSenseMap dataset’s structure.</p>
<div class="sourceCode" id="cb1"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb1-1"><a href="#cb1-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(magrittr)</span>
<span id="cb1-2"><a href="#cb1-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(opensensmapr)</span>
<span id="cb1-3"><a href="#cb1-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb1-4"><a href="#cb1-4" aria-hidden="true" tabindex="-1"></a><span class="co"># all_sensors = osem_boxes(cache = '.')</span></span>
<span id="cb1-5"><a href="#cb1-5" aria-hidden="true" tabindex="-1"></a>all_sensors <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">'boxes_precomputed.rds'</span>) <span class="co"># read precomputed file to save resources</span></span></code></pre></div>
<div class="sourceCode" id="cb2"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb2-1"><a href="#cb2-1" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(all_sensors)</span></code></pre></div>
<pre><code>## boxes total: 11390
##
## boxes by exposure:
## indoor mobile outdoor unknown
## 2364 590 8417 19
##
## boxes by model:
## custom hackair_home_v2 homeEthernet
## 2800 73 73
## homeEthernetFeinstaub homeV2Ethernet homeV2EthernetFeinstaub
## 55 21 40
## homeV2Lora homeV2Wifi homeV2WifiFeinstaub
## 240 577 743
## homeWifi homeWifiFeinstaub luftdaten_pms1003
## 215 222 9
## luftdaten_pms1003_bme280 luftdaten_pms3003 luftdaten_pms3003_bme280
## 10 1 7
## luftdaten_pms5003 luftdaten_pms5003_bme280 luftdaten_pms7003
## 7 60 6
## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
## 78 286 3066
## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
## 114 135 2552
##
## $last_measurement_within
## 1h 1d 30d 365d never
## 0 0 4151 5909 2062
##
## oldest box: 2016-08-09 19:34:42 (OBS Bohmte UK_02)
## newest box: 2023-02-28 09:47:17 (bitburg)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1.000 4.000 5.000 4.994 5.000 76.000</code></pre>
<p>This gives a good overview already: As of writing this, there are
more than 11,000 sensor stations, of which roughly a third submitted
measurements within the last 30 days. Most of them are placed outdoors
and have around 5 sensors each. The oldest station is from August 2016,
while the newest station was registered at the end of February 2023.</p>
<p>Another feature of interest is the spatial distribution of the boxes:
<code>plot()</code> can help us out here. This function requires a bunch
of optional dependencies though.</p>
<div class="sourceCode" id="cb4"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb4-1"><a href="#cb4-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(all_sensors)</span></code></pre></div>
< p > < img src = "
<p>It seems we have to reduce our area of interest to Germany.</p>
<p>But what do these sensor stations actually measure? Let’s find out.
<code>osem_phenomena()</code> gives us a named list of the counts of
each observed phenomenon for the given set of sensor stations:</p>
<div class="sourceCode" id="cb5"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb5-1"><a href="#cb5-1" aria-hidden="true" tabindex="-1"></a>phenoms <span class="ot">=</span> <span class="fu">osem_phenomena</span>(all_sensors)</span>
<span id="cb5-2"><a href="#cb5-2" aria-hidden="true" tabindex="-1"></a><span class="fu">str</span>(phenoms)</span></code></pre></div>
<pre><code>## List of 3298
## $ Temperatur : int 9405
## $ rel. Luftfeuchte : int 8315
## $ PM10 : int 8148
## $ PM2.5 : int 8136
## $ Luftdruck : int 5668
## $ Beleuchtungsstärke : int 1670
## $ UV-Intensität : int 1660
## $ Temperature : int 644
## $ Humidity : int 473
## $ VOC : int 423
## $ Luftfeuchte : int 363
## $ Lufttemperatur : int 357
## $ CO₂ : int 305
## $ Pressure : int 293
## $ Bodenfeuchte : int 283
## $ Luftfeuchtigkeit : int 272
## $ atm. Luftdruck : int 246
## $ Lautstärke : int 240
## $ PM01 : int 206
## $ IAQ : int 162
## $ Kalibrierungswert : int 156
## $ rel. Luftfeuchte SCD30 : int 156
## $ Bodentemperatur : int 154
## $ Temperatur SCD30 : int 154
## $ CO2eq : int 153
## $ Windgeschwindigkeit : int 152
## $ pH-Wert : int 143
## $ Gesamthärte : int 142
## $ Blei : int 140
## $ Eisen : int 140
## $ Gesamthärte 2 : int 140
## $ Kupfer C : int 140
## $ Kupfer D : int 140
## $ Kupfer1 : int 140
## $ Kupfer2 : int 140
## $ Nitrat : int 140
## $ Nitrit : int 140
## $ GesamthaerteLabor : int 120
## $ CO2 : int 113
## $ Feinstaub PM10 : int 98
## $ Windrichtung : int 82
## $ rel. Luftfeuchte (HECA) : int 75
## $ Temperatur (HECA) : int 73
## $ Temperatura : int 69
## $ Helligkeit : int 67
## $ Feinstaub PM2.5 : int 65
## $ Taupunkt : int 62
## $ Latitude : int 61
## $ Longtitude : int 58
## $ Durchschnitt Umgebungslautstärke : int 51
## $ Minimum Umgebungslautstärke : int 51
## $ UV-Index : int 49
## $ Batterie : int 46
## $ temperature : int 46
## $ Feinstaub PM1.0 : int 41
## $ Umgebungslautstärke : int 41
## $ UV : int 40
## $ humidity : int 38
## $ Abstand nach links : int 34
## $ Beschleunigung Z-Achse : int 34
## $ Beschleunigung X-Achse : int 33
## $ Beschleunigung Y-Achse : int 33
## $ Geschwindigkeit : int 33
## $ Niederschlag : int 33
## $ Feinstaub PM25 : int 32
## $ PM1 : int 32
## $ Abstand nach rechts : int 31
## $ PM1.0 : int 30
## $ rel. Luftfeuchtigkeit : int 30
## $ Relative Humidity : int 29
## $ Sonnenstrahlung : int 29
## $ Luftdruck relativ : int 28
## $ Luftdruck absolut : int 26
## $ Rain : int 26
## $ Regenrate : int 26
## $ CO2 Konzentration : int 25
## $ RSSI : int 22
## $ gefühlte Temperatur : int 22
## $ PM 2.5 : int 21
## $ Battery : int 20
## $ Ciśnienie : int 20
## $ EisenLabor : int 20
## $ Air Pressure : int 19
## $ Regen : int 19
## $ Schall : int 19
## $ Signal : int 19
## $ Ilmanpaine : int 18
## $ Lämpötila : int 18
## $ UV Index : int 18
## $ Wind speed : int 18
## $ PM 10 : int 17
## $ PM4 : int 17
## $ Air pressure : int 16
## $ Temperatur DHT22 : int 16
## $ Wind Direction : int 16
## $ Altitude : int 15
## $ Illuminance : int 15
## $ Speed : int 15
## $ Wind Speed : int 15
## [list output truncated]</code></pre>
<p>That’s quite some noise there, with many phenomena being measured by a
single sensor only, or many duplicated phenomena due to slightly
different spellings. We should clean that up, but for now let’s just
filter out the noise and find those phenomena with high sensor
numbers:</p>
<div class="sourceCode" id="cb7"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb7-1"><a href="#cb7-1" aria-hidden="true" tabindex="-1"></a>phenoms[phenoms <span class="sc">&gt;</span> <span class="dv">20</span>]</span></code></pre></div>
<pre><code>## $Temperatur
## [1] 9405
##
## $`rel. Luftfeuchte`
## [1] 8315
##
## $PM10
## [1] 8148
##
## $PM2.5
## [1] 8136
##
## $Luftdruck
## [1] 5668
##
## $Beleuchtungsstärke
## [1] 1670
##
## $`UV-Intensität`
## [1] 1660
##
## $Temperature
## [1] 644
##
## $Humidity
## [1] 473
##
## $VOC
## [1] 423
##
## $Luftfeuchte
## [1] 363
##
## $Lufttemperatur
## [1] 357
##
## $`CO₂`
## [1] 305
##
## $Pressure
## [1] 293
##
## $Bodenfeuchte
## [1] 283
##
## $Luftfeuchtigkeit
## [1] 272
##
## $`atm. Luftdruck`
## [1] 246
##
## $Lautstärke
## [1] 240
##
## $PM01
## [1] 206
##
## $IAQ
## [1] 162
##
## $Kalibrierungswert
## [1] 156
##
## $`rel. Luftfeuchte SCD30`
## [1] 156
##
## $Bodentemperatur
## [1] 154
##
## $`Temperatur SCD30`
## [1] 154
##
## $CO2eq
## [1] 153
##
## $Windgeschwindigkeit
## [1] 152
##
## $`pH-Wert`
## [1] 143
##
## $Gesamthärte
## [1] 142
##
## $Blei
## [1] 140
##
## $Eisen
## [1] 140
##
## $`Gesamthärte 2`
## [1] 140
##
## $`Kupfer C`
## [1] 140
##
## $`Kupfer D`
## [1] 140
##
## $Kupfer1
## [1] 140
##
## $Kupfer2
## [1] 140
##
## $Nitrat
## [1] 140
##
## $Nitrit
## [1] 140
##
## $GesamthaerteLabor
## [1] 120
##
## $CO2
## [1] 113
##
## $`Feinstaub PM10`
## [1] 98
##
## $Windrichtung
## [1] 82
##
## $`rel. Luftfeuchte (HECA)`
## [1] 75
##
## $`Temperatur (HECA)`
## [1] 73
##
## $Temperatura
## [1] 69
##
## $Helligkeit
## [1] 67
##
## $`Feinstaub PM2.5`
## [1] 65
##
## $Taupunkt
## [1] 62
##
## $Latitude
## [1] 61
##
## $Longtitude
## [1] 58
##
## $`Durchschnitt Umgebungslautstärke`
## [1] 51
##
## $`Minimum Umgebungslautstärke`
## [1] 51
##
## $`UV-Index`
## [1] 49
##
## $Batterie
## [1] 46
##
## $temperature
## [1] 46
##
## $`Feinstaub PM1.0`
## [1] 41
##
## $Umgebungslautstärke
## [1] 41
##
## $UV
## [1] 40
##
## $humidity
## [1] 38
##
## $`Abstand nach links`
## [1] 34
##
## $`Beschleunigung Z-Achse`
## [1] 34
##
## $`Beschleunigung X-Achse`
## [1] 33
##
## $`Beschleunigung Y-Achse`
## [1] 33
##
## $Geschwindigkeit
## [1] 33
##
## $Niederschlag
## [1] 33
##
## $`Feinstaub PM25`
## [1] 32
##
## $PM1
## [1] 32
##
## $`Abstand nach rechts`
## [1] 31
##
## $PM1.0
## [1] 30
##
## $`rel. Luftfeuchtigkeit`
## [1] 30
##
## $`Relative Humidity`
## [1] 29
##
## $Sonnenstrahlung
## [1] 29
##
## $`Luftdruck relativ`
## [1] 28
##
## $`Luftdruck absolut`
## [1] 26
##
## $Rain
## [1] 26
##
## $Regenrate
## [1] 26
##
## $`CO2 Konzentration`
## [1] 25
##
## $RSSI
## [1] 22
##
## $`gefühlte Temperatur`
## [1] 22
##
## $`PM 2.5`
## [1] 21</code></pre>
<p>Alright, temperature it is! Fine particulate matter (PM2.5) seems to
be more interesting to analyze though. We should check how many sensor
stations provide useful data: We want only those boxes with a PM2.5
sensor that are placed outdoors and are currently submitting
measurements:</p>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a>pm25_sensors <span class="ot">=</span> <span class="fu">osem_boxes</span>(</span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a>  <span class="at">exposure =</span> <span class="st">'outdoor'</span>,</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>  <span class="at">date =</span> <span class="fu">Sys.time</span>(), <span class="co"># ±4 hours</span></span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a>  <span class="at">phenomenon =</span> <span class="st">'PM2.5'</span></span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a>pm25_sensors <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">'pm25_sensors.rds'</span>) <span class="co"># read precomputed file to save resources</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a><span class="fu">summary</span>(pm25_sensors)</span></code></pre></div>
<pre><code>## boxes total: 3011
##
## boxes by exposure:
## outdoor
## 3011
##
## boxes by model:
## custom hackair_home_v2 homeEthernetFeinstaub
## 175 8 12
## homeV2EthernetFeinstaub homeV2Lora homeV2Wifi
## 9 22 2
## homeV2WifiFeinstaub homeWifi homeWifiFeinstaub
## 132 3 32
## luftdaten_pms1003 luftdaten_pms1003_bme280 luftdaten_pms5003
## 1 3 3
## luftdaten_pms5003_bme280 luftdaten_pms7003 luftdaten_pms7003_bme280
## 10 2 28
## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
## 117 1365 60
## luftdaten_sds011_dht11 luftdaten_sds011_dht22
## 44 983
##
## $last_measurement_within
## 1h 1d 30d 365d never
## 0 0 3011 3011 0
##
## oldest box: 2017-03-03 18:20:43 (Witten Heven Dorf)
## newest box: 2023-02-28 08:28:27 (eth0)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 2.000 4.000 5.000 4.854 5.000 26.000</code></pre>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pm25_sensors)</span></code></pre></div>
< p > < img src = "
<p>That’s still more than 3000 measuring stations, we can work with
that.</p>
</div>
<div id="analyzing-sensor-data" class="section level3">
<h3>Analyzing sensor data</h3>
<p>Having analyzed the available data sources, let’s finally get some
measurements. We could call <code>osem_measurements(pm25_sensors)</code>
now, however we are focusing on a restricted area of interest, the city
of Berlin. Luckily we can get the measurements filtered by a bounding
box:</p>
<div class="sourceCode" id="cb13"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb13-1"><a href="#cb13-1" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(sf)</span>
<span id="cb13-2"><a href="#cb13-2" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(units)</span>
<span id="cb13-3"><a href="#cb13-3" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(lubridate)</span>
<span id="cb13-4"><a href="#cb13-4" aria-hidden="true" tabindex="-1"></a><span class="fu">library</span>(dplyr)</span></code></pre></div>
<p>Since the API takes quite long to respond with measurements, especially
when filtered on space and time, we do not run the following chunks for
publication of the package on CRAN.</p>
<div class="sourceCode" id="cb14"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb14-1"><a href="#cb14-1" aria-hidden="true" tabindex="-1"></a><span class="co"># construct a bounding box: 12 kilometers around Berlin</span></span>
<span id="cb14-2"><a href="#cb14-2" aria-hidden="true" tabindex="-1"></a>berlin <span class="ot">=</span> <span class="fu">st_point</span>(<span class="fu">c</span>(<span class="fl">13.4034</span>, <span class="fl">52.5120</span>)) <span class="sc">%&gt;%</span></span>
<span id="cb14-3"><a href="#cb14-3" aria-hidden="true" tabindex="-1"></a>  <span class="fu">st_sfc</span>(<span class="at">crs =</span> <span class="dv">4326</span>) <span class="sc">%&gt;%</span></span>
<span id="cb14-4"><a href="#cb14-4" aria-hidden="true" tabindex="-1"></a>  <span class="fu">st_transform</span>(<span class="dv">3857</span>) <span class="sc">%&gt;%</span> <span class="co"># allow setting a buffer in meters</span></span>
<span id="cb14-5"><a href="#cb14-5" aria-hidden="true" tabindex="-1"></a>  <span class="fu">st_buffer</span>(<span class="fu">set_units</span>(<span class="dv">12</span>, km)) <span class="sc">%&gt;%</span></span>
<span id="cb14-6"><a href="#cb14-6" aria-hidden="true" tabindex="-1"></a>  <span class="fu">st_transform</span>(<span class="dv">4326</span>) <span class="sc">%&gt;%</span> <span class="co"># the opensensemap expects WGS 84</span></span>
<span id="cb14-7"><a href="#cb14-7" aria-hidden="true" tabindex="-1"></a>  <span class="fu">st_bbox</span>()</span>
<span id="cb14-8"><a href="#cb14-8" aria-hidden="true" tabindex="-1"></a>pm25 <span class="ot">=</span> <span class="fu">osem_measurements</span>(</span>
<span id="cb14-9"><a href="#cb14-9" aria-hidden="true" tabindex="-1"></a>  berlin,</span>
<span id="cb14-10"><a href="#cb14-10" aria-hidden="true" tabindex="-1"></a>  <span class="at">phenomenon =</span> <span class="st">'PM2.5'</span>,</span>
<span id="cb14-11"><a href="#cb14-11" aria-hidden="true" tabindex="-1"></a>  <span class="at">from =</span> <span class="fu">now</span>() <span class="sc">-</span> <span class="fu">days</span>(<span class="dv">3</span>), <span class="co"># defaults to 2 days</span></span>
<span id="cb14-12"><a href="#cb14-12" aria-hidden="true" tabindex="-1"></a>  <span class="at">to =</span> <span class="fu">now</span>()</span>
<span id="cb14-13"><a href="#cb14-13" aria-hidden="true" tabindex="-1"></a>)</span></code></pre></div>
<div class="sourceCode" id="cb15"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb15-1"><a href="#cb15-1" aria-hidden="true" tabindex="-1"></a>pm25 <span class="ot">=</span> <span class="fu">readRDS</span>(<span class="st">'pm25_berlin.rds'</span>) <span class="co"># read precomputed file to save resources</span></span>
<span id="cb15-2"><a href="#cb15-2" aria-hidden="true" tabindex="-1"></a><span class="fu">plot</span>(pm25)</span></code></pre></div>
< p > < img src = "
<p>Now we can get started with actual spatiotemporal data analysis.
First, let’s mask the seemingly uncalibrated sensors:</p>
<div class="sourceCode" id="cb16"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb16-1"><a href="#cb16-1" aria-hidden="true" tabindex="-1"></a>outliers <span class="ot">=</span> <span class="fu">filter</span>(pm25, value <span class="sc">&gt;</span> <span class="dv">100</span>)<span class="sc">$</span>sensorId</span>
<span id="cb16-2"><a href="#cb16-2" aria-hidden="true" tabindex="-1"></a>bad_sensors <span class="ot">=</span> outliers[, drop <span class="ot">=</span> <span class="cn">TRUE</span>] <span class="sc">%&gt;%</span> <span class="fu">levels</span>()</span>
<span id="cb16-3"><a href="#cb16-3" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb16-4"><a href="#cb16-4" aria-hidden="true" tabindex="-1"></a>pm25 <span class="ot">=</span> <span class="fu">mutate</span>(pm25, <span class="at">invalid =</span> sensorId <span class="sc">%in%</span> bad_sensors)</span></code></pre></div>
<p>Then plot the measuring locations, flagging the outliers:</p>
<div class="sourceCode" id="cb17"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb17-1"><a href="#cb17-1" aria-hidden="true" tabindex="-1"></a><span class="fu">st_as_sf</span>(pm25) <span class="sc">%&gt;%</span> <span class="fu">st_geometry</span>() <span class="sc">%&gt;%</span> <span class="fu">plot</span>(<span class="at">col =</span> <span class="fu">factor</span>(pm25<span class="sc">$</span>invalid), <span class="at">axes =</span> <span class="cn">TRUE</span>)</span></code></pre></div>
< p > < img src = "
<p>Removing these sensors yields a nicer time series plot:</p>
<div class="sourceCode" id="cb18"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb18-1"><a href="#cb18-1" aria-hidden="true" tabindex="-1"></a>pm25 <span class="sc">%&gt;%</span> <span class="fu">filter</span>(invalid <span class="sc">==</span> <span class="cn">FALSE</span>) <span class="sc">%&gt;%</span> <span class="fu">plot</span>()</span></code></pre></div>
< p > < img src = "
<p>Further analysis: comparison with LANUV data <code>TODO</code></p>
</div>
<!-- code folding -->
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
  // Load MathJax by appending a <script> element to <head> at runtime instead
  // of referencing it statically in the markup.
  (function () {
    var script = document.createElement("script");
    script.type = "text/javascript";
    script.src  = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
    document.getElementsByTagName("head")[0].appendChild(script);
  })();
</script>
</body>
</html>