2017-08-23 02:07:56 +02:00
<!DOCTYPE html>
2018-05-26 12:52:02 +02:00
< html xmlns = "http://www.w3.org/1999/xhtml" >
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
< head >
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
< meta charset = "utf-8" / >
< meta http-equiv = "Content-Type" content = "text/html; charset=utf-8" / >
< meta name = "generator" content = "pandoc" / >
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
< meta name = "viewport" content = "width=device-width, initial-scale=1" >
2018-05-25 17:17:29 +02:00
2018-05-26 12:52:02 +02:00
< meta name = "author" content = "Norwin Roosen" / >
2018-05-25 17:17:29 +02:00
2018-05-26 12:52:02 +02:00
< meta name = "date" content = "2018-05-26" / >
2018-05-25 17:17:29 +02:00
2018-05-26 12:52:02 +02:00
< title > Exploring the openSenseMap Dataset< / title >
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
< style type = "text/css" > code { white-space : pre ; } < / style >
2017-08-23 02:07:56 +02:00
< style type = "text/css" >
2018-05-26 12:52:02 +02:00
/* Highlighted code blocks: let wide listings scroll horizontally. */
div.sourceCode { overflow-x: auto; }
/* Strip margins, padding and borders from the table/row/cell elements of a
   sourceCode table (presumably the line-numbered listing layout). */
table.sourceCode, tr.sourceCode, td.lineNumbers, td.sourceCode {
margin: 0; padding: 0; vertical-align: baseline; border: none; }
table.sourceCode { width: 100%; line-height: 100%; }
/* Line-number cell: right-aligned grey text with a rule separating it from the code cell. */
td.lineNumbers { text-align: right; padding-right: 4px; padding-left: 4px; color: #aaaaaa; border-right: 1px solid #aaaaaa; }
td.sourceCode { padding-left: 5px; }
/* Token styles: one rule per highlighting class on spans directly inside <code>;
   the trailing comment on each rule names the token kind. Rules with an empty
   body leave that token kind unstyled. */
code > span.kw { color: #007020; font-weight: bold; } /* Keyword */
code > span.dt { color: #902000; } /* DataType */
code > span.dv { color: #40a070; } /* DecVal */
code > span.bn { color: #40a070; } /* BaseN */
code > span.fl { color: #40a070; } /* Float */
code > span.ch { color: #4070a0; } /* Char */
code > span.st { color: #4070a0; } /* String */
code > span.co { color: #60a0b0; font-style: italic; } /* Comment */
code > span.ot { color: #007020; } /* Other */
code > span.al { color: #ff0000; font-weight: bold; } /* Alert */
code > span.fu { color: #06287e; } /* Function */
code > span.er { color: #ff0000; font-weight: bold; } /* Error */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; } /* Warning */
code > span.cn { color: #880000; } /* Constant */
code > span.sc { color: #4070a0; } /* SpecialChar */
code > span.vs { color: #4070a0; } /* VerbatimString */
code > span.ss { color: #bb6688; } /* SpecialString */
code > span.im { } /* Import */
code > span.va { color: #19177c; } /* Variable */
code > span.cf { color: #007020; font-weight: bold; } /* ControlFlow */
code > span.op { color: #666666; } /* Operator */
code > span.bu { } /* BuiltIn */
code > span.ex { } /* Extension */
code > span.pp { color: #bc7a00; } /* Preprocessor */
code > span.at { color: #7d9029; } /* Attribute */
code > span.do { color: #ba2121; font-style: italic; } /* Documentation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; } /* Annotation */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; } /* CommentVar */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; } /* Information */
< / style >
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
< link href = "data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0Abo
rder%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%238
00080%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
< / head >
2018-05-25 17:17:29 +02:00
2018-05-26 12:52:02 +02:00
< body >
2018-05-25 17:17:29 +02:00
2018-05-26 12:52:02 +02:00
< h1 class = "title toc-ignore" > Exploring the openSenseMap Dataset< / h1 >
< h4 class = "author" > < em > Norwin Roosen< / em > < / h4 >
< h4 class = "date" > < em > 2018-05-26< / em > < / h4 >
2018-05-25 17:17:29 +02:00
2018-05-26 12:52:02 +02:00
< p > This package provides data ingestion functions for almost any data stored on the open data platform for environmental sensor data < a href = "https://opensensemap.org" class = "uri" > https://opensensemap.org< / a > . Its main goals are to provide means for:< / p >
2017-08-23 02:07:56 +02:00
< ul >
< li > big data analysis of the measurements stored on the platform< / li >
< li > sensor metadata analysis (sensor counts, spatial distribution, temporal trends)< / li >
< / ul >
2018-05-26 12:52:02 +02:00
< div id = "exploring-the-dataset" class = "section level3" >
2017-08-23 02:07:56 +02:00
< h3 > Exploring the dataset< / h3 >
2018-05-26 12:52:02 +02:00
< p > Before we look at actual observations, let’s get a grasp of the openSenseMap dataset’s structure.< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > library< / span > (magrittr)
< span class = "kw" > library< / span > (opensensmapr)
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
all_sensors =< span class = "st" > < / span > < span class = "kw" > osem_boxes< / span > ()< / code > < / pre > < / div >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > summary< / span > (all_sensors)< / code > < / pre > < / div >
< pre > < code > ## boxes total: 1781
2017-08-23 02:07:56 +02:00
##
## boxes by exposure:
2018-05-25 17:17:29 +02:00
## indoor mobile outdoor unknown
2018-05-26 12:52:02 +02:00
## 290 55 1416 20
2017-08-23 02:07:56 +02:00
##
## boxes by model:
2018-05-25 17:17:29 +02:00
## custom homeEthernet homeEthernetFeinstaub
2018-05-26 12:52:02 +02:00
## 336 92 49
2018-05-25 17:17:29 +02:00
## homeWifi homeWifiFeinstaub luftdaten_pms1003
2018-05-26 12:52:02 +02:00
## 193 144 1
2018-05-25 17:17:29 +02:00
## luftdaten_pms1003_bme280 luftdaten_pms5003_bme280 luftdaten_pms7003_bme280
## 1 5 2
## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
## 57 197 19
## luftdaten_sds011_dht11 luftdaten_sds011_dht22
## 46 639
2017-08-23 02:07:56 +02:00
##
## $last_measurement_within
## 1h 1d 30d 365d never
2018-05-26 12:52:02 +02:00
## 929 954 1091 1428 235
2017-08-23 02:07:56 +02:00
##
## oldest box: 2014-05-28 15:36:14 (CALIMERO)
2018-05-26 12:52:02 +02:00
## newest box: 2018-05-26 10:29:27 (UOS_DDI)
2017-08-23 02:07:56 +02:00
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
2018-05-26 12:52:02 +02:00
## 1.0 4.0 4.0 4.6 5.0 33.0< / code > < / pre >
< p > This gives a good overview already: As of writing this, there are more than 1700 sensor stations, of which ~50% are currently running. Most of them are placed outdoors and have around 5 sensors each. The oldest station is from May 2014, while the latest station was registered a couple of minutes ago.< / p >
< p > Another feature of interest is the spatial distribution of the boxes: < code > plot()< / code > can help us out here. This function requires a bunch of optional dependencies though.< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > if (!< span class = "kw" > require< / span > (< span class = "st" > 'maps'< / span > )) < span class = "kw" > install.packages< / span > (< span class = "st" > 'maps'< / span > )
if (!< span class = "kw" > require< / span > (< span class = "st" > 'maptools'< / span > )) < span class = "kw" > install.packages< / span > (< span class = "st" > 'maptools'< / span > )
if (!< span class = "kw" > require< / span > (< span class = "st" > 'rgeos'< / span > )) < span class = "kw" > install.packages< / span > (< span class = "st" > 'rgeos'< / span > )
< span class = "kw" > plot< / span > (all_sensors)< / code > < / pre > < / div >
< p > < img src = "
2017-08-23 02:07:56 +02:00
< p > It seems we have to reduce our area of interest to Germany.< / p >
2018-05-26 12:52:02 +02:00
< p > But what do these sensor stations actually measure? Let’s find out. < code > osem_phenomena()< / code > gives us a named list of the counts of each observed phenomenon for the given set of sensor stations:< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > phenoms =< span class = "st" > < / span > < span class = "kw" > osem_phenomena< / span > (all_sensors)
< span class = "kw" > str< / span > (phenoms)< / code > < / pre > < / div >
< pre > < code > ## List of 433
## $ Temperatur : int 1608
## $ rel. Luftfeuchte : int 1422
2018-05-25 17:17:29 +02:00
## $ PM10 : int 1200
## $ PM2.5 : int 1198
2018-05-26 12:52:02 +02:00
## $ Luftdruck : int 825
## $ Beleuchtungsstärke : int 481
## $ UV-Intensität : int 472
2018-05-25 17:17:29 +02:00
## $ Luftfeuchtigkeit : int 84
## $ Temperature : int 49
## $ Humidity : int 42
## $ Helligkeit : int 25
## $ Lautstärke : int 21
## $ Schall : int 20
## $ UV : int 20
## $ Pressure : int 19
## $ Licht : int 18
## $ Luftfeuchte : int 14
## $ Umgebungslautstärke : int 14
## $ Lämpötila : int 13
## $ Ilmanpaine : int 12
## $ Signal : int 12
## $ Feinstaub PM10 : int 10
## $ Feinstaub PM2.5 : int 9
## $ Kosteus : int 8
2018-05-26 12:52:02 +02:00
## $ Temperatur DHT22 : int 8
2018-05-25 17:17:29 +02:00
## $ Valonmäärä : int 8
## $ temperature : int 8
## $ PM01 : int 7
## $ UV-säteily : int 7
## $ Niederschlag : int 6
## $ UV-Strahlung : int 6
## $ Wind speed : int 6
## $ Windgeschwindigkeit : int 6
## $ humidity : int 6
## $ Ilmankosteus : int 5
## $ Wassertemperatur : int 5
## $ Windrichtung : int 5
## $ rel. Luftfeuchtigkeit : int 5
## $ Druck : int 4
## $ Light : int 4
## $ Temperature 1 : int 4
## $ UV Index : int 4
## $ UV-Säteily : int 4
## $ lautstärke : int 4
## $ rel. Luftfeuchte 1 : int 4
2018-05-26 12:52:02 +02:00
## $ rel. Luftfeuchte DHT22 : int 4
2018-05-25 17:17:29 +02:00
## $ relative Luftfeuchtigkeit : int 4
## $ Air pressure : int 3
## $ Batterie : int 3
## $ Battery : int 3
## $ DS18B20_Probe01 : int 3
## $ DS18B20_Probe02 : int 3
## $ DS18B20_Probe03 : int 3
## $ DS18B20_Probe04 : int 3
## $ DS18B20_Probe05 : int 3
## $ Licht (digital) : int 3
## $ Luftdruck (BME280) : int 3
## $ PM 10 : int 3
## $ PM 2.5 : int 3
## $ Temp : int 3
## $ Temperatur (BME280) : int 3
## $ Temperatur HDC1008 : int 3
## $ Temperatura : int 3
## $ Temperature 2 : int 3
## $ UV-Index : int 3
## $ Valoisuus : int 3
## $ Wind Gust : int 3
## $ pressure : int 3
## $ 1 : int 2
## $ 10 : int 2
## $ 2 : int 2
## $ 3 : int 2
## $ 4 : int 2
## $ 5 : int 2
## $ 6 : int 2
## $ 7 : int 2
## $ 8 : int 2
## $ 9 : int 2
## $ Air Pressure : int 2
## $ Anderer : int 2
## $ Battery voltage : int 2
## $ CO2 : int 2
## $ Feuchte : int 2
## $ Illuminance : int 2
## $ Intensity : int 2
## $ Leitfähigkeit : int 2
## $ Lichtintensität : int 2
## $ Luftdruck BMP180 : int 2
## $ Luftfeuchte (BME280) : int 2
## $ Luftqualität : int 2
## $ Lufttemperatur : int 2
## $ PM25 : int 2
## $ Radioactivity : int 2
## $ Radioaktivität : int 2
## $ Regen : int 2
## $ Relative Humidity : int 2
## $ Sound : int 2
## $ Temperatur (DHT22) : int 2
## $ Temperatur BMP180 : int 2
2018-05-26 12:52:02 +02:00
## [list output truncated]< / code > < / pre >
< p > That’s quite some noise there, with many phenomena being measured by a single sensor only, or many duplicated phenomena due to slightly different spellings. We should clean that up, but for now let’s just filter out the noise and find those phenomena with high sensor numbers:< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > phenoms[phenoms > < span class = "st" > < / span > < span class = "dv" > 20< / span > ]< / code > < / pre > < / div >
2017-08-23 02:07:56 +02:00
< pre > < code > ## $Temperatur
2018-05-26 12:52:02 +02:00
## [1] 1608
2017-08-23 02:07:56 +02:00
##
## $`rel. Luftfeuchte`
2018-05-26 12:52:02 +02:00
## [1] 1422
2017-08-23 02:07:56 +02:00
##
## $PM10
2018-05-25 17:17:29 +02:00
## [1] 1200
2017-08-23 02:07:56 +02:00
##
## $PM2.5
2018-05-25 17:17:29 +02:00
## [1] 1198
2017-08-23 02:07:56 +02:00
##
2018-05-25 17:17:29 +02:00
## $Luftdruck
2018-05-26 12:52:02 +02:00
## [1] 825
2017-08-23 02:07:56 +02:00
##
## $Beleuchtungsstärke
2018-05-26 12:52:02 +02:00
## [1] 481
2018-05-25 17:17:29 +02:00
##
## $`UV-Intensität`
2018-05-26 12:52:02 +02:00
## [1] 472
2017-08-23 02:07:56 +02:00
##
## $Luftfeuchtigkeit
2018-05-25 17:17:29 +02:00
## [1] 84
##
## $Temperature
## [1] 49
2017-08-23 02:07:56 +02:00
##
2018-05-25 17:17:29 +02:00
## $Humidity
## [1] 42
##
## $Helligkeit
## [1] 25
##
## $Lautstärke
2018-05-26 12:52:02 +02:00
## [1] 21< / code > < / pre >
< p > Alright, temperature it is! Fine particulate matter (PM2.5) seems to be more interesting to analyze though. We should check how many sensor stations provide useful data: We want only those boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting measurements:< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > pm25_sensors =< span class = "st" > < / span > < span class = "kw" > osem_boxes< / span > (
< span class = "dt" > exposure =< / span > < span class = "st" > 'outdoor'< / span > ,
< span class = "dt" > date =< / span > < span class = "kw" > Sys.time< / span > (), < span class = "co" > # ±4 hours< / span >
< span class = "dt" > phenomenon =< / span > < span class = "st" > 'PM2.5'< / span >
)< / code > < / pre > < / div >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > summary< / span > (pm25_sensors)< / code > < / pre > < / div >
< pre > < code > ## boxes total: 791
2017-08-23 02:07:56 +02:00
##
## boxes by exposure:
## outdoor
2018-05-26 12:52:02 +02:00
## 791
2017-08-23 02:07:56 +02:00
##
## boxes by model:
2018-05-25 17:17:29 +02:00
## custom homeEthernetFeinstaub homeWifi
2018-05-26 12:52:02 +02:00
## 29 37 6
2018-05-25 17:17:29 +02:00
## homeWifiFeinstaub luftdaten_pms1003_bme280 luftdaten_pms5003_bme280
2018-05-26 12:52:02 +02:00
## 57 1 1
2018-05-25 17:17:29 +02:00
## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
2018-05-26 12:52:02 +02:00
## 2 32 137
2018-05-25 17:17:29 +02:00
## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
2018-05-26 12:52:02 +02:00
## 14 32 443
2017-08-23 02:07:56 +02:00
##
## $last_measurement_within
## 1h 1d 30d 365d never
2018-05-26 12:52:02 +02:00
## 771 780 784 789 2
2017-08-23 02:07:56 +02:00
##
2018-05-25 17:17:29 +02:00
## oldest box: 2016-06-02 12:09:47 (BalkonBox Mindener Str.)
## newest box: 2018-05-24 20:29:50 (Stadthalle)
2017-08-23 02:07:56 +02:00
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
2018-05-26 12:52:02 +02:00
## 2.000 4.000 4.000 4.617 5.000 12.000< / code > < / pre >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > plot< / span > (pm25_sensors)< / code > < / pre > < / div >
< p > < img src = "
2017-08-23 02:07:56 +02:00
< p > That’s still more than 700 measuring stations, we can work with that.< / p >
2018-05-26 12:52:02 +02:00
< / div >
< div id = "analyzing-sensor-data" class = "section level3" >
2017-08-23 02:07:56 +02:00
< h3 > Analyzing sensor data< / h3 >
2018-05-26 12:52:02 +02:00
< p > Having analyzed the available data sources, let’s finally get some measurements. We could call < code > osem_measurements(pm25_sensors)< / code > now, however we are focussing on a restricted area of interest, the city of Berlin. Luckily we can get the measurements filtered by a bounding box:< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > library< / span > (sf)< / code > < / pre > < / div >
< pre > < code > ## Linking to GEOS 3.5.1, GDAL 2.2.2, proj.4 4.9.2< / code > < / pre >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > library< / span > (units)< / code > < / pre > < / div >
2018-05-25 17:17:29 +02:00
< pre > < code > ##
2018-05-26 12:52:02 +02:00
## Attaching package: 'units'< / code > < / pre >
< pre > < code > ## The following object is masked from 'package:base':
2018-05-25 17:17:29 +02:00
##
2018-05-26 12:52:02 +02:00
## %*%< / code > < / pre >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > library< / span > (lubridate)
< span class = "kw" > library< / span > (dplyr)
< span class = "co" > # construct a bounding box: 12 kilometers around Berlin< / span >
berlin =< span class = "st" > < / span > < span class = "kw" > st_point< / span > (< span class = "kw" > c< / span > (< span class = "fl" > 13.4034< / span > , < span class = "fl" > 52.5120< / span > )) %> %
< span class = "st" > < / span > < span class = "kw" > st_sfc< / span > (< span class = "dt" > crs =< / span > < span class = "dv" > 4326< / span > ) %> %
< span class = "st" > < / span > < span class = "kw" > st_transform< / span > (< span class = "dv" > 3857< / span > ) %> %< span class = "st" > < / span > < span class = "co" > # allow setting a buffer in meters< / span >
< span class = "st" > < / span > < span class = "kw" > st_buffer< / span > (< span class = "kw" > set_units< / span > (< span class = "dv" > 12< / span > , km)) %> %
< span class = "st" > < / span > < span class = "kw" > st_transform< / span > (< span class = "dv" > 4326< / span > ) %> %< span class = "st" > < / span > < span class = "co" > # the opensensemap expects WGS 84< / span >
< span class = "st" > < / span > < span class = "kw" > st_bbox< / span > ()< / code > < / pre > < / div >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > pm25 =< span class = "st" > < / span > < span class = "kw" > osem_measurements< / span > (
2017-08-23 02:07:56 +02:00
berlin,
2018-05-26 12:52:02 +02:00
< span class = "dt" > phenomenon =< / span > < span class = "st" > 'PM2.5'< / span > ,
< span class = "dt" > from =< / span > < span class = "kw" > now< / span > () -< span class = "st" > < / span > < span class = "kw" > days< / span > (< span class = "dv" > 20< / span > ), < span class = "co" > # defaults to 2 days< / span >
< span class = "dt" > to =< / span > < span class = "kw" > now< / span > ()
2017-08-23 16:07:05 +02:00
)
2018-05-26 12:52:02 +02:00
< span class = "kw" > plot< / span > (pm25)< / code > < / pre > < / div >
< p > < img src = "
< p > Now we can get started with actual spatiotemporal data analysis. First, let’s mask the seemingly uncalibrated sensors:< / p >
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > outliers =< span class = "st" > < / span > < span class = "kw" > filter< / span > (pm25, value > < span class = "st" > < / span > < span class = "dv" > 100< / span > )$sensorId
bad_sensors =< span class = "st" > < / span > outliers[, drop =< span class = "st" > < / span > T] %> %< span class = "st" > < / span > < span class = "kw" > levels< / span > ()
2017-08-24 21:45:38 +02:00
2018-05-26 12:52:02 +02:00
pm25 =< span class = "st" > < / span > < span class = "kw" > mutate< / span > (pm25, < span class = "dt" > invalid =< / span > sensorId %in%< span class = "st" > < / span > bad_sensors)< / code > < / pre > < / div >
2017-08-24 21:45:38 +02:00
< p > Then plot the measuring locations, flagging the outliers:< / p >
2018-05-26 12:52:02 +02:00
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > < span class = "kw" > st_as_sf< / span > (pm25) %> %< span class = "st" > < / span > < span class = "kw" > st_geometry< / span > () %> %< span class = "st" > < / span > < span class = "kw" > plot< / span > (< span class = "dt" > col =< / span > < span class = "kw" > factor< / span > (pm25$invalid), < span class = "dt" > axes =< / span > T)< / code > < / pre > < / div >
< p > < img src = "
2017-08-24 21:45:38 +02:00
< p > Removing these sensors yields a nicer time series plot:< / p >
2018-05-26 12:52:02 +02:00
< div class = "sourceCode" > < pre class = "sourceCode r" > < code class = "sourceCode r" > pm25 %> %< span class = "st" > < / span > < span class = "kw" > filter< / span > (invalid ==< span class = "st" > < / span > < span class = "ot" > FALSE< / span > ) %> %< span class = "st" > < / span > < span class = "kw" > plot< / span > ()< / code > < / pre > < / div >
< p > < img src = "
< p > Further analysis: comparison with LANUV data < code > TODO< / code > < / p >
< / div >
2017-08-23 02:07:56 +02:00
2018-05-26 12:52:02 +02:00
<!-- dynamically load mathjax for compatibility with self-contained -->
< script >
(function () {
  // Create the script element that will fetch MathJax from mathjax.rstudio.com.
  var mathjaxTag = document.createElement("script");
  mathjaxTag.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  mathjaxTag.type = "text/javascript";

  // Attach it to the first head element of the document.
  var headElement = document.getElementsByTagName("head")[0];
  headElement.appendChild(mathjaxTag);
})();
< / script >
2017-08-23 02:07:56 +02:00
< / body >
< / html >