diff --git a/inst/doc/osem-history.R b/inst/doc/osem-history.R index cda0dcb..20e6072 100644 --- a/inst/doc/osem-history.R +++ b/inst/doc/osem-history.R @@ -9,7 +9,10 @@ library(zoo) # rollmean() ## ----download----------------------------------------------------------------- # if you want to see results for a specific subset of boxes, # just specify a filter such as grouptag='ifgi' here -boxes = osem_boxes() + +# boxes = osem_boxes(cache = '.') +boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources + ## ----exposure_counts, message=FALSE------------------------------------------- exposure_counts = boxes %>% diff --git a/inst/doc/osem-history.Rmd b/inst/doc/osem-history.Rmd index 5a6ab21..1221342 100644 --- a/inst/doc/osem-history.Rmd +++ b/inst/doc/osem-history.Rmd @@ -43,7 +43,10 @@ So the first step is to retrieve *all the boxes*: ```{r download} # if you want to see results for a specific subset of boxes, # just specify a filter such as grouptag='ifgi' here -boxes = osem_boxes() + +# boxes = osem_boxes(cache = '.') +boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources + ``` # Plot count of boxes by time {.tabset} diff --git a/inst/doc/osem-history.html b/inst/doc/osem-history.html index 4f7b3d5..ca38ab8 100644 --- a/inst/doc/osem-history.html +++ b/inst/doc/osem-history.html @@ -12,7 +12,7 @@ - +
opensensmapr
, dplyr
and
library(lubridate) # date arithmetic
library(zoo) # rollmean()openSenseMap.org has grown quite a bit in the last years; it would be -interesting to see how we got to the current 11367 sensor stations, +interesting to see how we got to the current 11448 sensor stations, split up by various attributes of the boxes.
While opensensmapr
provides extensive methods of
filtering boxes by attributes on the server, we do the filtering within
@@ -385,7 +385,9 @@ R to save time and gain flexibility. So the first step is to retrieve
all the boxes:
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
-= osem_boxes() boxes
By looking at the createdAt
attribute of each box we
@@ -403,7 +405,7 @@ okay for now.
Outdoor boxes are growing fast! We can also see the
introduction of mobile
sensor “stations” in 2017. While
mobile boxes are still few, we can expect a quick rise in 2018 once the
@@ -430,20 +432,20 @@ new senseBox MCU with GPS support is released.
Luftdaten
,
ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) +
geom_line(aes(group = grouptag)) +
xlab('Registration Date') + ylab('senseBox count')
-
+
%>%
grouptag_counts summarise(
oldest = min(createdAt),
@@ -507,8 +509,8 @@ grouptag naming is inconsistent ( Luftdaten
,
edu
2022-03-30 11:25:43
-2023-02-20 11:06:45
-430
+2023-02-28 09:47:17
+431
Save Dnipro
@@ -523,17 +525,17 @@ grouptag naming is inconsistent (Luftdaten
,
244
+CS:iDrop
+2023-01-10 10:22:33
+2023-02-27 09:53:33
+140
+
+
HU Explorers
2022-03-30 11:25:43
2022-12-14 10:11:34
124
-
-CS:iDrop
-2023-01-10 10:22:33
-2023-01-31 15:13:46
-120
-
#stropdeaer
2022-03-30 11:25:43
@@ -562,7 +564,7 @@ grouptag naming is inconsistent (Luftdaten
,
Futurium
2022-03-30 11:25:43
2022-03-30 11:25:43
-40
+39
Bad_Hersfeld
@@ -577,15 +579,15 @@ grouptag naming is inconsistent (Luftdaten
,
36
-Mikroprojekt Mitmachklima
+kerekdomb_
+2022-03-30 11:25:43
2022-03-30 11:25:43
-2022-08-23 13:14:11
34
-kerekdomb_
-2022-03-30 11:25:43
+Mikroprojekt Mitmachklima
2022-03-30 11:25:43
+2022-08-23 13:14:11
34
@@ -637,15 +639,15 @@ grouptag naming is inconsistent (Luftdaten
,
24
-WAUW!denberg
-2022-03-30 11:25:43
+freshairbromley
2022-03-30 11:25:43
-23
+2023-01-31 10:18:57
+24
-freshairbromley
+WAUW!denberg
+2022-03-30 11:25:43
2022-03-30 11:25:43
-2023-01-31 10:18:57
23
@@ -655,21 +657,21 @@ grouptag naming is inconsistent (Luftdaten
,
22
-KJR-M
-2022-03-30 11:25:43
+bad_hersfeld
2022-03-30 11:25:43
+2022-06-14 09:34:02
21
-Mikroklima
+KJR-M
+2022-03-30 11:25:43
2022-03-30 11:25:43
-2022-09-05 08:38:57
21
-bad_hersfeld
+Mikroklima
2022-03-30 11:25:43
-2022-06-14 09:34:02
+2022-09-05 08:38:57
21
@@ -757,35 +759,35 @@ grouptag naming is inconsistent (Luftdaten
,
15
+MakeLight
+2022-03-30 11:25:43
+2022-03-30 11:25:43
+15
+
+
MSGB
2022-11-14 09:08:57
2022-11-14 10:19:24
15
-
+
MSHO
2022-12-20 09:28:40
2022-12-20 10:01:38
15
-
+
MSIN
2022-11-21 17:02:39
2022-11-21 23:06:22
15
-
+
MSKE
2023-01-05 15:40:58
2023-01-05 15:52:02
15
-
-MakeLight
-2022-03-30 11:25:43
-2022-03-30 11:25:43
-15
-
PMSI
2023-01-20 14:22:03
@@ -823,15 +825,15 @@ grouptag naming is inconsistent (Luftdaten
,
13
-Sofia
-2022-03-30 11:25:43
+co2mofetten
2022-03-30 11:25:43
+2023-01-17 07:38:21
12
-co2mofetten
+Sofia
+2022-03-30 11:25:43
2022-03-30 11:25:43
-2023-01-17 07:38:21
12
@@ -841,13 +843,13 @@ grouptag naming is inconsistent (Luftdaten
,
11
-Netlight
+home
2022-03-30 11:25:43
2022-03-30 11:25:43
11
-home
+Netlight
2022-03-30 11:25:43
2022-03-30 11:25:43
11
@@ -883,39 +885,39 @@ grouptag naming is inconsistent (Luftdaten
,
10
-HBG Bonn
+dwih-sp
2022-03-30 11:25:43
2022-03-30 11:25:43
10
-IntegrA
+esri-de
2022-03-30 11:25:43
2022-03-30 11:25:43
10
-Mikroklima H
-2022-05-07 17:29:00
-2022-05-07 17:47:42
+HBG Bonn
+2022-03-30 11:25:43
+2022-03-30 11:25:43
10
-dwih-sp
+IntegrA
2022-03-30 11:25:43
2022-03-30 11:25:43
10
-esri-de
-2022-03-30 11:25:43
+makerspace-partheland
2022-03-30 11:25:43
+2023-02-20 18:34:50
10
-makerspace-partheland
-2022-03-30 11:25:43
-2023-02-20 18:34:50
+Mikroklima H
+2022-05-07 17:29:00
+2022-05-07 17:47:42
10
@@ -931,31 +933,31 @@ grouptag naming is inconsistent (Luftdaten
,
9
-Fläming
-2022-08-15 19:16:48
-2022-12-13 06:29:22
+clevermint
+2022-03-30 11:25:43
+2022-03-30 11:25:43
9
-Mikroklima C-R
-2022-03-30 11:25:43
-2022-03-30 11:25:43
+Fläming
+2022-08-15 19:16:48
+2022-12-13 06:29:22
9
-Ostroda
+Mikroklima C-R
2022-03-30 11:25:43
2022-03-30 11:25:43
9
-RSS
+Ostroda
2022-03-30 11:25:43
2022-03-30 11:25:43
9
-clevermint
+RSS
2022-03-30 11:25:43
2022-03-30 11:25:43
9
@@ -973,13 +975,13 @@ grouptag naming is inconsistent (Luftdaten
,
8
-DBDS
+Data4City
2022-03-30 11:25:43
2022-03-30 11:25:43
8
-Data4City
+DBDS
2022-03-30 11:25:43
2022-03-30 11:25:43
8
@@ -1073,7 +1075,7 @@ big impact here.
geom_point(aes(y = count), size = 0.5) +
# moving average, make first and last value NA (to ensure identical length of vectors)
geom_line(aes(y = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA))))
-
+
We see a sudden rise in early 2017, which lines up with the fast
growing grouptag Luftdaten
. This was enabled by an
integration of openSenseMap.org into the firmware of the air quality
@@ -1097,9 +1099,9 @@ each box, and look at metrics by exposure and grouptag once more:
ggplot(duration, aes(x = exposure, y = duration)) +
geom_boxplot() +
coord_flip() + ylab('Duration active in Days')
-
- The time of activity averages at only 157 days, though there are
-boxes with 2389 days of activity, spanning a large chunk of
+
+The time of activity averages at only 158 days, though there are
+boxes with 2394 days of activity, spanning a large chunk of
openSenseMap’s existence.
@@ -1113,7 +1115,7 @@ openSenseMap’s existence.
ggplot(duration, aes(x = grouptag, y = duration)) +
geom_boxplot() +
coord_flip() + ylab('Duration active in Days')
-
+
%>%
duration summarize(
duration_avg = round(mean(duration)),
@@ -1143,625 +1145,625 @@ openSenseMap’s existence.
Ostroda
-330 days
-330 days
-330 days
-330 days
+335 days
+335 days
+335 days
+343 days
Mikroklima C-R
-328 days
+332 days
321 days
-330 days
-330 days
+335 days
+343 days
Apeldoorn
-326 days
+331 days
263 days
-330 days
-330 days
+335 days
+343 days
freshairbromley
-298 days
-23 days
-330 days
-330 days
+304 days
+28 days
+335 days
+343 days
Mikroklima
-280 days
+283 days
42 days
-330 days
-330 days
+335 days
+343 days
Mikroklima H
-279 days
+283 days
229 days
-292 days
-292 days
+297 days
+305 days
Smart City MS
272 days
0 days
-330 days
-330 days
+335 days
+343 days
Feinstaub
-220 days
+223 days
0 days
-330 days
-330 days
+335 days
+343 days
-co2mofetten
-212 days
+makerspace-partheland
+217 days
0 days
-330 days
-330 days
+335 days
+343 days
-makerspace-partheland
-210 days
+co2mofetten
+213 days
0 days
-330 days
-330 days
+334 days
+343 days
Luftdaten
-208 days
+211 days
0 days
-330 days
-330 days
+335 days
+343 days
luftdaten.info
-197 days
+200 days
0 days
-330 days
-330 days
+335 days
+343 days
Burgermeetnet
-188 days
+190 days
0 days
-330 days
-330 days
+335 days
+343 days
esri-de
-188 days
+190 days
0 days
-330 days
-330 days
+335 days
+343 days
#stropdeaer
-185 days
+187 days
0 days
-330 days
-330 days
+335 days
+343 days
Sofia
-170 days
+172 days
0 days
-330 days
-330 days
+335 days
+343 days
WAUW!denberg
-166 days
+168 days
0 days
-330 days
-330 days
+335 days
+343 days
KJR-M
-165 days
+167 days
0 days
-330 days
-330 days
+335 days
+343 days
IKG
-162 days
+163 days
0 days
-330 days
-330 days
+335 days
+343 days
AirAberdeen
-153 days
+155 days
0 days
-330 days
-330 days
+335 days
+343 days
M7
-152 days
-87 days
+155 days
+92 days
243 days
-330 days
+343 days
1
-145 days
+148 days
0 days
-330 days
-330 days
+335 days
+343 days
BurgerMeetnet
-139 days
+141 days
0 days
-330 days
-330 days
+335 days
+343 days
Luftdaten.info
-138 days
+139 days
0 days
-330 days
-330 days
+335 days
+343 days
Bottrop-Feinstaub
-132 days
+133 days
0 days
-330 days
-330 days
+335 days
+343 days
-stw
-129 days
+cleanairfrome
+130 days
0 days
-330 days
-330 days
+335 days
+343 days
-cleanairfrome
-128 days
+montorioveronese.it
+130 days
0 days
-330 days
-330 days
+335 days
+343 days
-montorioveronese.it
-128 days
+stw
+130 days
0 days
-330 days
-330 days
+335 days
+343 days
RB-DSJ
122 days
0 days
-330 days
-330 days
+335 days
+343 days
Mikroprojekt Mitmachklima
-117 days
+118 days
0 days
-330 days
-330 days
+335 days
+343 days
-Luchtwachters Delft
+BRGL
+113 days
109 days
-0 days
-330 days
-330 days
+114 days
+122 days
-BRGL
-107 days
-85 days
-109 days
-109 days
+Luchtwachters Delft
+111 days
+0 days
+335 days
+343 days
Fläming
-107 days
+110 days
23 days
-175 days
-192 days
+180 days
+205 days
BRGW
-106 days
+109 days
98 days
-113 days
-113 days
+118 days
+126 days
PIE
-101 days
+103 days
0 days
-330 days
-330 days
+335 days
+343 days
Riga
-101 days
+103 days
0 days
-330 days
-330 days
+335 days
+343 days
kerekdomb_
100 days
0 days
-330 days
-330 days
+335 days
+343 days
luftdaten
-99 days
+100 days
0 days
-330 days
-330 days
+335 days
+343 days
home
-94 days
+95 days
0 days
-330 days
-330 days
+335 days
+343 days
Bad_Hersfeld
-93 days
+94 days
0 days
-330 days
-330 days
+335 days
+343 days
-dwih-sp
-91 days
-0 days
-330 days
-330 days
-
-
MSGB
-89 days
+94 days
50 days
-101 days
-101 days
+106 days
+114 days
+
+
+dwih-sp
+92 days
+0 days
+335 days
+343 days
AGIN
-86 days
-86 days
-86 days
+91 days
87 days
+92 days
+100 days
HTLJ
-84 days
+91 days
58 days
-94 days
-94 days
+99 days
+107 days
-bad_hersfeld
-84 days
+Соседи по воздуху
+87 days
0 days
-330 days
-330 days
+335 days
+343 days
-Соседи по воздуху
+bad_hersfeld
84 days
0 days
-330 days
-330 days
+335 days
+343 days
Captographies
-78 days
+82 days
0 days
-643 days
-643 days
+648 days
+656 days
Save Dnipro
-74 days
+75 days
0 days
-330 days
-330 days
+335 days
+343 days
PGKN
-67 days
+68 days
0 days
-330 days
-330 days
+335 days
+343 days
-Netlight
-60 days
-0 days
-330 days
-330 days
-
-
MSHO
-57 days
+61 days
36 days
-65 days
-65 days
+70 days
+78 days
+
+
+Netlight
+61 days
+0 days
+335 days
+343 days
-Futurium
-52 days
+#STROPDEAER
+55 days
0 days
-330 days
-330 days
+335 days
+343 days
-MSIN
-52 days
+Futurium
+54 days
0 days
-79 days
-94 days
+335 days
+343 days
test
-52 days
+54 days
0 days
-329 days
-330 days
+335 days
+343 days
ifgi
-51 days
+52 days
0 days
-330 days
-330 days
+335 days
+343 days
-#STROPDEAER
-50 days
+MSIN
+52 days
0 days
-330 days
-330 days
+79 days
+107 days
-ATSO
-48 days
+2
+50 days
0 days
-279 days
-330 days
+331 days
+343 days
-2
-46 days
+ATSO
+48 days
0 days
-310 days
-330 days
+279 days
+343 days
MakeLight
-46 days
+47 days
0 days
-330 days
-330 days
+335 days
+343 days
Haus B
44 days
0 days
239 days
-330 days
+343 days
Futurium 2021
43 days
0 days
329 days
-330 days
+343 days
-IVKOWeek
+DBDS
42 days
0 days
-330 days
-330 days
+335 days
+343 days
-DBDS
-41 days
+IVKOWeek
+42 days
0 days
-330 days
-330 days
+335 days
+343 days
-GIZ Clean Air Day Project
-36 days
-0 days
-330 days
-330 days
+PMSI
+38 days
+38 days
+38 days
+47 days
edu
-36 days
+37 days
0 days
-330 days
-330 days
+335 days
+343 days
+GIZ Clean Air Day Project
+37 days
+0 days
+335 days
+343 days
+
+
TKS Bonn
32 days
0 days
-330 days
-330 days
+335 days
+343 days
-
+
HU Explorers
28 days
0 days
319 days
-330 days
+343 days
-
+
321heiss
24 days
0 days
43 days
-241 days
+254 days
-
+
SUGUCS
9 days
0 days
53 days
-85 days
+98 days
-
+
APPI
3 days
0 days
7 days
-28 days
+41 days
-
+
MSKE
3 days
0 days
7 days
-49 days
-
-
-PMSI
-3 days
-0 days
-4 days
-34 days
+62 days
RSS
3 days
0 days
28 days
-330 days
+343 days
CS:iDrop
2 days
0 days
36 days
-44 days
+57 days
UrbanGarden
2 days
0 days
-12 days
-21 days
+16 days
+34 days
Balthasar-Neumann-Schule 1
0 days
0 days
0 days
-330 days
+343 days
Bestäuberprojekt
0 days
0 days
0 days
-330 days
+343 days
Che Aria Tira?
0 days
0 days
0 days
-330 days
+343 days
-Data4City
+clevermint
0 days
0 days
0 days
-330 days
+343 days
-HBG Bonn
+Data4City
0 days
0 days
0 days
-330 days
+343 days
Haus C
0 days
0 days
0 days
-330 days
+343 days
Haus D
0 days
0 days
0 days
-330 days
+343 days
-IntegrA
+HBG Bonn
0 days
0 days
0 days
-330 days
+343 days
-Koerber-Stiftung
+IntegrA
0 days
0 days
0 days
-330 days
+343 days
-Natlab Ökologie
+Koerber-Stiftung
0 days
0 days
0 days
-330 days
+343 days
-Raumanmeri
+Natlab Ökologie
0 days
0 days
0 days
-330 days
+343 days
-SekSeeland
+Raumanmeri
0 days
0 days
0 days
-330 days
+343 days
-clevermint
+SekSeeland
0 days
0 days
0 days
-330 days
+343 days
-The time of activity averages at only 89 days, though there are boxes
-with 643 days of activity, spanning a large chunk of openSenseMap’s
+
The time of activity averages at only 90 days, though there are boxes
+with 648 days of activity, spanning a large chunk of openSenseMap’s
existence.
@@ -1780,7 +1782,7 @@ Request!
ggplot(duration, aes(x = substr(as.character(year), 0, 4), y = duration)) +
geom_boxplot() +
coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
-
+
diff --git a/inst/doc/osem-history_revised.R b/inst/doc/osem-history_revised.R
index 84ea635..d0fc6b6 100644
--- a/inst/doc/osem-history_revised.R
+++ b/inst/doc/osem-history_revised.R
@@ -9,18 +9,15 @@ library(zoo) # rollmean()
## ----download, results='hide', message=FALSE, warning=FALSE-------------------
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
-boxes_all = osem_boxes()
-boxes = boxes_all
+
+# boxes = osem_boxes(cache = '.')
+boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## -----------------------------------------------------------------------------
boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31")
summary(boxes) -> summary.data.frame
-## ----message=F, warning=F-----------------------------------------------------
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
+## ---- message=FALSE, warning=FALSE--------------------------------------------
plot(boxes)
## -----------------------------------------------------------------------------
diff --git a/inst/doc/osem-history_revised.Rmd b/inst/doc/osem-history_revised.Rmd
index c4cbaaa..91cf446 100644
--- a/inst/doc/osem-history_revised.Rmd
+++ b/inst/doc/osem-history_revised.Rmd
@@ -45,8 +45,9 @@ So the first step is to retrieve *all the boxes*.
```{r download, results='hide', message=FALSE, warning=FALSE}
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
-boxes_all = osem_boxes()
-boxes = boxes_all
+
+# boxes = osem_boxes(cache = '.')
+boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
```
# Introduction
In the following we just want to have a look at the boxes created in 2022, so we filter for them.
@@ -65,11 +66,7 @@ summary(boxes) -> summary.data.frame
Another feature of interest is the spatial distribution of the boxes: `plot()`
can help us out here. This function requires a bunch of optional dependencies though.
-```{r message=F, warning=F}
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
+```{r, message=FALSE, warning=FALSE}
plot(boxes)
```
diff --git a/inst/doc/osem-history_revised.html b/inst/doc/osem-history_revised.html
index 8077d56..1c9babc 100644
--- a/inst/doc/osem-history_revised.html
+++ b/inst/doc/osem-history_revised.html
@@ -11,7 +11,7 @@
-
+
Visualising the Development of openSenseMap.org in 2022
@@ -1600,7 +1600,7 @@ border-radius: 0px;
Visualising the Development of
openSenseMap.org in 2022
Jan Stenkamp
-2023-02-23
+2023-03-08
@@ -1617,7 +1617,7 @@ library(ggplot2) # plotting
library(lubridate) # date arithmetic
library(zoo) # rollmean()
openSenseMap.org has grown quite a bit in the last years; it would be
-interesting to see how we got to the current 11367 sensor stations,
+interesting to see how we got to the current 11448 sensor stations,
split up by various attributes of the boxes.
While opensensmapr
provides extensive methods of
filtering boxes by attributes on the server, we do the filtering within
@@ -1625,23 +1625,24 @@ R to save time and gain flexibility.
So the first step is to retrieve all the boxes.
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
-boxes_all = osem_boxes()
-boxes = boxes_all
+
+# boxes = osem_boxes(cache = '.')
+boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
Introduction
In the following we just want to have a look at the boxes created in
2022, so we filter for them.
boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31")
summary(boxes) -> summary.data.frame
-## boxes total: 2108
+## boxes total: 2107
##
## boxes by exposure:
## indoor mobile outdoor unknown
-## 524 200 1383 1
+## 524 200 1382 1
##
## boxes by model:
## custom hackair_home_v2 homeEthernet
-## 932 5 5
+## 931 5 5
## homeEthernetFeinstaub homeV2Ethernet homeV2EthernetFeinstaub
## 2 5 5
## homeV2Lora homeV2Wifi homeV2WifiFeinstaub
@@ -1659,7 +1660,7 @@ summary(boxes) -> summary.data.frame
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 708 758 889 1559 498
+## 0 0 851 1542 497
##
## oldest box: 2020-02-29 23:00:31 (Kirchardt 1)
## newest box: 2022-12-30 09:19:46 (Balkon)
@@ -1675,11 +1676,7 @@ summary(boxes) -> summary.data.frame
Another feature of interest is the spatial distribution of the boxes:
plot()
can help us out here. This function requires a bunch
of optional dependencies though.
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
-plot(boxes)
+plot(boxes)
But what do these sensor stations actually measure? Lets find out.
osem_phenomena()
gives us a named list of of the counts of
@@ -1689,8 +1686,8 @@ str(phenoms)
## List of 966
## $ Temperatur : int 1520
## $ rel. Luftfeuchte : int 1289
-## $ PM10 : int 1070
-## $ PM2.5 : int 1069
+## $ PM10 : int 1069
+## $ PM2.5 : int 1068
## $ Luftdruck : int 994
## $ Beleuchtungsstärke : int 282
## $ UV-Intensität : int 282
@@ -1698,19 +1695,19 @@ str(phenoms)
## $ Lufttemperatur : int 208
## $ CO₂ : int 178
## $ Bodenfeuchte : int 170
-## $ Temperature : int 168
+## $ Temperature : int 167
## $ Luftfeuchte : int 134
## $ Lautstärke : int 131
-## $ Humidity : int 124
+## $ Humidity : int 123
## $ atm. Luftdruck : int 114
## $ Kalibrierungswert : int 108
## $ CO2eq : int 107
## $ IAQ : int 107
## $ Temperatur SCD30 : int 107
## $ rel. Luftfeuchte SCD30 : int 107
-## $ Pressure : int 98
+## $ Pressure : int 97
## $ Luftfeuchtigkeit : int 62
-## $ PM01 : int 58
+## $ PM01 : int 57
## $ Bodentemperatur : int 55
## $ Windgeschwindigkeit : int 46
## $ Feinstaub PM10 : int 41
@@ -1800,10 +1797,10 @@ numbers:
## [1] 1289
##
## $PM10
-## [1] 1070
+## [1] 1069
##
## $PM2.5
-## [1] 1069
+## [1] 1068
##
## $Luftdruck
## [1] 994
@@ -1827,7 +1824,7 @@ numbers:
## [1] 170
##
## $Temperature
-## [1] 168
+## [1] 167
##
## $Luftfeuchte
## [1] 134
@@ -1836,7 +1833,7 @@ numbers:
## [1] 131
##
## $Humidity
-## [1] 124
+## [1] 123
##
## $`atm. Luftdruck`
## [1] 114
@@ -1857,13 +1854,13 @@ numbers:
## [1] 107
##
## $Pressure
-## [1] 98
+## [1] 97
##
## $Luftfeuchtigkeit
## [1] 62
##
## $PM01
-## [1] 58
+## [1] 57
##
## $Bodentemperatur
## [1] 55
@@ -1889,7 +1886,7 @@ ggplot(exposure_counts, aes(x = locationtimestamp, y = count, colour = exposure)
geom_line() +
scale_colour_manual(values = exposure_colors) +
xlab('Registration Date') + ylab('senseBox count')
-
+
Outdoor boxes are growing fast! We can also see the
introduction of mobile
sensor “stations” in 2017.
Let’s have a quick summary:
@@ -1915,7 +1912,7 @@ introduction of mobile
sensor “stations” in 2017.
outdoor
2022-01-01 11:59:16
2022-12-30 09:19:46
-1383
+1382
indoor
@@ -1963,7 +1960,7 @@ grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE)
ggplot(grouptag_counts, aes(x = locationtimestamp, y = count, colour = grouptag)) +
geom_line(aes(group = grouptag)) +
xlab('Registration Date') + ylab('senseBox count')
-
+
grouptag_counts %>%
summarise(
oldest = min(locationtimestamp),
@@ -2010,7 +2007,7 @@ ggplot(grouptag_counts, aes(x = locationtimestamp, y = count, colour = grouptag)
Captographies
2022-06-03 11:25:27
2022-11-16 13:26:39
-58
+57
SUGUCS
@@ -2061,29 +2058,29 @@ ggplot(grouptag_counts, aes(x = locationtimestamp, y = count, colour = grouptag)
15
+Mikroprojekt Mitmachklima
+2022-02-09 10:28:40
+2022-08-23 13:14:11
+15
+
+
MSGB
2022-11-14 09:08:57
2022-11-14 10:19:24
15
-
+
MSHO
2022-12-20 09:28:40
2022-12-20 10:01:38
15
-
+
MSIN
2022-11-21 17:02:39
2022-11-21 23:06:22
15
-
-Mikroprojekt Mitmachklima
-2022-02-09 10:28:40
-2022-08-23 13:14:11
-15
-
And see in which weeks the most boxes become (in)active:
boxes_by_date %>%
filter(count > 50) %>%
@@ -2147,61 +2144,66 @@ ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) +
+2023-02-27
+769
+inactive
+
+
2022-11-21
92
registered
-
+
2022-06-06
-77
+76
registered
-
+
2022-08-29
76
registered
-
+
2022-10-31
71
registered
-
+
2022-11-14
67
registered
-
+
2022-11-28
65
registered
-
+
2022-08-22
61
registered
-
+
2022-02-28
57
registered
-
+
2022-08-29
55
inactive
-
+
2022-03-21
54
registered
-
+
2022-12-12
54
registered
-
+
2022-01-24
51
registered
@@ -2227,9 +2229,9 @@ more:
ggplot(durations, aes(x = exposure, y = duration)) +
geom_boxplot() +
coord_flip() + ylab('Duration active in Days')
-
-The time of activity averages at only 145 days, though there are
-boxes with 418 days of activity, spanning a large chunk of
+
+The time of activity averages at only 148 days, though there are
+boxes with 423 days of activity, spanning a large chunk of
openSenseMap’s existence.
@@ -2245,7 +2247,7 @@ openSenseMap’s existence.
ggplot(durations, aes(x = grouptag, y = duration)) +
geom_boxplot() +
coord_flip() + ylab('Duration active in Days')
-
+
durations %>%
summarize(
duration_avg = round(mean(duration)),
@@ -2268,86 +2270,86 @@ ggplot(durations, aes(x = grouptag, y = duration)) +
Burgermeetnet
-236 days
+238 days
0 days
-404 days
-404 days
+409 days
+417 days
BurgerMeetnet
-169 days
+171 days
0 days
-395 days
-395 days
+400 days
+408 days
Captographies
-122 days
+125 days
0 days
-261 days
-263 days
+266 days
+276 days
BRGL
-107 days
-85 days
-109 days
+113 days
109 days
+114 days
+122 days
MSGB
-87 days
+92 days
39 days
-101 days
-101 days
+106 days
+114 days
-edu
-87 days
-0 days
-408 days
-408 days
-
-
AGIN
-86 days
-86 days
-86 days
+91 days
87 days
+92 days
+100 days
-
+
HTLJ
-84 days
+91 days
58 days
-94 days
-94 days
+99 days
+107 days
+
+
+edu
+89 days
+0 days
+413 days
+421 days
MSHO
-57 days
+61 days
36 days
-65 days
-65 days
+70 days
+78 days
HU Explorers
44 days
0 days
319 days
-328 days
+341 days
321heiss
0 days
0 days
0 days
-229 days
+243 days
-The time of activity averages at only 70 days, though there are boxes
-with 408 days of activity, spanning a large chunk of openSenseMap’s
+
The time of activity averages at only 72 days, though there are boxes
+with 413 days of activity, spanning a large chunk of openSenseMap’s
existence.
@@ -2367,7 +2369,7 @@ duration = boxes %>%
ggplot(duration, aes(x = substr(as.character(year), 0, 4), y = duration)) +
geom_boxplot() +
coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
-
+
diff --git a/inst/doc/osem-intro.R b/inst/doc/osem-intro.R
index 0732734..161fa12 100644
--- a/inst/doc/osem-intro.R
+++ b/inst/doc/osem-intro.R
@@ -1,20 +1,17 @@
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
-## ----results = F--------------------------------------------------------------
+## ----results = FALSE----------------------------------------------------------
library(magrittr)
library(opensensmapr)
-all_sensors = osem_boxes()
+# all_sensors = osem_boxes(cache = '.')
+all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## -----------------------------------------------------------------------------
summary(all_sensors)
-## ----message=F, warning=F-----------------------------------------------------
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
+## ---- message=FALSE, warning=FALSE--------------------------------------------
plot(all_sensors)
## -----------------------------------------------------------------------------
@@ -24,49 +21,54 @@ str(phenoms)
## -----------------------------------------------------------------------------
phenoms[phenoms > 20]
-## ----results = F--------------------------------------------------------------
-pm25_sensors = osem_boxes(
- exposure = 'outdoor',
- date = Sys.time(), # ±4 hours
- phenomenon = 'PM2.5'
-)
+## ----results = FALSE, eval=FALSE----------------------------------------------
+# pm25_sensors = osem_boxes(
+# exposure = 'outdoor',
+# date = Sys.time(), # ±4 hours
+# phenomenon = 'PM2.5'
+# )
## -----------------------------------------------------------------------------
+pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
+
summary(pm25_sensors)
plot(pm25_sensors)
-## -----------------------------------------------------------------------------
+## ---- results=FALSE, message=FALSE--------------------------------------------
library(sf)
library(units)
library(lubridate)
library(dplyr)
-# construct a bounding box: 12 kilometers around Berlin
-berlin = st_point(c(13.4034, 52.5120)) %>%
- st_sfc(crs = 4326) %>%
- st_transform(3857) %>% # allow setting a buffer in meters
- st_buffer(set_units(12, km)) %>%
- st_transform(4326) %>% # the opensensemap expects WGS 84
- st_bbox()
-
-## ----results = F--------------------------------------------------------------
-pm25 = osem_measurements(
- berlin,
- phenomenon = 'PM2.5',
- from = now() - days(3), # defaults to 2 days
- to = now()
-)
-plot(pm25)
+## ----bbox, results = FALSE, eval=FALSE----------------------------------------
+# # construct a bounding box: 12 kilometers around Berlin
+# berlin = st_point(c(13.4034, 52.5120)) %>%
+# st_sfc(crs = 4326) %>%
+# st_transform(3857) %>% # allow setting a buffer in meters
+# st_buffer(set_units(12, km)) %>%
+# st_transform(4326) %>% # the opensensemap expects WGS 84
+# st_bbox()
+# pm25 = osem_measurements(
+# berlin,
+# phenomenon = 'PM2.5',
+# from = now() - days(3), # defaults to 2 days
+# to = now()
+# )
+#
## -----------------------------------------------------------------------------
+pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
+plot(pm25)
+
+## ---- warning=FALSE-----------------------------------------------------------
outliers = filter(pm25, value > 100)$sensorId
-bad_sensors = outliers[, drop = T] %>% levels()
+bad_sensors = outliers[, drop = TRUE] %>% levels()
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
## -----------------------------------------------------------------------------
-st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
+st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
## -----------------------------------------------------------------------------
pm25 %>% filter(invalid == FALSE) %>% plot()
diff --git a/inst/doc/osem-intro.Rmd b/inst/doc/osem-intro.Rmd
index 8714a9f..20509da 100644
--- a/inst/doc/osem-intro.Rmd
+++ b/inst/doc/osem-intro.Rmd
@@ -28,11 +28,12 @@ Its main goals are to provide means for:
Before we look at actual observations, lets get a grasp of the openSenseMap
datasets' structure.
-```{r results = F}
+```{r results = FALSE}
library(magrittr)
library(opensensmapr)
-all_sensors = osem_boxes()
+# all_sensors = osem_boxes(cache = '.')
+all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
```
```{r}
summary(all_sensors)
@@ -47,11 +48,7 @@ couple of minutes ago.
Another feature of interest is the spatial distribution of the boxes: `plot()`
can help us out here. This function requires a bunch of optional dependencies though.
-```{r message=F, warning=F}
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
+```{r, message=FALSE, warning=FALSE}
plot(all_sensors)
```
@@ -81,7 +78,7 @@ We should check how many sensor stations provide useful data: We want only those
boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting
measurements:
-```{r results = F}
+```{r results = FALSE, eval=FALSE}
pm25_sensors = osem_boxes(
exposure = 'outdoor',
date = Sys.time(), # ±4 hours
@@ -89,6 +86,8 @@ pm25_sensors = osem_boxes(
)
```
```{r}
+pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
+
summary(pm25_sensors)
plot(pm25_sensors)
```
@@ -101,12 +100,16 @@ We could call `osem_measurements(pm25_sensors)` now, however we are focusing on
a restricted area of interest, the city of Berlin.
Luckily we can get the measurements filtered by a bounding box:
-```{r}
+```{r, results=FALSE, message=FALSE}
library(sf)
library(units)
library(lubridate)
library(dplyr)
+```
+
+Since the API takes quite long to respond with measurements, especially when filtered by space and time, we do not run the following chunks for publication of the package on CRAN.
+```{r bbox, results = FALSE, eval=FALSE}
# construct a bounding box: 12 kilometers around Berlin
berlin = st_point(c(13.4034, 52.5120)) %>%
st_sfc(crs = 4326) %>%
@@ -114,8 +117,6 @@ berlin = st_point(c(13.4034, 52.5120)) %>%
st_buffer(set_units(12, km)) %>%
st_transform(4326) %>% # the opensensemap expects WGS 84
st_bbox()
-```
-```{r results = F}
pm25 = osem_measurements(
berlin,
phenomenon = 'PM2.5',
@@ -123,15 +124,19 @@ pm25 = osem_measurements(
to = now()
)
+```
+
+```{r}
+pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
plot(pm25)
```
Now we can get started with actual spatiotemporal data analysis.
First, lets mask the seemingly uncalibrated sensors:
-```{r}
+```{r, warning=FALSE}
outliers = filter(pm25, value > 100)$sensorId
-bad_sensors = outliers[, drop = T] %>% levels()
+bad_sensors = outliers[, drop = TRUE] %>% levels()
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
```
@@ -139,7 +144,7 @@ pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
Then plot the measuring locations, flagging the outliers:
```{r}
-st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
+st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
```
Removing these sensors yields a nicer time series plot:
diff --git a/inst/doc/osem-intro.html b/inst/doc/osem-intro.html
index f28d894..e0d21b7 100644
--- a/inst/doc/osem-intro.html
+++ b/inst/doc/osem-intro.html
@@ -12,7 +12,7 @@
-
+
Exploring the openSenseMap Dataset
@@ -340,7 +340,7 @@ code > span.er { color: #a61717; background-color: #e3d2d2; }
Exploring the openSenseMap Dataset
Norwin Roosen
-2023-02-23
+2023-03-08
@@ -359,21 +359,22 @@ openSenseMap datasets’ structure.
library(magrittr)
library(opensensmapr)
-= osem_boxes() all_sensors
+# all_sensors = osem_boxes(cache = '.')
+= readRDS('boxes_precomputed.rds') # read precomputed file to save resources all_sensors
summary(all_sensors)
-## boxes total: 11367
+## boxes total: 11390
##
## boxes by exposure:
## indoor mobile outdoor unknown
-## 2344 591 8413 19
+## 2364 590 8417 19
##
## boxes by model:
## custom hackair_home_v2 homeEthernet
-## 2776 73 73
+## 2800 73 73
## homeEthernetFeinstaub homeV2Ethernet homeV2EthernetFeinstaub
## 55 21 40
## homeV2Lora homeV2Wifi homeV2WifiFeinstaub
-## 246 578 743
+## 240 577 743
## homeWifi homeWifiFeinstaub luftdaten_pms1003
## 215 222 9
## luftdaten_pms1003_bme280 luftdaten_pms3003 luftdaten_pms3003_bme280
@@ -381,20 +382,20 @@ openSenseMap datasets’ structure.
## luftdaten_pms5003 luftdaten_pms5003_bme280 luftdaten_pms7003
## 7 60 6
## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
-## 78 285 3060
+## 78 286 3066
## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
-## 114 135 2553
+## 114 135 2552
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 3601 3756 4252 5938 2052
+## 0 0 4151 5909 2062
##
## oldest box: 2016-08-09 19:34:42 (OBS Bohmte UK_02)
-## newest box: 2023-02-23 07:56:59 (Steinbrink 29)
+## newest box: 2023-02-28 09:47:17 (bitburg)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 1.000 4.000 5.000 4.981 5.000 76.000
+## 1.000 4.000 5.000 4.994 5.000 76.000
This gives a good overview already: As of writing this, there are
more than 700 sensor stations, of which ~50% are currently running. Most
of them are placed outdoors and have around 5 sensors each. The oldest
@@ -403,62 +404,58 @@ couple of minutes ago.
Another feature of interest is the spatial distribution of the boxes:
plot()
can help us out here. This function requires a bunch
of optional dependencies though.
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
-plot(all_sensors)
-
+plot(all_sensors)
+
It seems we have to reduce our area of interest to Germany.
But what do these sensor stations actually measure? Lets find out.
osem_phenomena()
gives us a named list of of the counts of
each observed phenomenon for the given set of sensor stations:
= osem_phenomena(all_sensors)
phenoms str(phenoms)
-## List of 3289
-## $ Temperatur : int 9385
-## $ rel. Luftfeuchte : int 8317
-## $ PM10 : int 8147
-## $ PM2.5 : int 8135
-## $ Luftdruck : int 5667
-## $ Beleuchtungsstärke : int 1674
-## $ UV-Intensität : int 1665
-## $ Temperature : int 643
+## List of 3298
+## $ Temperatur : int 9405
+## $ rel. Luftfeuchte : int 8315
+## $ PM10 : int 8148
+## $ PM2.5 : int 8136
+## $ Luftdruck : int 5668
+## $ Beleuchtungsstärke : int 1670
+## $ UV-Intensität : int 1660
+## $ Temperature : int 644
## $ Humidity : int 473
-## $ VOC : int 422
-## $ Luftfeuchte : int 362
-## $ Lufttemperatur : int 356
-## $ CO₂ : int 304
+## $ VOC : int 423
+## $ Luftfeuchte : int 363
+## $ Lufttemperatur : int 357
+## $ CO₂ : int 305
## $ Pressure : int 293
-## $ Bodenfeuchte : int 284
+## $ Bodenfeuchte : int 283
## $ Luftfeuchtigkeit : int 272
-## $ atm. Luftdruck : int 245
+## $ atm. Luftdruck : int 246
## $ Lautstärke : int 240
## $ PM01 : int 206
## $ IAQ : int 162
## $ Kalibrierungswert : int 156
## $ rel. Luftfeuchte SCD30 : int 156
-## $ Bodentemperatur : int 155
+## $ Bodentemperatur : int 154
## $ Temperatur SCD30 : int 154
## $ CO2eq : int 153
## $ Windgeschwindigkeit : int 152
-## $ pH-Wert : int 123
-## $ Gesamthärte : int 122
-## $ Blei : int 120
-## $ Eisen : int 120
+## $ pH-Wert : int 143
+## $ Gesamthärte : int 142
+## $ Blei : int 140
+## $ Eisen : int 140
+## $ Gesamthärte 2 : int 140
+## $ Kupfer C : int 140
+## $ Kupfer D : int 140
+## $ Kupfer1 : int 140
+## $ Kupfer2 : int 140
+## $ Nitrat : int 140
+## $ Nitrit : int 140
## $ GesamthaerteLabor : int 120
-## $ Gesamthärte 2 : int 120
-## $ Kupfer C : int 120
-## $ Kupfer D : int 120
-## $ Kupfer1 : int 120
-## $ Kupfer2 : int 120
-## $ Nitrat : int 120
-## $ Nitrit : int 120
-## $ CO2 : int 112
+## $ CO2 : int 113
## $ Feinstaub PM10 : int 98
## $ Windrichtung : int 82
-## $ rel. Luftfeuchte (HECA) : int 74
-## $ Temperatur (HECA) : int 72
+## $ rel. Luftfeuchte (HECA) : int 75
+## $ Temperatur (HECA) : int 73
## $ Temperatura : int 69
## $ Helligkeit : int 67
## $ Feinstaub PM2.5 : int 65
@@ -468,8 +465,8 @@ each observed phenomenon for the given set of sensor stations:
## $ Durchschnitt Umgebungslautstärke : int 51
## $ Minimum Umgebungslautstärke : int 51
## $ UV-Index : int 49
+## $ Batterie : int 46
## $ temperature : int 46
-## $ Batterie : int 45
## $ Feinstaub PM1.0 : int 41
## $ Umgebungslautstärke : int 41
## $ UV : int 40
@@ -497,6 +494,7 @@ each observed phenomenon for the given set of sensor stations:
## $ PM 2.5 : int 21
## $ Battery : int 20
## $ Ciśnienie : int 20
+## $ EisenLabor : int 20
## $ Air Pressure : int 19
## $ Regen : int 19
## $ Schall : int 19
@@ -514,7 +512,6 @@ each observed phenomenon for the given set of sensor stations:
## $ Illuminance : int 15
## $ Speed : int 15
## $ Wind Speed : int 15
-## $ pressure : int 15
## [list output truncated]
Thats quite some noise there, with many phenomena being measured by a
single sensor only, or many duplicated phenomena due to slightly
@@ -523,55 +520,55 @@ filter out the noise and find those phenomena with high sensor
numbers:
> 20] phenoms[phenoms
## $Temperatur
-## [1] 9385
+## [1] 9405
##
## $`rel. Luftfeuchte`
-## [1] 8317
+## [1] 8315
##
## $PM10
-## [1] 8147
+## [1] 8148
##
## $PM2.5
-## [1] 8135
+## [1] 8136
##
## $Luftdruck
-## [1] 5667
+## [1] 5668
##
## $Beleuchtungsstärke
-## [1] 1674
+## [1] 1670
##
## $`UV-Intensität`
-## [1] 1665
+## [1] 1660
##
## $Temperature
-## [1] 643
+## [1] 644
##
## $Humidity
## [1] 473
##
## $VOC
-## [1] 422
+## [1] 423
##
## $Luftfeuchte
-## [1] 362
+## [1] 363
##
## $Lufttemperatur
-## [1] 356
+## [1] 357
##
## $`CO₂`
-## [1] 304
+## [1] 305
##
## $Pressure
## [1] 293
##
## $Bodenfeuchte
-## [1] 284
+## [1] 283
##
## $Luftfeuchtigkeit
## [1] 272
##
## $`atm. Luftdruck`
-## [1] 245
+## [1] 246
##
## $Lautstärke
## [1] 240
@@ -589,7 +586,7 @@ numbers:
## [1] 156
##
## $Bodentemperatur
-## [1] 155
+## [1] 154
##
## $`Temperatur SCD30`
## [1] 154
@@ -601,43 +598,43 @@ numbers:
## [1] 152
##
## $`pH-Wert`
-## [1] 123
+## [1] 143
##
## $Gesamthärte
-## [1] 122
+## [1] 142
##
## $Blei
-## [1] 120
+## [1] 140
##
## $Eisen
-## [1] 120
-##
-## $GesamthaerteLabor
-## [1] 120
+## [1] 140
##
## $`Gesamthärte 2`
-## [1] 120
+## [1] 140
##
## $`Kupfer C`
-## [1] 120
+## [1] 140
##
## $`Kupfer D`
-## [1] 120
+## [1] 140
##
## $Kupfer1
-## [1] 120
+## [1] 140
##
## $Kupfer2
-## [1] 120
+## [1] 140
##
## $Nitrat
-## [1] 120
+## [1] 140
##
## $Nitrit
+## [1] 140
+##
+## $GesamthaerteLabor
## [1] 120
##
## $CO2
-## [1] 112
+## [1] 113
##
## $`Feinstaub PM10`
## [1] 98
@@ -646,10 +643,10 @@ numbers:
## [1] 82
##
## $`rel. Luftfeuchte (HECA)`
-## [1] 74
+## [1] 75
##
## $`Temperatur (HECA)`
-## [1] 72
+## [1] 73
##
## $Temperatura
## [1] 69
@@ -678,11 +675,11 @@ numbers:
## $`UV-Index`
## [1] 49
##
-## $temperature
+## $Batterie
## [1] 46
##
-## $Batterie
-## [1] 45
+## $temperature
+## [1] 46
##
## $`Feinstaub PM1.0`
## [1] 41
@@ -768,41 +765,43 @@ measurements:
date = Sys.time(), # ±4 hours
phenomenon = 'PM2.5'
)
-summary(pm25_sensors)
-## boxes total: 3002
+= readRDS('pm25_sensors.rds') # read precomputed file to save resources
+ pm25_sensors
+summary(pm25_sensors)
+## boxes total: 3011
##
## boxes by exposure:
## outdoor
-## 3002
+## 3011
##
## boxes by model:
## custom hackair_home_v2 homeEthernetFeinstaub
-## 174 8 12
+## 175 8 12
## homeV2EthernetFeinstaub homeV2Lora homeV2Wifi
-## 10 21 2
+## 9 22 2
## homeV2WifiFeinstaub homeWifi homeWifiFeinstaub
-## 126 3 30
+## 132 3 32
## luftdaten_pms1003 luftdaten_pms1003_bme280 luftdaten_pms5003
-## 1 2 3
+## 1 3 3
## luftdaten_pms5003_bme280 luftdaten_pms7003 luftdaten_pms7003_bme280
-## 11 2 26
+## 10 2 28
## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
-## 115 1365 59
+## 117 1365 60
## luftdaten_sds011_dht11 luftdaten_sds011_dht22
-## 45 987
+## 44 983
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 2977 3002 3002 3002 0
+## 0 0 3011 3011 0
##
## oldest box: 2017-03-03 18:20:43 (Witten Heven Dorf)
-## newest box: 2023-02-23 07:56:59 (Steinbrink 29)
+## newest box: 2023-02-28 08:28:27 (eth0)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 2.000 4.000 5.000 4.838 5.000 26.000
+## 2.000 4.000 5.000 4.854 5.000 26.000
plot(pm25_sensors)
-
+
Thats still more than 200 measuring stations, we can work with
that.
@@ -813,41 +812,41 @@ measurements. We could call osem_measurements(pm25_sensors)
now, however we are focusing on a restricted area of interest, the city
of Berlin. Luckily we can get the measurements filtered by a bounding
box:
-library(sf)
-## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
-library(units)
-## udunits database from C:/Software/RPackages/units/share/udunits/udunits2.xml
-library(lubridate)
-library(dplyr)
-
-# construct a bounding box: 12 kilometers around Berlin
-= st_point(c(13.4034, 52.5120)) %>%
- berlin st_sfc(crs = 4326) %>%
- st_transform(3857) %>% # allow setting a buffer in meters
- st_buffer(set_units(12, km)) %>%
- st_transform(4326) %>% # the opensensemap expects WGS 84
- st_bbox()
-= osem_measurements(
- pm25
- berlin,phenomenon = 'PM2.5',
- from = now() - days(3), # defaults to 2 days
- to = now()
-
- )
-plot(pm25)
-
+library(sf)
+library(units)
+library(lubridate)
+library(dplyr)
+Since the API takes quite a long time to respond with measurements,
+especially when filtered by space and time, we do not run the following
+chunks when publishing the package on CRAN.
+# construct a bounding box: 12 kilometers around Berlin
+= st_point(c(13.4034, 52.5120)) %>%
+ berlin st_sfc(crs = 4326) %>%
+ st_transform(3857) %>% # allow setting a buffer in meters
+ st_buffer(set_units(12, km)) %>%
+ st_transform(4326) %>% # the opensensemap expects WGS 84
+ st_bbox()
+ = osem_measurements(
+ pm25
+ berlin,phenomenon = 'PM2.5',
+ from = now() - days(3), # defaults to 2 days
+ to = now()
+ )
+= readRDS('pm25_berlin.rds') # read precomputed file to save resources
+ pm25 plot(pm25)
+
Now we can get started with actual spatiotemporal data analysis.
First, lets mask the seemingly uncalibrated sensors:
-= filter(pm25, value > 100)$sensorId
- outliers = outliers[, drop = T] %>% levels()
- bad_sensors
-= mutate(pm25, invalid = sensorId %in% bad_sensors) pm25
+= filter(pm25, value > 100)$sensorId
+ outliers = outliers[, drop = TRUE] %>% levels()
+ bad_sensors
+= mutate(pm25, invalid = sensorId %in% bad_sensors) pm25
Then plot the measuring locations, flagging the outliers:
-st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
-
+st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
+
Removing these sensors yields a nicer time series plot:
-%>% filter(invalid == FALSE) %>% plot() pm25
-
+%>% filter(invalid == FALSE) %>% plot() pm25
+
Further analysis: comparison with LANUV data TODO
diff --git a/inst/doc/osem-serialization.R b/inst/doc/osem-serialization.R
index ef056b6..8a97672 100644
--- a/inst/doc/osem-serialization.R
+++ b/inst/doc/osem-serialization.R
@@ -27,25 +27,25 @@ b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache
-## ----data, results='hide'-----------------------------------------------------
-# first get our example data:
-measurements = osem_measurements('Windgeschwindigkeit')
-
-## ----serialize_json-----------------------------------------------------------
-# serializing senseBoxes to JSON, and loading from file again:
-write(jsonlite::serializeJSON(measurements), 'measurements.json')
-measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
-class(measurements_from_file)
-
-## ----serialize_attrs----------------------------------------------------------
-# note the toJSON call instead of serializeJSON
-write(jsonlite::toJSON(measurements), 'measurements_bad.json')
-measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
-class(measurements_without_attrs)
-
-measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
-class(measurements_with_attrs)
-
-## ----cleanup, include=FALSE---------------------------------------------------
-file.remove('measurements.json', 'measurements_bad.json')
+## ----data, results='hide', eval=FALSE-----------------------------------------
+# # first get our example data:
+# measurements = osem_measurements('Windgeschwindigkeit')
+
+## ----serialize_json, eval=FALSE-----------------------------------------------
+# # serializing senseBoxes to JSON, and loading from file again:
+# write(jsonlite::serializeJSON(measurements), 'measurements.json')
+# measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
+# class(measurements_from_file)
+
+## ----serialize_attrs, eval=FALSE----------------------------------------------
+# # note the toJSON call instead of serializeJSON
+# write(jsonlite::toJSON(measurements), 'measurements_bad.json')
+# measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
+# class(measurements_without_attrs)
+#
+# measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
+# class(measurements_with_attrs)
+
+## ----cleanup, include=FALSE, eval=FALSE---------------------------------------
+# file.remove('measurements.json', 'measurements_bad.json')
diff --git a/inst/doc/osem-serialization.Rmd b/inst/doc/osem-serialization.Rmd
index c09e567..d42c9be 100644
--- a/inst/doc/osem-serialization.Rmd
+++ b/inst/doc/osem-serialization.Rmd
@@ -71,7 +71,7 @@ osem_clear_cache(getwd()) # clears a custom cache
If you want to roll your own serialization method to support custom data formats,
here's how:
-```{r data, results='hide'}
+```{r data, results='hide', eval=FALSE}
# first get our example data:
measurements = osem_measurements('Windgeschwindigkeit')
```
@@ -79,7 +79,7 @@ measurements = osem_measurements('Windgeschwindigkeit')
If you are paranoid and worry about `.rds` files not being decodable anymore
in the (distant) future, you could serialize to a plain text format such as JSON.
This of course comes at the cost of storage space and performance.
-```{r serialize_json}
+```{r serialize_json, eval=FALSE}
# serializing senseBoxes to JSON, and loading from file again:
write(jsonlite::serializeJSON(measurements), 'measurements.json')
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
@@ -90,7 +90,7 @@ This method also persists the R object metadata (classes, attributes).
If you were to use a serialization method that can't persist object metadata, you
could re-apply it with the following functions:
-```{r serialize_attrs}
+```{r serialize_attrs, eval=FALSE}
# note the toJSON call instead of serializeJSON
write(jsonlite::toJSON(measurements), 'measurements_bad.json')
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
@@ -101,6 +101,6 @@ class(measurements_with_attrs)
```
The same goes for boxes via `osem_as_sensebox()`.
-```{r cleanup, include=FALSE}
+```{r cleanup, include=FALSE, eval=FALSE}
file.remove('measurements.json', 'measurements_bad.json')
```
diff --git a/inst/doc/osem-serialization.html b/inst/doc/osem-serialization.html
index e9bb8f3..f1a5343 100644
--- a/inst/doc/osem-serialization.html
+++ b/inst/doc/osem-serialization.html
@@ -12,7 +12,7 @@
-
+
Caching openSenseMap Data for Reproducibility
@@ -341,7 +341,7 @@ code > span.er { color: #a61717; background-color: #e3d2d2; }
Caching openSenseMap Data for
Reproducibility
Norwin Roosen
-2023-02-23
+2023-03-08
@@ -407,21 +407,17 @@ storage space and performance.
write(jsonlite::serializeJSON(measurements), 'measurements.json')
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
class(measurements_from_file)
-## [1] "osem_measurements" "tbl_df" "tbl"
-## [4] "data.frame"
This method also persists the R object metadata (classes,
attributes). If you were to use a serialization method that can’t
persist object metadata, you could re-apply it with the following
functions:
-# note the toJSON call instead of serializeJSON
-write(jsonlite::toJSON(measurements), 'measurements_bad.json')
-= jsonlite::fromJSON('measurements_bad.json')
- measurements_without_attrs class(measurements_without_attrs)
-## [1] "data.frame"
-= osem_as_measurements(measurements_without_attrs)
- measurements_with_attrs class(measurements_with_attrs)
-## [1] "osem_measurements" "tbl_df" "tbl"
-## [4] "data.frame"
+# note the toJSON call instead of serializeJSON
+write(jsonlite::toJSON(measurements), 'measurements_bad.json')
+= jsonlite::fromJSON('measurements_bad.json')
+ measurements_without_attrs class(measurements_without_attrs)
+
+= osem_as_measurements(measurements_without_attrs)
+ measurements_with_attrs class(measurements_with_attrs)
The same goes for boxes via osem_as_sensebox()
.