diff --git a/inst/doc/osem-history.R b/inst/doc/osem-history.R new file mode 100644 index 0000000..cda0dcb --- /dev/null +++ b/inst/doc/osem-history.R @@ -0,0 +1,133 @@ +## ----setup, results='hide', message=FALSE, warning=FALSE---------------------- +# required packages: +library(opensensmapr) # data download +library(dplyr) # data wrangling +library(ggplot2) # plotting +library(lubridate) # date arithmetic +library(zoo) # rollmean() + +## ----download----------------------------------------------------------------- +# if you want to see results for a specific subset of boxes, +# just specify a filter such as grouptag='ifgi' here +boxes = osem_boxes() + +## ----exposure_counts, message=FALSE------------------------------------------- +exposure_counts = boxes %>% + group_by(exposure) %>% + mutate(count = row_number(createdAt)) + +exposure_colors = c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey') +ggplot(exposure_counts, aes(x = createdAt, y = count, colour = exposure)) + + geom_line() + + scale_colour_manual(values = exposure_colors) + + xlab('Registration Date') + ylab('senseBox count') + +## ----exposure_summary--------------------------------------------------------- +exposure_counts %>% + summarise( + oldest = min(createdAt), + newest = max(createdAt), + count = max(count) + ) %>% + arrange(desc(count)) + +## ----grouptag_counts, message=FALSE------------------------------------------- +grouptag_counts = boxes %>% + group_by(grouptag) %>% + # only include grouptags with 8 or more members + filter(length(grouptag) >= 8 & !is.na(grouptag)) %>% + mutate(count = row_number(createdAt)) + +# helper for sorting the grouptags by boxcount +sortLvls = function(oldFactor, ascending = TRUE) { + lvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names() + factor(oldFactor, levels = lvls) +} +grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE) + +ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) + + geom_line(aes(group = grouptag)) + + xlab('Registration Date') + ylab('senseBox count') + +## ----grouptag_summary--------------------------------------------------------- +grouptag_counts %>% + summarise( + oldest = min(createdAt), + newest = max(createdAt), + count = max(count) + ) %>% + arrange(desc(count)) + +## ----growthrate_registered, warning=FALSE, message=FALSE, results='hide'------ +bins = 'week' +mvavg_bins = 6 + +growth = boxes %>% + mutate(week = cut(as.Date(createdAt), breaks = bins)) %>% + group_by(week) %>% + summarize(count = length(week)) %>% + mutate(event = 'registered') + +## ----growthrate_inactive, warning=FALSE, message=FALSE, results='hide'-------- +inactive = boxes %>% + # remove boxes that were updated in the last two days, + # b/c any box becomes inactive at some point by definition of updatedAt + filter(updatedAt < now() - days(2)) %>% + mutate(week = cut(as.Date(updatedAt), breaks = bins)) %>% + group_by(week) %>% + summarize(count = length(week)) %>% + mutate(event = 'inactive') + +## ----growthrate, warning=FALSE, message=FALSE, results='hide'----------------- +boxes_by_date = bind_rows(growth, inactive) %>% group_by(event) + +ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) + + xlab('Time') + ylab(paste('rate per ', bins)) + + scale_x_date(date_breaks="years", date_labels="%Y") + + scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) + + geom_point(aes(y = count), size = 0.5) + + # moving average, make first and last value NA (to ensure identical length of vectors) + geom_line(aes(y = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA)))) + +## ----exposure_duration, message=FALSE----------------------------------------- +duration = boxes %>% + group_by(exposure) %>% + filter(!is.na(updatedAt)) %>% + mutate(duration = difftime(updatedAt, createdAt, units='days')) + +ggplot(duration, aes(x = exposure, y = duration)) + + geom_boxplot() + + coord_flip() + ylab('Duration active in Days') + +## ----grouptag_duration, message=FALSE----------------------------------------- +duration = boxes %>% + group_by(grouptag) %>% + # only include grouptags with 8 or more members + filter(length(grouptag) >= 8 & !is.na(grouptag) & !is.na(updatedAt)) %>% + mutate(duration = difftime(updatedAt, createdAt, units='days')) + +ggplot(duration, aes(x = grouptag, y = duration)) + + geom_boxplot() + + coord_flip() + ylab('Duration active in Days') + +duration %>% + summarize( + duration_avg = round(mean(duration)), + duration_min = round(min(duration)), + duration_max = round(max(duration)), + oldest_box = round(max(difftime(now(), createdAt, units='days'))) + ) %>% + arrange(desc(duration_avg)) + +## ----year_duration, message=FALSE--------------------------------------------- +# NOTE: boxes older than 2016 missing due to missing updatedAt in database +duration = boxes %>% + mutate(year = cut(as.Date(createdAt), breaks = 'year')) %>% + group_by(year) %>% + filter(!is.na(updatedAt)) %>% + mutate(duration = difftime(updatedAt, createdAt, units='days')) + +ggplot(duration, aes(x = substr(as.character(year), 0, 4), y = duration)) + + geom_boxplot() + + coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration') + diff --git a/inst/doc/osem-history.Rmd b/inst/doc/osem-history.Rmd new file mode 100644 index 0000000..5a6ab21 --- /dev/null +++ b/inst/doc/osem-history.Rmd @@ -0,0 +1,243 @@ +--- +title: "Visualising the History of openSenseMap.org" +author: "Norwin Roosen" +date: '`r Sys.Date()`' +output: + rmarkdown::html_vignette: + df_print: kable + fig_height: 5 + fig_width: 7 + toc: yes + html_document: + code_folding: hide + df_print: kable + theme: lumen + toc: yes + toc_float: yes +vignette: > + %\VignetteIndexEntry{Visualising the History of openSenseMap.org} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} +--- + +> This vignette serves as an example on data wrangling & visualization with +`opensensmapr`, `dplyr` and `ggplot2`. + +```{r setup, results='hide', message=FALSE, warning=FALSE} +# required packages: +library(opensensmapr) # data download +library(dplyr) # data wrangling +library(ggplot2) # plotting +library(lubridate) # date arithmetic +library(zoo) # rollmean() +``` + +openSenseMap.org has grown quite a bit in the last years; it would be interesting +to see how we got to the current `r osem_counts()$boxes` sensor stations, +split up by various attributes of the boxes. + +While `opensensmapr` provides extensive methods of filtering boxes by attributes +on the server, we do the filtering within R to save time and gain flexibility. +So the first step is to retrieve *all the boxes*: + +```{r download} +# if you want to see results for a specific subset of boxes, +# just specify a filter such as grouptag='ifgi' here +boxes = osem_boxes() +``` + +# Plot count of boxes by time {.tabset} +By looking at the `createdAt` attribute of each box we know the exact time a box +was registered. +With this approach we have no information about boxes that were deleted in the +meantime, but that's okay for now. + +## ...and exposure +```{r exposure_counts, message=FALSE} +exposure_counts = boxes %>% + group_by(exposure) %>% + mutate(count = row_number(createdAt)) + +exposure_colors = c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey') +ggplot(exposure_counts, aes(x = createdAt, y = count, colour = exposure)) + + geom_line() + + scale_colour_manual(values = exposure_colors) + + xlab('Registration Date') + ylab('senseBox count') +``` + +Outdoor boxes are growing *fast*! +We can also see the introduction of `mobile` sensor "stations" in 2017. While +mobile boxes are still few, we can expect a quick rise in 2018 once the new +senseBox MCU with GPS support is released. + +Let's have a quick summary: +```{r exposure_summary} +exposure_counts %>% + summarise( + oldest = min(createdAt), + newest = max(createdAt), + count = max(count) + ) %>% + arrange(desc(count)) +``` + +## ...and grouptag +We can try to find out where the increases in growth came from, by analysing the +box count by grouptag. + +Caveats: Only a small subset of boxes has a grouptag, and we should assume +that these groups are actually bigger. Also, we can see that grouptag naming is +inconsistent (`Luftdaten`, `luftdaten.info`, ...) + +```{r grouptag_counts, message=FALSE} +grouptag_counts = boxes %>% + group_by(grouptag) %>% + # only include grouptags with 8 or more members + filter(length(grouptag) >= 8 & !is.na(grouptag)) %>% + mutate(count = row_number(createdAt)) + +# helper for sorting the grouptags by boxcount +sortLvls = function(oldFactor, ascending = TRUE) { + lvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names() + factor(oldFactor, levels = lvls) +} +grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE) + +ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) + + geom_line(aes(group = grouptag)) + + xlab('Registration Date') + ylab('senseBox count') +``` + +```{r grouptag_summary} +grouptag_counts %>% + summarise( + oldest = min(createdAt), + newest = max(createdAt), + count = max(count) + ) %>% + arrange(desc(count)) +``` + +# Plot rate of growth and inactivity per week +First we group the boxes by `createdAt` into bins of one week: +```{r growthrate_registered, warning=FALSE, message=FALSE, results='hide'} +bins = 'week' +mvavg_bins = 6 + +growth = boxes %>% + mutate(week = cut(as.Date(createdAt), breaks = bins)) %>% + group_by(week) %>% + summarize(count = length(week)) %>% + mutate(event = 'registered') +``` + +We can do the same for `updatedAt`, which informs us about the last change to +a box, including uploaded measurements. +This method of determining inactive boxes is fairly inaccurate and should be +considered an approximation, because we have no information about intermediate +inactive phases. +Also deleted boxes would probably have a big impact here. +```{r growthrate_inactive, warning=FALSE, message=FALSE, results='hide'} +inactive = boxes %>% + # remove boxes that were updated in the last two days, + # b/c any box becomes inactive at some point by definition of updatedAt + filter(updatedAt < now() - days(2)) %>% + mutate(week = cut(as.Date(updatedAt), breaks = bins)) %>% + group_by(week) %>% + summarize(count = length(week)) %>% + mutate(event = 'inactive') +``` + +Now we can combine both datasets for plotting: +```{r growthrate, warning=FALSE, message=FALSE, results='hide'} +boxes_by_date = bind_rows(growth, inactive) %>% group_by(event) + +ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) + + xlab('Time') + ylab(paste('rate per ', bins)) + + scale_x_date(date_breaks="years", date_labels="%Y") + + scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) + + geom_point(aes(y = count), size = 0.5) + + # moving average, make first and last value NA (to ensure identical length of vectors) + geom_line(aes(y = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA)))) +``` + +We see a sudden rise in early 2017, which lines up with the fast growing grouptag `Luftdaten`. +This was enabled by an integration of openSenseMap.org into the firmware of the +air quality monitoring project [luftdaten.info](https://sensor.community/de/). +The dips in mid 2017 and early 2018 could possibly be explained by production/delivery issues +of the senseBox hardware, but I have no data on the exact time frames to verify. + +# Plot duration of boxes being active {.tabset} +While we are looking at `createdAt` and `updatedAt`, we can also extract the duration of activity +of each box, and look at metrics by exposure and grouptag once more: + +## ...by exposure +```{r exposure_duration, message=FALSE} +duration = boxes %>% + group_by(exposure) %>% + filter(!is.na(updatedAt)) %>% + mutate(duration = difftime(updatedAt, createdAt, units='days')) + +ggplot(duration, aes(x = exposure, y = duration)) + + geom_boxplot() + + coord_flip() + ylab('Duration active in Days') +``` + +The time of activity averages at only `r round(mean(duration$duration))` days, +though there are boxes with `r round(max(duration$duration))` days of activity, +spanning a large chunk of openSenseMap's existence. + +## ...by grouptag +```{r grouptag_duration, message=FALSE} +duration = boxes %>% + group_by(grouptag) %>% + # only include grouptags with 8 or more members + filter(length(grouptag) >= 8 & !is.na(grouptag) & !is.na(updatedAt)) %>% + mutate(duration = difftime(updatedAt, createdAt, units='days')) + +ggplot(duration, aes(x = grouptag, y = duration)) + + geom_boxplot() + + coord_flip() + ylab('Duration active in Days') + +duration %>% + summarize( + duration_avg = round(mean(duration)), + duration_min = round(min(duration)), + duration_max = round(max(duration)), + oldest_box = round(max(difftime(now(), createdAt, units='days'))) + ) %>% + arrange(desc(duration_avg)) +``` + +The time of activity averages at only `r round(mean(duration$duration))` days, +though there are boxes with `r round(max(duration$duration))` days of activity, +spanning a large chunk of openSenseMap's existence. + +## ...by year of registration +This is less useful, as older boxes are active for a longer time by definition. +If you have an idea how to compensate for that, please send a [Pull Request][PR]! + +```{r year_duration, message=FALSE} +# NOTE: boxes older than 2016 missing due to missing updatedAt in database +duration = boxes %>% + mutate(year = cut(as.Date(createdAt), breaks = 'year')) %>% + group_by(year) %>% + filter(!is.na(updatedAt)) %>% + mutate(duration = difftime(updatedAt, createdAt, units='days')) + +ggplot(duration, aes(x = substr(as.character(year), 0, 4), y = duration)) + + geom_boxplot() + + coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration') +``` + +# More Visualisations +Other visualisations come to mind, and are left as an exercise to the reader. +If you implemented some, feel free to add them to this vignette via a [Pull Request][PR]. + +* growth by phenomenon +* growth by location -> (interactive) map +* set inactive rate in relation to total box count +* filter timespans with big dips in growth rate, and extrapolate the amount of + senseBoxes that could be on the platform today, assuming there were no production issues ;) + +[PR]: https://github.com/sensebox/opensensmapr/pulls diff --git a/inst/doc/osem-history.html b/inst/doc/osem-history.html new file mode 100644 index 0000000..4f7b3d5 --- /dev/null +++ b/inst/doc/osem-history.html @@ -0,0 +1,1818 @@ + + + + +
+ + + + + + + + + + + +++This vignette serves as an example on data wrangling & +visualization with
+opensensmapr
,dplyr
and +ggplot2
.
# required packages:
+library(opensensmapr) # data download
+library(dplyr) # data wrangling
+library(ggplot2) # plotting
+library(lubridate) # date arithmetic
+library(zoo) # rollmean()
openSenseMap.org has grown quite a bit in the last years; it would be +interesting to see how we got to the current 11367 sensor stations, +split up by various attributes of the boxes.
+While opensensmapr
provides extensive methods of
+filtering boxes by attributes on the server, we do the filtering within
+R to save time and gain flexibility. So the first step is to retrieve
+all the boxes:
# if you want to see results for a specific subset of boxes,
+# just specify a filter such as grouptag='ifgi' here
+= osem_boxes() boxes
By looking at the createdAt
attribute of each box we
+know the exact time a box was registered. With this approach we have no
+information about boxes that were deleted in the meantime, but that’s
+okay for now.
= boxes %>%
+ exposure_counts group_by(exposure) %>%
+ mutate(count = row_number(createdAt))
+
+= c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey')
+ exposure_colors ggplot(exposure_counts, aes(x = createdAt, y = count, colour = exposure)) +
+geom_line() +
+ scale_colour_manual(values = exposure_colors) +
+ xlab('Registration Date') + ylab('senseBox count')
Outdoor boxes are growing fast! We can also see the
+introduction of mobile
sensor “stations” in 2017. While
+mobile boxes are still few, we can expect a quick rise in 2018 once the
+new senseBox MCU with GPS support is released.
Let’s have a quick summary:
+%>%
+ exposure_counts summarise(
+ oldest = min(createdAt),
+ newest = max(createdAt),
+ count = max(count)
+ %>%
+ ) arrange(desc(count))
exposure | +oldest | +newest | +count | +
---|---|---|---|
outdoor | +2016-08-09 19:34:42 | +2023-02-23 07:56:59 | +8413 | +
indoor | +2018-05-10 20:14:44 | +2023-02-21 14:23:52 | +2344 | +
mobile | +2020-10-24 14:39:30 | +2023-02-20 16:32:48 | +591 | +
unknown | +2022-03-01 07:04:31 | +2022-03-30 11:25:43 | +19 | +
We can try to find out where the increases in growth came from, by +analysing the box count by grouptag.
+Caveats: Only a small subset of boxes has a grouptag, and we should
+assume that these groups are actually bigger. Also, we can see that
+grouptag naming is inconsistent (Luftdaten
,
+luftdaten.info
, …)
= boxes %>%
+ grouptag_counts group_by(grouptag) %>%
+ # only include grouptags with 8 or more members
+ filter(length(grouptag) >= 8 & !is.na(grouptag)) %>%
+ mutate(count = row_number(createdAt))
+
+# helper for sorting the grouptags by boxcount
+= function(oldFactor, ascending = TRUE) {
+ sortLvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names()
+ lvls factor(oldFactor, levels = lvls)
+
+ }$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE)
+ grouptag_counts
+ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) +
+geom_line(aes(group = grouptag)) +
+ xlab('Registration Date') + ylab('senseBox count')
%>%
+ grouptag_counts summarise(
+ oldest = min(createdAt),
+ newest = max(createdAt),
+ count = max(count)
+ %>%
+ ) arrange(desc(count))
grouptag | +oldest | +newest | +count | +
---|---|---|---|
edu | +2022-03-30 11:25:43 | +2023-02-20 11:06:45 | +430 | +
Save Dnipro | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +354 | +
Luftdaten | +2022-03-30 11:25:43 | +2023-01-27 15:22:54 | +244 | +
HU Explorers | +2022-03-30 11:25:43 | +2022-12-14 10:11:34 | +124 | +
CS:iDrop | +2023-01-10 10:22:33 | +2023-01-31 15:13:46 | +120 | +
#stropdeaer | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +101 | +
321heiss | +2022-06-27 14:12:25 | +2022-08-08 10:22:21 | +91 | +
GIZ Clean Air Day Project | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +76 | +
Captographies | +2021-05-21 15:24:45 | +2023-01-31 12:11:49 | +62 | +
Futurium | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +40 | +
Bad_Hersfeld | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +37 | +
TKS Bonn | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +36 | +
Mikroprojekt Mitmachklima | +2022-03-30 11:25:43 | +2022-08-23 13:14:11 | +34 | +
kerekdomb_ | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +34 | +
Bottrop-Feinstaub | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +33 | +
Luchtwachters Delft | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +33 | +
Futurium 2021 | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +32 | +
Feinstaub | +2022-03-30 11:25:43 | +2022-08-01 16:27:10 | +29 | +
luftdaten.info | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +28 | +
ifgi | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +26 | +
SUGUCS | +2022-11-30 15:25:32 | +2023-01-23 13:17:54 | +25 | +
cleanairfrome | +2022-03-30 11:25:43 | +2022-05-15 21:13:30 | +24 | +
WAUW!denberg | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +23 | +
freshairbromley | +2022-03-30 11:25:43 | +2023-01-31 10:18:57 | +23 | +
Riga | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +22 | +
KJR-M | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +21 | +
Mikroklima | +2022-03-30 11:25:43 | +2022-09-05 08:38:57 | +21 | +
bad_hersfeld | +2022-03-30 11:25:43 | +2022-06-14 09:34:02 | +21 | +
Smart City MS | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +20 | +
SekSeeland | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +19 | +
Luftdaten.info | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +18 | +
1 | +2022-03-30 11:25:43 | +2022-04-25 15:07:39 | +17 | +
Apeldoorn | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +17 | +
luftdaten | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +17 | +
BurgerMeetnet | +2022-03-30 11:25:43 | +2022-05-10 21:22:35 | +16 | +
Haus C | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +16 | +
AGIN | +2022-11-28 17:33:12 | +2022-11-28 17:42:18 | +15 | +
APPI | +2023-01-26 13:38:22 | +2023-01-26 13:40:59 | +15 | +
BRGL | +2022-11-06 19:23:43 | +2022-11-06 22:08:36 | +15 | +
BRGW | +2022-11-02 10:28:52 | +2022-11-02 13:32:12 | +15 | +
Burgermeetnet | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +15 | +
HTLJ | +2022-11-21 22:04:17 | +2022-11-21 22:05:47 | +15 | +
MSGB | +2022-11-14 09:08:57 | +2022-11-14 10:19:24 | +15 | +
MSHO | +2022-12-20 09:28:40 | +2022-12-20 10:01:38 | +15 | +
MSIN | +2022-11-21 17:02:39 | +2022-11-21 23:06:22 | +15 | +
MSKE | +2023-01-05 15:40:58 | +2023-01-05 15:52:02 | +15 | +
MakeLight | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +15 | +
PMSI | +2023-01-20 14:22:03 | +2023-01-20 14:31:52 | +15 | +
Haus B | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +14 | +
UrbanGarden | +2023-02-02 19:27:40 | +2023-02-18 14:50:19 | +14 | +
Соседи по воздуху | +2022-03-30 11:25:43 | +2023-01-27 09:50:43 | +14 | +
PIE | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +13 | +
RB-DSJ | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +13 | +
Sofia | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +12 | +
co2mofetten | +2022-03-30 11:25:43 | +2023-01-17 07:38:21 | +12 | +
Haus D | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +11 | +
Netlight | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +11 | +
home | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +11 | +
#STROPDEAER | +2022-03-30 11:25:43 | +2023-02-16 15:12:50 | +10 | +
AirAberdeen | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
Balthasar-Neumann-Schule 1 | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
Bestäuberprojekt | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
Che Aria Tira? | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
HBG Bonn | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
IntegrA | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
Mikroklima H | +2022-05-07 17:29:00 | +2022-05-07 17:47:42 | +10 | +
dwih-sp | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
esri-de | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +10 | +
makerspace-partheland | +2022-03-30 11:25:43 | +2023-02-20 18:34:50 | +10 | +
montorioveronese.it | +2022-03-30 11:25:43 | +2022-12-29 07:45:57 | +10 | +
ATSO | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +9 | +
Fläming | +2022-08-15 19:16:48 | +2022-12-13 06:29:22 | +9 | +
Mikroklima C-R | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +9 | +
Ostroda | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +9 | +
RSS | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +9 | +
clevermint | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +9 | +
test | +2022-03-30 11:25:43 | +2022-12-18 22:20:34 | +9 | +
2 | +2022-03-30 11:25:43 | +2023-01-07 15:44:29 | +8 | +
DBDS | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
Data4City | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
IKG | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
IVKOWeek | +2022-03-30 11:25:43 | +2022-07-05 09:42:31 | +8 | +
Koerber-Stiftung | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
M7 | +2022-03-30 11:25:43 | +2022-11-28 13:00:44 | +8 | +
Natlab Ökologie | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
PGKN | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
Raumanmeri | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
stw | +2022-03-30 11:25:43 | +2022-03-30 11:25:43 | +8 | +
First we group the boxes by createdAt
into bins of one
+week:
= 'week'
+ bins = 6
+ mvavg_bins
+= boxes %>%
+ growth mutate(week = cut(as.Date(createdAt), breaks = bins)) %>%
+ group_by(week) %>%
+ summarize(count = length(week)) %>%
+ mutate(event = 'registered')
We can do the same for updatedAt
, which informs us about
+the last change to a box, including uploaded measurements. This method
+of determining inactive boxes is fairly inaccurate and should be
+considered an approximation, because we have no information about
+intermediate inactive phases. Also deleted boxes would probably have a
+big impact here.
= boxes %>%
+ inactive # remove boxes that were updated in the last two days,
+ # b/c any box becomes inactive at some point by definition of updatedAt
+ filter(updatedAt < now() - days(2)) %>%
+ mutate(week = cut(as.Date(updatedAt), breaks = bins)) %>%
+ group_by(week) %>%
+ summarize(count = length(week)) %>%
+ mutate(event = 'inactive')
Now we can combine both datasets for plotting:
+= bind_rows(growth, inactive) %>% group_by(event)
+ boxes_by_date
+ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) +
+xlab('Time') + ylab(paste('rate per ', bins)) +
+ scale_x_date(date_breaks="years", date_labels="%Y") +
+ scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) +
+ geom_point(aes(y = count), size = 0.5) +
+ # moving average, make first and last value NA (to ensure identical length of vectors)
+ geom_line(aes(y = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA))))
We see a sudden rise in early 2017, which lines up with the fast
+growing grouptag Luftdaten
. This was enabled by an
+integration of openSenseMap.org into the firmware of the air quality
+monitoring project luftdaten.info. The dips in mid
+2017 and early 2018 could possibly be explained by production/delivery
+issues of the senseBox hardware, but I have no data on the exact time
+frames to verify.
While we are looking at createdAt
and
+updatedAt
, we can also extract the duration of activity of
+each box, and look at metrics by exposure and grouptag once more:
= boxes %>%
+ duration group_by(exposure) %>%
+ filter(!is.na(updatedAt)) %>%
+ mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = exposure, y = duration)) +
+geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days')
The time of activity averages at only 157 days, though there are +boxes with 2389 days of activity, spanning a large chunk of +openSenseMap’s existence.
+= boxes %>%
+ duration group_by(grouptag) %>%
+ # only include grouptags with 8 or more members
+ filter(length(grouptag) >= 8 & !is.na(grouptag) & !is.na(updatedAt)) %>%
+ mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ ggplot(duration, aes(x = grouptag, y = duration)) +
+geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days')
%>%
+ duration summarize(
+ duration_avg = round(mean(duration)),
+ duration_min = round(min(duration)),
+ duration_max = round(max(duration)),
+ oldest_box = round(max(difftime(now(), createdAt, units='days')))
+ %>%
+ ) arrange(desc(duration_avg))
grouptag | +duration_avg | +duration_min | +duration_max | +oldest_box | +
---|---|---|---|---|
Ostroda | +330 days | +330 days | +330 days | +330 days | +
Mikroklima C-R | +328 days | +321 days | +330 days | +330 days | +
Apeldoorn | +326 days | +263 days | +330 days | +330 days | +
freshairbromley | +298 days | +23 days | +330 days | +330 days | +
Mikroklima | +280 days | +42 days | +330 days | +330 days | +
Mikroklima H | +279 days | +229 days | +292 days | +292 days | +
Smart City MS | +272 days | +0 days | +330 days | +330 days | +
Feinstaub | +220 days | +0 days | +330 days | +330 days | +
co2mofetten | +212 days | +0 days | +330 days | +330 days | +
makerspace-partheland | +210 days | +0 days | +330 days | +330 days | +
Luftdaten | +208 days | +0 days | +330 days | +330 days | +
luftdaten.info | +197 days | +0 days | +330 days | +330 days | +
Burgermeetnet | +188 days | +0 days | +330 days | +330 days | +
esri-de | +188 days | +0 days | +330 days | +330 days | +
#stropdeaer | +185 days | +0 days | +330 days | +330 days | +
Sofia | +170 days | +0 days | +330 days | +330 days | +
WAUW!denberg | +166 days | +0 days | +330 days | +330 days | +
KJR-M | +165 days | +0 days | +330 days | +330 days | +
IKG | +162 days | +0 days | +330 days | +330 days | +
AirAberdeen | +153 days | +0 days | +330 days | +330 days | +
M7 | +152 days | +87 days | +243 days | +330 days | +
1 | +145 days | +0 days | +330 days | +330 days | +
BurgerMeetnet | +139 days | +0 days | +330 days | +330 days | +
Luftdaten.info | +138 days | +0 days | +330 days | +330 days | +
Bottrop-Feinstaub | +132 days | +0 days | +330 days | +330 days | +
stw | +129 days | +0 days | +330 days | +330 days | +
cleanairfrome | +128 days | +0 days | +330 days | +330 days | +
montorioveronese.it | +128 days | +0 days | +330 days | +330 days | +
RB-DSJ | +122 days | +0 days | +330 days | +330 days | +
Mikroprojekt Mitmachklima | +117 days | +0 days | +330 days | +330 days | +
Luchtwachters Delft | +109 days | +0 days | +330 days | +330 days | +
BRGL | +107 days | +85 days | +109 days | +109 days | +
Fläming | +107 days | +23 days | +175 days | +192 days | +
BRGW | +106 days | +98 days | +113 days | +113 days | +
PIE | +101 days | +0 days | +330 days | +330 days | +
Riga | +101 days | +0 days | +330 days | +330 days | +
kerekdomb_ | +100 days | +0 days | +330 days | +330 days | +
luftdaten | +99 days | +0 days | +330 days | +330 days | +
home | +94 days | +0 days | +330 days | +330 days | +
Bad_Hersfeld | +93 days | +0 days | +330 days | +330 days | +
dwih-sp | +91 days | +0 days | +330 days | +330 days | +
MSGB | +89 days | +50 days | +101 days | +101 days | +
AGIN | +86 days | +86 days | +86 days | +87 days | +
HTLJ | +84 days | +58 days | +94 days | +94 days | +
bad_hersfeld | +84 days | +0 days | +330 days | +330 days | +
Соседи по воздуху | +84 days | +0 days | +330 days | +330 days | +
Captographies | +78 days | +0 days | +643 days | +643 days | +
Save Dnipro | +74 days | +0 days | +330 days | +330 days | +
PGKN | +67 days | +0 days | +330 days | +330 days | +
Netlight | +60 days | +0 days | +330 days | +330 days | +
MSHO | +57 days | +36 days | +65 days | +65 days | +
Futurium | +52 days | +0 days | +330 days | +330 days | +
MSIN | +52 days | +0 days | +79 days | +94 days | +
test | +52 days | +0 days | +329 days | +330 days | +
ifgi | +51 days | +0 days | +330 days | +330 days | +
#STROPDEAER | +50 days | +0 days | +330 days | +330 days | +
ATSO | +48 days | +0 days | +279 days | +330 days | +
2 | +46 days | +0 days | +310 days | +330 days | +
MakeLight | +46 days | +0 days | +330 days | +330 days | +
Haus B | +44 days | +0 days | +239 days | +330 days | +
Futurium 2021 | +43 days | +0 days | +329 days | +330 days | +
IVKOWeek | +42 days | +0 days | +330 days | +330 days | +
DBDS | +41 days | +0 days | +330 days | +330 days | +
GIZ Clean Air Day Project | +36 days | +0 days | +330 days | +330 days | +
edu | +36 days | +0 days | +330 days | +330 days | +
TKS Bonn | +32 days | +0 days | +330 days | +330 days | +
HU Explorers | +28 days | +0 days | +319 days | +330 days | +
321heiss | +24 days | +0 days | +43 days | +241 days | +
SUGUCS | +9 days | +0 days | +53 days | +85 days | +
APPI | +3 days | +0 days | +7 days | +28 days | +
MSKE | +3 days | +0 days | +7 days | +49 days | +
PMSI | +3 days | +0 days | +4 days | +34 days | +
RSS | +3 days | +0 days | +28 days | +330 days | +
CS:iDrop | +2 days | +0 days | +36 days | +44 days | +
UrbanGarden | +2 days | +0 days | +12 days | +21 days | +
Balthasar-Neumann-Schule 1 | +0 days | +0 days | +0 days | +330 days | +
Bestäuberprojekt | +0 days | +0 days | +0 days | +330 days | +
Che Aria Tira? | +0 days | +0 days | +0 days | +330 days | +
Data4City | +0 days | +0 days | +0 days | +330 days | +
HBG Bonn | +0 days | +0 days | +0 days | +330 days | +
Haus C | +0 days | +0 days | +0 days | +330 days | +
Haus D | +0 days | +0 days | +0 days | +330 days | +
IntegrA | +0 days | +0 days | +0 days | +330 days | +
Koerber-Stiftung | +0 days | +0 days | +0 days | +330 days | +
Natlab Ökologie | +0 days | +0 days | +0 days | +330 days | +
Raumanmeri | +0 days | +0 days | +0 days | +330 days | +
SekSeeland | +0 days | +0 days | +0 days | +330 days | +
clevermint | +0 days | +0 days | +0 days | +330 days | +
The time of activity averages at only 89 days, though there are boxes +with 643 days of activity, spanning a large chunk of openSenseMap’s +existence.
+This is less useful, as older boxes are active for a longer time by +definition. If you have an idea how to compensate for that, please send +a Pull +Request!
+# NOTE: boxes older than 2016 missing due to missing updatedAt in database
+= boxes %>%
+ duration mutate(year = cut(as.Date(createdAt), breaks = 'year')) %>%
+ group_by(year) %>%
+ filter(!is.na(updatedAt)) %>%
+ mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = substr(as.character(year), 0, 4), y = duration)) +
+geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
Other visualisations come to mind, and are left as an exercise to the +reader. If you implemented some, feel free to add them to this vignette +via a Pull +Request.
+++This vignette serves as an example on data wrangling & +visualization with
+opensensmapr
,dplyr
and +ggplot2
.
# required packages:
+library(opensensmapr) # data download
+library(dplyr) # data wrangling
+library(ggplot2) # plotting
+library(lubridate) # date arithmetic
+library(zoo) # rollmean()
+openSenseMap.org has grown quite a bit in the last years; it would be +interesting to see how we got to the current 11367 sensor stations, +split up by various attributes of the boxes.
+While opensensmapr
provides extensive methods of
+filtering boxes by attributes on the server, we do the filtering within
+R to save time and gain flexibility.
So the first step is to retrieve all the boxes.
+# if you want to see results for a specific subset of boxes,
+# just specify a filter such as grouptag='ifgi' here
+boxes_all = osem_boxes()
+boxes = boxes_all
+In the following we just want to have a look at the boxes created in +2022, so we filter for them.
+boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31")
+summary(boxes) -> summary.data.frame
+## boxes total: 2108
+##
+## boxes by exposure:
+## indoor mobile outdoor unknown
+## 524 200 1383 1
+##
+## boxes by model:
+## custom hackair_home_v2 homeEthernet
+## 932 5 5
+## homeEthernetFeinstaub homeV2Ethernet homeV2EthernetFeinstaub
+## 2 5 5
+## homeV2Lora homeV2Wifi homeV2WifiFeinstaub
+## 58 220 114
+## homeWifi homeWifiFeinstaub luftdaten_pms1003
+## 14 17 0
+## luftdaten_pms1003_bme280 luftdaten_pms3003 luftdaten_pms3003_bme280
+## 1 0 0
+## luftdaten_pms5003 luftdaten_pms5003_bme280 luftdaten_pms7003
+## 2 8 0
+## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
+## 8 28 462
+## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
+## 27 7 188
+##
+## $last_measurement_within
+## 1h 1d 30d 365d never
+## 708 758 889 1559 498
+##
+## oldest box: 2020-02-29 23:00:31 (Kirchardt 1)
+## newest box: 2022-12-30 09:19:46 (Balkon)
+##
+## sensors per box:
+## Min. 1st Qu. Median Mean 3rd Qu. Max.
+## 1.000 3.000 5.000 4.978 6.000 29.000
+
+
+
+
+
+Another feature of interest is the spatial distribution of the boxes:
+plot()
can help us out here. This function requires a bunch
+of optional dependencies though.
if (!require('maps')) install.packages('maps')
+if (!require('maptools')) install.packages('maptools')
+if (!require('rgeos')) install.packages('rgeos')
+
+plot(boxes)
+
+But what do these sensor stations actually measure? Lets find out.
+osem_phenomena()
gives us a named list of of the counts of
+each observed phenomenon for the given set of sensor stations:
phenoms = osem_phenomena(boxes)
+str(phenoms)
+## List of 966
+## $ Temperatur : int 1520
+## $ rel. Luftfeuchte : int 1289
+## $ PM10 : int 1070
+## $ PM2.5 : int 1069
+## $ Luftdruck : int 994
+## $ Beleuchtungsstärke : int 282
+## $ UV-Intensität : int 282
+## $ VOC : int 224
+## $ Lufttemperatur : int 208
+## $ CO₂ : int 178
+## $ Bodenfeuchte : int 170
+## $ Temperature : int 168
+## $ Luftfeuchte : int 134
+## $ Lautstärke : int 131
+## $ Humidity : int 124
+## $ atm. Luftdruck : int 114
+## $ Kalibrierungswert : int 108
+## $ CO2eq : int 107
+## $ IAQ : int 107
+## $ Temperatur SCD30 : int 107
+## $ rel. Luftfeuchte SCD30 : int 107
+## $ Pressure : int 98
+## $ Luftfeuchtigkeit : int 62
+## $ PM01 : int 58
+## $ Bodentemperatur : int 55
+## $ Windgeschwindigkeit : int 46
+## $ Feinstaub PM10 : int 41
+## $ Feinstaub PM2.5 : int 32
+## $ Feinstaub PM1.0 : int 28
+## $ Batterie : int 22
+## $ Taupunkt : int 19
+## $ Windrichtung : int 19
+## $ rel. Luftfeuchte (HECA) : int 16
+## $ Temperatur (HECA) : int 15
+## $ Temperatura : int 15
+## $ Durchschnitt Umgebungslautstärke : int 14
+## $ Helligkeit : int 14
+## $ Latitude : int 14
+## $ Longtitude : int 14
+## $ Minimum Umgebungslautstärke : int 14
+## $ RSSI : int 14
+## $ UV-Index : int 11
+## $ CO2 : int 10
+## $ PM1 : int 10
+## $ rel-. Luftfeuchte : int 10
+## $ temperature : int 10
+## $ Abstand nach links : int 9
+## $ Abstand nach rechts : int 9
+## $ Beschleunigung X-Achse : int 9
+## $ Beschleunigung Y-Achse : int 9
+## $ Beschleunigung Z-Achse : int 9
+## $ Feinstaub PM25 : int 9
+## $ Luftdruck absolut : int 9
+## $ Luftdruck relativ : int 9
+## $ Regenrate : int 9
+## $ Sonnenstrahlung : int 9
+## $ gefühlte Temperatur : int 9
+## $ Geschwindigkeit : int 8
+## $ humidity : int 8
+## $ Bodenfeuchtigkeit : int 7
+## $ Baterie : int 6
+## $ Bodenfeuchte 10cm : int 6
+## $ Bodenfeuchte 30cm : int 6
+## $ Bodentemperatur 10cm : int 6
+## $ Bodentemperatur 30cm : int 6
+## $ Lumina : int 6
+## $ Taupunktdifferenz (Spread) : int 6
+## $ UV : int 6
+## $ Umiditate : int 6
+## $ Air pressure : int 5
+## $ Battery : int 5
+## $ PM4 : int 5
+## $ Pegel : int 5
+## $ Prezenta-foc : int 5
+## $ Regenmenge : int 5
+## $ W-LAN : int 5
+## $ Bodenfeuchte 1 : int 4
+## $ Bodenfeuchte 2 : int 4
+## $ Bodentemperatur 1 : int 4
+## $ Bodentemperatur 2 : int 4
+## $ CO2 Konzentration : int 4
+## $ Gesamt Gewicht : int 4
+## $ PM 2.5 : int 4
+## $ Température : int 4
+## $ Wilgotność : int 4
+## $ Windspeed : int 4
+## $ absolute Luftfeuchtigkeit : int 4
+## $ Battery Voltage : int 3
+## $ Befehl : int 3
+## $ Bodenfeuchte 50cm : int 3
+## $ Bodentemperatur 60cm : int 3
+## $ Ciśnienie : int 3
+## $ Feinstaub : int 3
+## $ Humedad : int 3
+## $ Höhe : int 3
+## $ Laufzeit : int 3
+## $ LuftfeuchteBME : int 3
+## $ Luftfeuchtigkeit_01 : int 3
+## $ LufttemperaturBME : int 3
+## [list output truncated]
+Thats quite some noise there, with many phenomena being measured by a +single sensor only, or many duplicated phenomena due to slightly +different spellings. We should clean that up, but for now let’s just +filter out the noise and find those phenomena with high sensor +numbers:
+phenoms[phenoms > 50]
+## $Temperatur
+## [1] 1520
+##
+## $`rel. Luftfeuchte`
+## [1] 1289
+##
+## $PM10
+## [1] 1070
+##
+## $PM2.5
+## [1] 1069
+##
+## $Luftdruck
+## [1] 994
+##
+## $Beleuchtungsstärke
+## [1] 282
+##
+## $`UV-Intensität`
+## [1] 282
+##
+## $VOC
+## [1] 224
+##
+## $Lufttemperatur
+## [1] 208
+##
+## $`CO₂`
+## [1] 178
+##
+## $Bodenfeuchte
+## [1] 170
+##
+## $Temperature
+## [1] 168
+##
+## $Luftfeuchte
+## [1] 134
+##
+## $Lautstärke
+## [1] 131
+##
+## $Humidity
+## [1] 124
+##
+## $`atm. Luftdruck`
+## [1] 114
+##
+## $Kalibrierungswert
+## [1] 108
+##
+## $CO2eq
+## [1] 107
+##
+## $IAQ
+## [1] 107
+##
+## $`Temperatur SCD30`
+## [1] 107
+##
+## $`rel. Luftfeuchte SCD30`
+## [1] 107
+##
+## $Pressure
+## [1] 98
+##
+## $Luftfeuchtigkeit
+## [1] 62
+##
+## $PM01
+## [1] 58
+##
+## $Bodentemperatur
+## [1] 55
+By looking at the createdAt
attribute of each box we
+know the exact time a box was registered. Because of some database
+migration issues the createdAt
values are mostly wrong
+(~80% of boxes created 2022-03-30), so we are using the
+timestamp
attribute of the currentlocation
+which should in most cases correspond to the creation date.
With this approach we have no information about boxes that were +deleted in the meantime, but that’s okay for now.
+exposure_counts = boxes %>%
+ group_by(exposure) %>%
+ mutate(count = row_number(locationtimestamp))
+
+exposure_colors = c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey')
+ggplot(exposure_counts, aes(x = locationtimestamp, y = count, colour = exposure)) +
+ geom_line() +
+ scale_colour_manual(values = exposure_colors) +
+ xlab('Registration Date') + ylab('senseBox count')
+
+Outdoor boxes are growing fast! We can also see the
+introduction of mobile
sensor “stations” in 2017.
Let’s have a quick summary:
+exposure_counts %>%
+ summarise(
+ oldest = min(locationtimestamp),
+ newest = max(locationtimestamp),
+ count = max(count)
+ ) %>%
+ arrange(desc(count))
+exposure | +oldest | +newest | +count | +
---|---|---|---|
outdoor | +2022-01-01 11:59:16 | +2022-12-30 09:19:46 | +1383 | +
indoor | +2022-01-02 11:06:08 | +2022-12-23 20:46:45 | +524 | +
mobile | +2022-01-06 13:20:00 | +2022-12-21 21:35:16 | +200 | +
unknown | +2022-03-01 07:04:30 | +2022-03-01 07:04:30 | +1 | +
We can try to find out where the increases in growth came from, by +analysing the box count by grouptag.
+Caveats: Only a small subset of boxes has a grouptag, and we should
+assume that these groups are actually bigger. Also, we can see that
+grouptag naming is inconsistent (Luftdaten
,
+luftdaten.info
, …)
grouptag_counts = boxes %>%
+ group_by(grouptag) %>%
+ # only include grouptags with 15 or more members
+ filter(length(grouptag) >= 15 & !is.na(grouptag) & grouptag != '') %>%
+ mutate(count = row_number(locationtimestamp))
+
+# helper for sorting the grouptags by boxcount
+sortLvls = function(oldFactor, ascending = TRUE) {
+ lvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names()
+ factor(oldFactor, levels = lvls)
+}
+grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE)
+
+ggplot(grouptag_counts, aes(x = locationtimestamp, y = count, colour = grouptag)) +
+ geom_line(aes(group = grouptag)) +
+ xlab('Registration Date') + ylab('senseBox count')
+
+grouptag_counts %>%
+ summarise(
+ oldest = min(locationtimestamp),
+ newest = max(locationtimestamp),
+ count = max(count)
+ ) %>%
+ arrange(desc(count))
+grouptag | +oldest | +newest | +count | +
---|---|---|---|
edu | +2022-01-02 11:06:08 | +2022-12-18 12:38:27 | +127 | +
HU Explorers | +2022-04-01 09:07:41 | +2022-12-14 10:11:34 | +123 | +
321heiss | +2022-07-09 01:29:37 | +2022-09-01 06:27:35 | +91 | +
Captographies | +2022-06-03 11:25:27 | +2022-11-16 13:26:39 | +58 | +
SUGUCS | +2022-11-30 15:25:32 | +2022-12-03 10:11:41 | +23 | +
SekSeeland | +2022-03-14 13:17:17 | +2022-03-22 20:23:58 | +19 | +
BurgerMeetnet | +2022-01-24 15:33:19 | +2022-05-10 21:22:35 | +16 | +
AGIN | +2022-11-28 17:33:12 | +2022-11-28 17:42:18 | +15 | +
BRGL | +2022-11-06 19:23:43 | +2022-11-06 22:08:36 | +15 | +
BRGW | +2022-11-02 10:28:52 | +2022-11-02 13:32:12 | +15 | +
Burgermeetnet | +2022-01-15 20:43:16 | +2022-02-11 17:59:05 | +15 | +
HTLJ | +2022-11-21 22:04:17 | +2022-11-21 22:05:47 | +15 | +
MSGB | +2022-11-14 09:08:57 | +2022-11-14 10:19:24 | +15 | +
MSHO | +2022-12-20 09:28:40 | +2022-12-20 10:01:38 | +15 | +
MSIN | +2022-11-21 17:02:39 | +2022-11-21 23:06:22 | +15 | +
Mikroprojekt Mitmachklima | +2022-02-09 10:28:40 | +2022-08-23 13:14:11 | +15 | +
First we group the boxes by locationtimestamp
into bins
+of one week:
bins = 'week'
+mvavg_bins = 6
+
+growth = boxes %>%
+ mutate(week = cut(as.Date(locationtimestamp), breaks = bins)) %>%
+ group_by(week) %>%
+ summarize(count = length(week)) %>%
+ mutate(event = 'registered')
+We can do the same for updatedAt
, which informs us about
+the last change to a box, including uploaded measurements. As a lot of
+boxes were “updated” by the database migration, many of them are updated
+at 2022-03-30, so we try to use the lastMeasurement
+attribute instead of updatedAt
. This leads to fewer boxes
+but also automatically excludes boxes which were created but never made
+a measurement.
This method of determining inactive boxes is fairly inaccurate and +should be considered an approximation, because we have no information +about intermediate inactive phases. Also deleted boxes would probably +have a big impact here.
+inactive = boxes %>%
+ # remove boxes that were updated in the last two days,
+ # b/c any box becomes inactive at some point by definition of updatedAt
+ filter(lastMeasurement < now() - days(2)) %>%
+ mutate(week = cut(as.Date(lastMeasurement), breaks = bins)) %>%
+ filter(as.Date(week) > as.Date("2021-12-31")) %>%
+ group_by(week) %>%
+ summarize(count = length(week)) %>%
+ mutate(event = 'inactive')
+Now we can combine both datasets for plotting:
+boxes_by_date = bind_rows(growth, inactive) %>% group_by(event)
+
+ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) +
+ xlab('Time') + ylab(paste('rate per ', bins)) +
+ scale_x_date(date_breaks="years", date_labels="%Y") +
+ scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) +
+ geom_point(aes(y = count), size = 0.5) +
+ # moving average, make first and last value NA (to ensure identical length of vectors)
+ geom_line(aes(y = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA))))
+
+And see in which weeks the most boxes become (in)active:
+boxes_by_date %>%
+ filter(count > 50) %>%
+ arrange(desc(count))
+week | +count | +event | +
---|---|---|
2022-11-21 | +92 | +registered | +
2022-06-06 | +77 | +registered | +
2022-08-29 | +76 | +registered | +
2022-10-31 | +71 | +registered | +
2022-11-14 | +67 | +registered | +
2022-11-28 | +65 | +registered | +
2022-08-22 | +61 | +registered | +
2022-02-28 | +57 | +registered | +
2022-08-29 | +55 | +inactive | +
2022-03-21 | +54 | +registered | +
2022-12-12 | +54 | +registered | +
2022-01-24 | +51 | +registered | +
While we are looking at locationtimestamp
and
+lastMeasurement
, we can also extract the duration of
+activity of each box, and look at metrics by exposure and grouptag once
+more:
durations = boxes %>%
+ group_by(exposure) %>%
+ filter(!is.na(lastMeasurement)) %>%
+ mutate(duration = difftime(lastMeasurement, locationtimestamp, units='days')) %>%
+ filter(duration >= 0)
+
+ggplot(durations, aes(x = exposure, y = duration)) +
+ geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days')
+
+The time of activity averages at only 145 days, though there are +boxes with 418 days of activity, spanning a large chunk of +openSenseMap’s existence.
+durations = boxes %>%
+ filter(!is.na(lastMeasurement)) %>%
+ group_by(grouptag) %>%
+ # only include grouptags with 20 or more members
+ filter(length(grouptag) >= 15 & !is.na(grouptag) & !is.na(lastMeasurement)) %>%
+ mutate(duration = difftime(lastMeasurement, locationtimestamp, units='days')) %>%
+ filter(duration >= 0)
+
+ggplot(durations, aes(x = grouptag, y = duration)) +
+ geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days')
+
+durations %>%
+ summarize(
+ duration_avg = round(mean(duration)),
+ duration_min = round(min(duration)),
+ duration_max = round(max(duration)),
+ oldest_box = round(max(difftime(now(), locationtimestamp, units='days')))
+ ) %>%
+ arrange(desc(duration_avg))
+grouptag | +duration_avg | +duration_min | +duration_max | +oldest_box | +
---|---|---|---|---|
Burgermeetnet | +236 days | +0 days | +404 days | +404 days | +
BurgerMeetnet | +169 days | +0 days | +395 days | +395 days | +
Captographies | +122 days | +0 days | +261 days | +263 days | +
BRGL | +107 days | +85 days | +109 days | +109 days | +
MSGB | +87 days | +39 days | +101 days | +101 days | +
edu | +87 days | +0 days | +408 days | +408 days | +
AGIN | +86 days | +86 days | +86 days | +87 days | +
HTLJ | +84 days | +58 days | +94 days | +94 days | +
MSHO | +57 days | +36 days | +65 days | +65 days | +
HU Explorers | +44 days | +0 days | +319 days | +328 days | +
321heiss | +0 days | +0 days | +0 days | +229 days | +
The time of activity averages at only 70 days, though there are boxes +with 408 days of activity, spanning a large chunk of openSenseMap’s +existence.
+This is less useful, as older boxes are active for a longer time by +definition. If you have an idea how to compensate for that, please send +a Pull +Request!
+# NOTE: boxes older than 2016 missing due to missing updatedAt in database
+duration = boxes %>%
+ mutate(year = cut(as.Date(locationtimestamp), breaks = 'year')) %>%
+ group_by(year) %>%
+ filter(!is.na(lastMeasurement)) %>%
+ mutate(duration = difftime(lastMeasurement, locationtimestamp, units='days')) %>%
+ filter(duration >= 0)
+
+ggplot(duration, aes(x = substr(as.character(year), 0, 4), y = duration)) +
+ geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
+
+Other visualisations come to mind, and are left as an exercise to the +reader. If you implemented some, feel free to add them to this vignette +via a Pull +Request.
+This package provides data ingestion functions for almost any data +stored on the open data platform for environmental sensordata https://opensensemap.org. Its main goals are to provide +means for:
+Before we look at actual observations, lets get a grasp of the +openSenseMap datasets’ structure.
+library(magrittr)
+library(opensensmapr)
+
+= osem_boxes() all_sensors
summary(all_sensors)
## boxes total: 11367
+##
+## boxes by exposure:
+## indoor mobile outdoor unknown
+## 2344 591 8413 19
+##
+## boxes by model:
+## custom hackair_home_v2 homeEthernet
+## 2776 73 73
+## homeEthernetFeinstaub homeV2Ethernet homeV2EthernetFeinstaub
+## 55 21 40
+## homeV2Lora homeV2Wifi homeV2WifiFeinstaub
+## 246 578 743
+## homeWifi homeWifiFeinstaub luftdaten_pms1003
+## 215 222 9
+## luftdaten_pms1003_bme280 luftdaten_pms3003 luftdaten_pms3003_bme280
+## 10 1 7
+## luftdaten_pms5003 luftdaten_pms5003_bme280 luftdaten_pms7003
+## 7 60 6
+## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
+## 78 285 3060
+## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
+## 114 135 2553
+##
+## $last_measurement_within
+## 1h 1d 30d 365d never
+## 3601 3756 4252 5938 2052
+##
+## oldest box: 2016-08-09 19:34:42 (OBS Bohmte UK_02)
+## newest box: 2023-02-23 07:56:59 (Steinbrink 29)
+##
+## sensors per box:
+## Min. 1st Qu. Median Mean 3rd Qu. Max.
+## 1.000 4.000 5.000 4.981 5.000 76.000
+This gives a good overview already: As of writing this, there are +more than 700 sensor stations, of which ~50% are currently running. Most +of them are placed outdoors and have around 5 sensors each. The oldest +station is from May 2014, while the latest station was registered a +couple of minutes ago.
+Another feature of interest is the spatial distribution of the boxes:
+plot()
can help us out here. This function requires a bunch
+of optional dependencies though.
if (!require('maps')) install.packages('maps')
+if (!require('maptools')) install.packages('maptools')
+if (!require('rgeos')) install.packages('rgeos')
+
+plot(all_sensors)
It seems we have to reduce our area of interest to Germany.
+But what do these sensor stations actually measure? Lets find out.
+osem_phenomena()
gives us a named list of of the counts of
+each observed phenomenon for the given set of sensor stations:
= osem_phenomena(all_sensors)
+ phenoms str(phenoms)
## List of 3289
+## $ Temperatur : int 9385
+## $ rel. Luftfeuchte : int 8317
+## $ PM10 : int 8147
+## $ PM2.5 : int 8135
+## $ Luftdruck : int 5667
+## $ Beleuchtungsstärke : int 1674
+## $ UV-Intensität : int 1665
+## $ Temperature : int 643
+## $ Humidity : int 473
+## $ VOC : int 422
+## $ Luftfeuchte : int 362
+## $ Lufttemperatur : int 356
+## $ CO₂ : int 304
+## $ Pressure : int 293
+## $ Bodenfeuchte : int 284
+## $ Luftfeuchtigkeit : int 272
+## $ atm. Luftdruck : int 245
+## $ Lautstärke : int 240
+## $ PM01 : int 206
+## $ IAQ : int 162
+## $ Kalibrierungswert : int 156
+## $ rel. Luftfeuchte SCD30 : int 156
+## $ Bodentemperatur : int 155
+## $ Temperatur SCD30 : int 154
+## $ CO2eq : int 153
+## $ Windgeschwindigkeit : int 152
+## $ pH-Wert : int 123
+## $ Gesamthärte : int 122
+## $ Blei : int 120
+## $ Eisen : int 120
+## $ GesamthaerteLabor : int 120
+## $ Gesamthärte 2 : int 120
+## $ Kupfer C : int 120
+## $ Kupfer D : int 120
+## $ Kupfer1 : int 120
+## $ Kupfer2 : int 120
+## $ Nitrat : int 120
+## $ Nitrit : int 120
+## $ CO2 : int 112
+## $ Feinstaub PM10 : int 98
+## $ Windrichtung : int 82
+## $ rel. Luftfeuchte (HECA) : int 74
+## $ Temperatur (HECA) : int 72
+## $ Temperatura : int 69
+## $ Helligkeit : int 67
+## $ Feinstaub PM2.5 : int 65
+## $ Taupunkt : int 62
+## $ Latitude : int 61
+## $ Longtitude : int 58
+## $ Durchschnitt Umgebungslautstärke : int 51
+## $ Minimum Umgebungslautstärke : int 51
+## $ UV-Index : int 49
+## $ temperature : int 46
+## $ Batterie : int 45
+## $ Feinstaub PM1.0 : int 41
+## $ Umgebungslautstärke : int 41
+## $ UV : int 40
+## $ humidity : int 38
+## $ Abstand nach links : int 34
+## $ Beschleunigung Z-Achse : int 34
+## $ Beschleunigung X-Achse : int 33
+## $ Beschleunigung Y-Achse : int 33
+## $ Geschwindigkeit : int 33
+## $ Niederschlag : int 33
+## $ Feinstaub PM25 : int 32
+## $ PM1 : int 32
+## $ Abstand nach rechts : int 31
+## $ PM1.0 : int 30
+## $ rel. Luftfeuchtigkeit : int 30
+## $ Relative Humidity : int 29
+## $ Sonnenstrahlung : int 29
+## $ Luftdruck relativ : int 28
+## $ Luftdruck absolut : int 26
+## $ Rain : int 26
+## $ Regenrate : int 26
+## $ CO2 Konzentration : int 25
+## $ RSSI : int 22
+## $ gefühlte Temperatur : int 22
+## $ PM 2.5 : int 21
+## $ Battery : int 20
+## $ Ciśnienie : int 20
+## $ Air Pressure : int 19
+## $ Regen : int 19
+## $ Schall : int 19
+## $ Signal : int 19
+## $ Ilmanpaine : int 18
+## $ Lämpötila : int 18
+## $ UV Index : int 18
+## $ Wind speed : int 18
+## $ PM 10 : int 17
+## $ PM4 : int 17
+## $ Air pressure : int 16
+## $ Temperatur DHT22 : int 16
+## $ Wind Direction : int 16
+## $ Altitude : int 15
+## $ Illuminance : int 15
+## $ Speed : int 15
+## $ Wind Speed : int 15
+## $ pressure : int 15
+## [list output truncated]
+Thats quite some noise there, with many phenomena being measured by a +single sensor only, or many duplicated phenomena due to slightly +different spellings. We should clean that up, but for now let’s just +filter out the noise and find those phenomena with high sensor +numbers:
+> 20] phenoms[phenoms
## $Temperatur
+## [1] 9385
+##
+## $`rel. Luftfeuchte`
+## [1] 8317
+##
+## $PM10
+## [1] 8147
+##
+## $PM2.5
+## [1] 8135
+##
+## $Luftdruck
+## [1] 5667
+##
+## $Beleuchtungsstärke
+## [1] 1674
+##
+## $`UV-Intensität`
+## [1] 1665
+##
+## $Temperature
+## [1] 643
+##
+## $Humidity
+## [1] 473
+##
+## $VOC
+## [1] 422
+##
+## $Luftfeuchte
+## [1] 362
+##
+## $Lufttemperatur
+## [1] 356
+##
+## $`CO₂`
+## [1] 304
+##
+## $Pressure
+## [1] 293
+##
+## $Bodenfeuchte
+## [1] 284
+##
+## $Luftfeuchtigkeit
+## [1] 272
+##
+## $`atm. Luftdruck`
+## [1] 245
+##
+## $Lautstärke
+## [1] 240
+##
+## $PM01
+## [1] 206
+##
+## $IAQ
+## [1] 162
+##
+## $Kalibrierungswert
+## [1] 156
+##
+## $`rel. Luftfeuchte SCD30`
+## [1] 156
+##
+## $Bodentemperatur
+## [1] 155
+##
+## $`Temperatur SCD30`
+## [1] 154
+##
+## $CO2eq
+## [1] 153
+##
+## $Windgeschwindigkeit
+## [1] 152
+##
+## $`pH-Wert`
+## [1] 123
+##
+## $Gesamthärte
+## [1] 122
+##
+## $Blei
+## [1] 120
+##
+## $Eisen
+## [1] 120
+##
+## $GesamthaerteLabor
+## [1] 120
+##
+## $`Gesamthärte 2`
+## [1] 120
+##
+## $`Kupfer C`
+## [1] 120
+##
+## $`Kupfer D`
+## [1] 120
+##
+## $Kupfer1
+## [1] 120
+##
+## $Kupfer2
+## [1] 120
+##
+## $Nitrat
+## [1] 120
+##
+## $Nitrit
+## [1] 120
+##
+## $CO2
+## [1] 112
+##
+## $`Feinstaub PM10`
+## [1] 98
+##
+## $Windrichtung
+## [1] 82
+##
+## $`rel. Luftfeuchte (HECA)`
+## [1] 74
+##
+## $`Temperatur (HECA)`
+## [1] 72
+##
+## $Temperatura
+## [1] 69
+##
+## $Helligkeit
+## [1] 67
+##
+## $`Feinstaub PM2.5`
+## [1] 65
+##
+## $Taupunkt
+## [1] 62
+##
+## $Latitude
+## [1] 61
+##
+## $Longtitude
+## [1] 58
+##
+## $`Durchschnitt Umgebungslautstärke`
+## [1] 51
+##
+## $`Minimum Umgebungslautstärke`
+## [1] 51
+##
+## $`UV-Index`
+## [1] 49
+##
+## $temperature
+## [1] 46
+##
+## $Batterie
+## [1] 45
+##
+## $`Feinstaub PM1.0`
+## [1] 41
+##
+## $Umgebungslautstärke
+## [1] 41
+##
+## $UV
+## [1] 40
+##
+## $humidity
+## [1] 38
+##
+## $`Abstand nach links`
+## [1] 34
+##
+## $`Beschleunigung Z-Achse`
+## [1] 34
+##
+## $`Beschleunigung X-Achse`
+## [1] 33
+##
+## $`Beschleunigung Y-Achse`
+## [1] 33
+##
+## $Geschwindigkeit
+## [1] 33
+##
+## $Niederschlag
+## [1] 33
+##
+## $`Feinstaub PM25`
+## [1] 32
+##
+## $PM1
+## [1] 32
+##
+## $`Abstand nach rechts`
+## [1] 31
+##
+## $PM1.0
+## [1] 30
+##
+## $`rel. Luftfeuchtigkeit`
+## [1] 30
+##
+## $`Relative Humidity`
+## [1] 29
+##
+## $Sonnenstrahlung
+## [1] 29
+##
+## $`Luftdruck relativ`
+## [1] 28
+##
+## $`Luftdruck absolut`
+## [1] 26
+##
+## $Rain
+## [1] 26
+##
+## $Regenrate
+## [1] 26
+##
+## $`CO2 Konzentration`
+## [1] 25
+##
+## $RSSI
+## [1] 22
+##
+## $`gefühlte Temperatur`
+## [1] 22
+##
+## $`PM 2.5`
+## [1] 21
+Alright, temperature it is! Fine particulate matter (PM2.5) seems to +be more interesting to analyze though. We should check how many sensor +stations provide useful data: We want only those boxes with a PM2.5 +sensor, that are placed outdoors and are currently submitting +measurements:
+= osem_boxes(
+ pm25_sensors exposure = 'outdoor',
+ date = Sys.time(), # ±4 hours
+ phenomenon = 'PM2.5'
+ )
summary(pm25_sensors)
## boxes total: 3002
+##
+## boxes by exposure:
+## outdoor
+## 3002
+##
+## boxes by model:
+## custom hackair_home_v2 homeEthernetFeinstaub
+## 174 8 12
+## homeV2EthernetFeinstaub homeV2Lora homeV2Wifi
+## 10 21 2
+## homeV2WifiFeinstaub homeWifi homeWifiFeinstaub
+## 126 3 30
+## luftdaten_pms1003 luftdaten_pms1003_bme280 luftdaten_pms5003
+## 1 2 3
+## luftdaten_pms5003_bme280 luftdaten_pms7003 luftdaten_pms7003_bme280
+## 11 2 26
+## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
+## 115 1365 59
+## luftdaten_sds011_dht11 luftdaten_sds011_dht22
+## 45 987
+##
+## $last_measurement_within
+## 1h 1d 30d 365d never
+## 2977 3002 3002 3002 0
+##
+## oldest box: 2017-03-03 18:20:43 (Witten Heven Dorf)
+## newest box: 2023-02-23 07:56:59 (Steinbrink 29)
+##
+## sensors per box:
+## Min. 1st Qu. Median Mean 3rd Qu. Max.
+## 2.000 4.000 5.000 4.838 5.000 26.000
+plot(pm25_sensors)
Thats still more than 200 measuring stations, we can work with +that.
+Having analyzed the available data sources, let’s finally get some
+measurements. We could call osem_measurements(pm25_sensors)
+now, however we are focusing on a restricted area of interest, the city
+of Berlin. Luckily we can get the measurements filtered by a bounding
+box:
library(sf)
## Linking to GEOS 3.9.3, GDAL 3.5.2, PROJ 8.2.1; sf_use_s2() is TRUE
+library(units)
## udunits database from C:/Software/RPackages/units/share/udunits/udunits2.xml
+library(lubridate)
+library(dplyr)
+
+# construct a bounding box: 12 kilometers around Berlin
+= st_point(c(13.4034, 52.5120)) %>%
+ berlin st_sfc(crs = 4326) %>%
+ st_transform(3857) %>% # allow setting a buffer in meters
+ st_buffer(set_units(12, km)) %>%
+ st_transform(4326) %>% # the opensensemap expects WGS 84
+ st_bbox()
= osem_measurements(
+ pm25
+ berlin,phenomenon = 'PM2.5',
+ from = now() - days(3), # defaults to 2 days
+ to = now()
+
+ )
+plot(pm25)
Now we can get started with actual spatiotemporal data analysis. +First, lets mask the seemingly uncalibrated sensors:
+= filter(pm25, value > 100)$sensorId
+ outliers = outliers[, drop = T] %>% levels()
+ bad_sensors
+= mutate(pm25, invalid = sensorId %in% bad_sensors) pm25
Then plot the measuring locations, flagging the outliers:
+st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
Removing these sensors yields a nicer time series plot:
+%>% filter(invalid == FALSE) %>% plot() pm25
Further analysis: comparison with LANUV data TODO
It may be useful to download data from openSenseMap only once. For +reproducible results, the data should be saved to disk, and reloaded at +a later point.
+This avoids..
+This vignette shows how to use this built in
+opensensmapr
feature, and how to do it yourself in case you
+want to save to other data formats.
# this vignette requires:
+library(opensensmapr)
+library(jsonlite)
+library(readr)
All data retrieval functions of opensensmapr
have a
+built in caching feature, which serializes an API response to disk.
+Subsequent identical requests will then return the serialized data
+instead of making another request.
To use this feature, just add a path to a directory to the
+cache
parameter:
= osem_boxes(grouptag = 'ifgi', cache = tempdir())
+ b
+# the next identical request will hit the cache only!
+= osem_boxes(grouptag = 'ifgi', cache = tempdir())
+ b
+# requests without the cache parameter will still be performed normally
+= osem_boxes(grouptag = 'ifgi') b
Looking at the cache directory we can see one file for each request, +which is identified through a hash of the request URL:
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
## [1] "osemcache.17db5c57fc6fca4d836fa2cf30345ce8767cd61a.rds"
+You can maintain multiple caches simultaneously which allows to only +store data related to a script in the same directory:
+= getwd() # current working directory
+ cacheDir = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
+ b
+# the next identical request will hit the cache only!
+= osem_boxes(grouptag = 'ifgi', cache = cacheDir) b
To get fresh results again, just call osem_clear_cache()
+for the respective cache:
osem_clear_cache() # clears default cache
+osem_clear_cache(getwd()) # clears a custom cache
If you want to roll your own serialization method to support custom +data formats, here’s how:
+# first get our example data:
+= osem_measurements('Windgeschwindigkeit') measurements
If you are paranoid and worry about .rds
files not being
+decodable anymore in the (distant) future, you could serialize to a
+plain text format such as JSON. This of course comes at the cost of
+storage space and performance.
# serializing senseBoxes to JSON, and loading from file again:
+write(jsonlite::serializeJSON(measurements), 'measurements.json')
+= jsonlite::unserializeJSON(readr::read_file('measurements.json'))
+ measurements_from_file class(measurements_from_file)
## [1] "osem_measurements" "tbl_df" "tbl"
+## [4] "data.frame"
+This method also persists the R object metadata (classes, +attributes). If you were to use a serialization method that can’t +persist object metadata, you could re-apply it with the following +functions:
+# note the toJSON call instead of serializeJSON
+write(jsonlite::toJSON(measurements), 'measurements_bad.json')
+= jsonlite::fromJSON('measurements_bad.json')
+ measurements_without_attrs class(measurements_without_attrs)
## [1] "data.frame"
+= osem_as_measurements(measurements_without_attrs)
+ measurements_with_attrs class(measurements_with_attrs)
## [1] "osem_measurements" "tbl_df" "tbl"
+## [4] "data.frame"
+The same goes for boxes via osem_as_sensebox()
.