diff --git a/inst/doc/osem-history.R b/inst/doc/osem-history.R
new file mode 100644
index 0000000..09048eb
--- /dev/null
+++ b/inst/doc/osem-history.R
@@ -0,0 +1,133 @@
+## ----setup, results='hide', message=FALSE, warning=FALSE-----------------
+# required packages:
+library(opensensmapr) # data download
+library(dplyr) # data wrangling
+library(ggplot2) # plotting
+library(lubridate) # date arithmetic
+library(zoo) # rollmean()
+
+## ----download------------------------------------------------------------
+# if you want to see results for a specific subset of boxes,
+# just specify a filter such as grouptag='ifgi' here
+boxes = osem_boxes()
+
+## ----exposure_counts, message=FALSE--------------------------------------
+exposure_counts = boxes %>%
+  group_by(exposure) %>%
+  mutate(count = row_number(createdAt))
+
+exposure_colors = c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey')
+ggplot(exposure_counts, aes(x = createdAt, y = count, colour = exposure)) +
+  geom_line() +
+  scale_colour_manual(values = exposure_colors) +
+  xlab('Registration Date') + ylab('senseBox count')
+
+## ----exposure_summary----------------------------------------------------
+exposure_counts %>%
+  summarise(
+    oldest = min(createdAt),
+    newest = max(createdAt),
+    count = max(count)
+  ) %>%
+  arrange(desc(count))
+
+## ----grouptag_counts, message=FALSE--------------------------------------
+grouptag_counts = boxes %>%
+  group_by(grouptag) %>%
+  # only include grouptags with 8 or more members (filter() ANDs its arguments row-wise)
+  filter(n() >= 8, !is.na(grouptag)) %>%
+  mutate(count = row_number(createdAt))
+
+# helper: re-level a factor so that its levels are ordered by frequency (box count)
+sortLvls = function(oldFactor, ascending = TRUE) {
+  lvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names()
+  factor(oldFactor, levels = lvls)
+}
+grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE)
+
+ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) +
+  geom_line(aes(group = grouptag)) +
+  xlab('Registration Date') + ylab('senseBox count')
+
+## ----grouptag_summary----------------------------------------------------
+grouptag_counts %>%
+  summarise(
+    oldest = min(createdAt),
+    newest = max(createdAt),
+    count = max(count)
+  ) %>%
+  arrange(desc(count))
+
+## ----growthrate_registered, warning=FALSE, message=FALSE, results='hide'----
+bins = 'week'
+mvavg_bins = 6
+
+growth = boxes %>%
+  mutate(week = cut(as.Date(createdAt), breaks = bins)) %>%
+  group_by(week) %>%
+  summarize(count = length(week)) %>%
+  mutate(event = 'registered')
+
+## ----growthrate_inactive, warning=FALSE, message=FALSE, results='hide'----
+inactive = boxes %>%
+  # remove boxes that were updated in the last two days,
+  # b/c any box becomes inactive at some point by definition of updatedAt
+  filter(updatedAt < now() - days(2)) %>%
+  mutate(week = cut(as.Date(updatedAt), breaks = bins)) %>%
+  group_by(week) %>%
+  summarize(count = length(week)) %>%
+  mutate(event = 'inactive')
+
+## ----growthrate, warning=FALSE, message=FALSE, results='hide'------------
+# moving average computed per event (grouped mutate), padded with NA at both ends to keep the length
+boxes_by_date = bind_rows(growth, inactive) %>% group_by(event) %>%
+  mutate(mvavg = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA)))
+ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) +
+  xlab('Time') + ylab(paste('rate per', bins)) +
+  scale_x_date(date_breaks="years", date_labels="%Y") +
+  scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) +
+  geom_point(aes(y = count), size = 0.5) +
+  geom_line(aes(y = mvavg))
+
+## ----exposure_duration, message=FALSE------------------------------------
+duration = boxes %>%
+  group_by(exposure) %>%
+  filter(!is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = exposure, y = duration)) +
+  geom_boxplot() +
+  coord_flip() + ylab('Duration active in Days')
+
+## ----grouptag_duration, message=FALSE------------------------------------
+duration = boxes %>%
+  group_by(grouptag) %>%
+  # only include grouptags with 8 or more members, and drop every box without updatedAt
+  filter(n() >= 8, !is.na(grouptag), !is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = grouptag, y = duration)) +
+  geom_boxplot() +
+  coord_flip() + ylab('Duration active in Days')
+
+duration %>%
+  summarize(
+    duration_avg = round(mean(duration)),
+    duration_min = round(min(duration)),
+    duration_max = round(max(duration)),
+    oldest_box = round(max(difftime(now(), createdAt, units='days')))
+  ) %>%
+  arrange(desc(duration_avg))
+
+## ----year_duration, message=FALSE----------------------------------------
+# NOTE: boxes older than 2016 missing due to missing updatedAt in database
+duration = boxes %>%
+  mutate(year = cut(as.Date(createdAt), breaks = 'year')) %>%
+  group_by(year) %>%
+  filter(!is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = substr(as.character(year), 1, 4), y = duration)) +
+  geom_boxplot() +
+  coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
+
diff --git a/inst/doc/osem-history.Rmd b/inst/doc/osem-history.Rmd
new file mode 100644
index 0000000..ff9f3d3
--- /dev/null
+++ b/inst/doc/osem-history.Rmd
@@ -0,0 +1,243 @@
+---
+title: "Visualising the History of openSenseMap.org"
+author: "Norwin Roosen"
+date: '`r Sys.Date()`'
+output:
+  rmarkdown::html_vignette:
+    df_print: kable
+    fig_height: 5
+    fig_width: 7
+    toc: yes
+  html_document:
+    code_folding: hide
+    df_print: kable
+    theme: lumen
+    toc: yes
+    toc_float: yes
+vignette: >
+  %\VignetteIndexEntry{Visualising the History of openSenseMap.org}
+  %\VignetteEngine{knitr::rmarkdown}
+  %\VignetteEncoding{UTF-8}
+---
+
+> This vignette serves as an example on data wrangling & visualization with
+`opensensmapr`, `dplyr` and `ggplot2`.
+
+```{r setup, results='hide', message=FALSE, warning=FALSE}
+# packages used throughout this vignette:
+library(opensensmapr) # data download
+library(dplyr) # data wrangling
+library(ggplot2) # plotting
+library(lubridate) # date arithmetic
+library(zoo) # rollmean()
+```
+
+openSenseMap.org has grown quite a bit in the last years; it would be interesting
+to see how we got to the current `r osem_counts()$boxes` sensor stations,
+split up by various attributes of the boxes.
+
+While `opensensmapr` provides extensive methods of filtering boxes by attributes
+on the server, we do the filtering within R to save time and gain flexibility.
+So the first step is to retrieve *all the boxes*:
+
+```{r download}
+# no filter is passed, so all boxes are requested; to look at a subset only,
+# pass a filter such as grouptag='ifgi' to osem_boxes()
+boxes = osem_boxes()
+```
+
+# Plot count of boxes by time {.tabset}
+By looking at the `createdAt` attribute of each box we know the exact time a box
+was registered.
+With this approach we have no information about boxes that were deleted in the
+meantime, but that's okay for now.
+
+## ...and exposure
+```{r exposure_counts, message=FALSE}
+# running box count per exposure type, ranked by registration date
+exposure_colors = c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey')
+exposure_counts = boxes %>%
+  group_by(exposure) %>%
+  mutate(count = row_number(createdAt))
+ggplot(exposure_counts, aes(createdAt, count, colour = exposure)) +
+  geom_line() +
+  labs(x = 'Registration Date', y = 'senseBox count') +
+  scale_colour_manual(values = exposure_colors)
+```
+
+Outdoor boxes are growing *fast*!
+We can also see the introduction of `mobile` sensor "stations" in 2017. While
+mobile boxes are still few, we can expect a quick rise in 2018 once the new
+[senseBox MCU with GPS support is released](https://sensebox.de/blog/2018-03-06-senseBox_MCU).
+
+Let's have a quick summary:
+```{r exposure_summary}
+exposure_counts %>%
+  summarise(
+    oldest = min(createdAt),
+    newest = max(createdAt),
+    count = max(count)
+  ) %>%
+  arrange(desc(count))
+```
+
+## ...and grouptag
+We can try to find out where the increases in growth came from, by analysing the
+box count by grouptag.
+
+Caveats: Only a small subset of boxes has a grouptag, and we should assume
+that these groups are actually bigger. Also, we can see that grouptag naming is
+inconsistent (`Luftdaten`, `luftdaten.info`, ...)
+
+```{r grouptag_counts, message=FALSE}
+grouptag_counts = boxes %>%
+  group_by(grouptag) %>%
+  # only include grouptags with 8 or more members (filter() ANDs its arguments row-wise)
+  filter(n() >= 8, !is.na(grouptag)) %>%
+  mutate(count = row_number(createdAt))
+
+# helper: re-level a factor so that its levels are ordered by frequency (box count)
+sortLvls = function(oldFactor, ascending = TRUE) {
+  lvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names()
+  factor(oldFactor, levels = lvls)
+}
+grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE)
+
+ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) +
+  geom_line(aes(group = grouptag)) +
+  xlab('Registration Date') + ylab('senseBox count')
+```
+
+```{r grouptag_summary}
+grouptag_counts %>%
+  summarise(
+    oldest = min(createdAt),
+    newest = max(createdAt),
+    count = max(count)
+  ) %>%
+  arrange(desc(count))
+```
+
+# Plot rate of growth and inactivity per week
+First we group the boxes by `createdAt` into bins of one week:
+```{r growthrate_registered, warning=FALSE, message=FALSE, results='hide'}
+bins = 'week'
+mvavg_bins = 6
+
+growth = boxes %>%
+  mutate(week = cut(as.Date(createdAt), breaks = bins)) %>%
+  group_by(week) %>%
+  summarize(count = length(week)) %>%
+  mutate(event = 'registered')
+```
+
+We can do the same for `updatedAt`, which informs us about the last change to
+a box, including uploaded measurements.
+This method of determining inactive boxes is fairly inaccurate and should be
+considered an approximation, because we have no information about intermediate
+inactive phases.
+Also deleted boxes would probably have a big impact here.
+```{r growthrate_inactive, warning=FALSE, message=FALSE, results='hide'}
+inactive = boxes %>%
+  # remove boxes that were updated in the last two days,
+  # b/c any box becomes inactive at some point by definition of updatedAt
+  filter(updatedAt < now() - days(2)) %>%
+  mutate(week = cut(as.Date(updatedAt), breaks = bins)) %>%
+  group_by(week) %>%
+  summarize(count = length(week)) %>%
+  mutate(event = 'inactive')
+```
+
+Now we can combine both datasets for plotting:
+```{r growthrate, warning=FALSE, message=FALSE, results='hide'}
+# moving average computed per event (grouped mutate), padded with NA at both ends to keep the length
+boxes_by_date = bind_rows(growth, inactive) %>% group_by(event) %>%
+  mutate(mvavg = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA)))
+ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) +
+  xlab('Time') + ylab(paste('rate per', bins)) +
+  scale_x_date(date_breaks="years", date_labels="%Y") +
+  scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) +
+  geom_point(aes(y = count), size = 0.5) +
+  geom_line(aes(y = mvavg))
+```
+
+We see a sudden rise in early 2017, which lines up with the fast growing grouptag `Luftdaten`.
+This was enabled by an integration of openSenseMap.org into the firmware of the
+air quality monitoring project [luftdaten.info](https://luftdaten.info).
+The dips in mid 2017 and early 2018 could possibly be explained by production/delivery issues
+of the senseBox hardware, but I have no data on the exact time frames to verify.
+
+# Plot duration of boxes being active {.tabset}
+While we are looking at `createdAt` and `updatedAt`, we can also extract the duration of activity
+of each box, and look at metrics by exposure and grouptag once more:
+
+## ...by exposure
+```{r exposure_duration, message=FALSE}
+duration = boxes %>%
+  group_by(exposure) %>%
+  filter(!is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = exposure, y = duration)) +
+  geom_boxplot() +
+  coord_flip() + ylab('Duration active in Days')
+```
+
+The time of activity averages at only `r round(mean(duration$duration))` days,
+though there are boxes with `r round(max(duration$duration))` days of activity,
+spanning a large chunk of openSenseMap's existence.
+
+## ...by grouptag
+```{r grouptag_duration, message=FALSE}
+duration = boxes %>%
+  group_by(grouptag) %>%
+  # only include grouptags with 8 or more members, and drop every box without updatedAt
+  filter(n() >= 8, !is.na(grouptag), !is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = grouptag, y = duration)) +
+  geom_boxplot() +
+  coord_flip() + ylab('Duration active in Days')
+
+duration %>%
+  summarize(
+    duration_avg = round(mean(duration)),
+    duration_min = round(min(duration)),
+    duration_max = round(max(duration)),
+    oldest_box = round(max(difftime(now(), createdAt, units='days')))
+  ) %>%
+  arrange(desc(duration_avg))
+```
+
+The time of activity averages at only `r round(mean(duration$duration))` days,
+though there are boxes with `r round(max(duration$duration))` days of activity,
+spanning a large chunk of openSenseMap's existence.
+
+## ...by year of registration
+This is less useful, as older boxes are active for a longer time by definition.
+If you have an idea how to compensate for that, please send a [Pull Request][PR]!
+
+```{r year_duration, message=FALSE}
+# NOTE: boxes older than 2016 missing due to missing updatedAt in database
+duration = boxes %>%
+  mutate(year = cut(as.Date(createdAt), breaks = 'year')) %>%
+  group_by(year) %>%
+  filter(!is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+# the labels produced by cut() start with the year: keep the first four characters (1-based)
+ggplot(duration, aes(x = substr(as.character(year), 1, 4), y = duration)) +
+  geom_boxplot() + coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
+```
+
+# More Visualisations
+Other visualisations come to mind, and are left as an exercise to the reader.
+If you implemented some, feel free to add them to this vignette via a [Pull Request][PR].
+
+* growth by phenomenon
+* growth by location -> (interactive) map
+* set inactive rate in relation to total box count
+* filter timespans with big dips in growth rate, and extrapolate the amount of
+  senseBoxes that could be on the platform today, assuming there were no production issues ;)
+
+[PR]: https://github.com/noerw/opensensmapr/pulls
diff --git a/inst/doc/osem-history.html b/inst/doc/osem-history.html
new file mode 100644
index 0000000..1c72d00
--- /dev/null
+++ b/inst/doc/osem-history.html
@@ -0,0 +1,501 @@
+
+
+
+
+
+ + + + + + + + + + + +++This vignette serves as an example on data wrangling & visualization with
+opensensmapr
,dplyr
andggplot2
.
# required packages:
+library(opensensmapr) # data download
+library(dplyr) # data wrangling
+library(ggplot2) # plotting
+library(lubridate) # date arithmetic
+library(zoo) # rollmean()
openSenseMap.org has grown quite a bit in the last years; it would be interesting to see how we got to the current 1781 sensor stations, split up by various attributes of the boxes.
+While opensensmapr
provides extensive methods of filtering boxes by attributes on the server, we do the filtering within R to save time and gain flexibility. So the first step is to retrieve all the boxes:
# if you want to see results for a specific subset of boxes,
+# just specify a filter such as grouptag='ifgi' here
+boxes = osem_boxes()
By looking at the createdAt
attribute of each box we know the exact time a box was registered. With this approach we have no information about boxes that were deleted in the meantime, but that’s okay for now.
exposure_counts = boxes %>%
+ group_by(exposure) %>%
+ mutate(count = row_number(createdAt))
+
+exposure_colors = c(indoor = 'red', outdoor = 'lightgreen', mobile = 'blue', unknown = 'darkgrey')
+ggplot(exposure_counts, aes(x = createdAt, y = count, colour = exposure)) +
+ geom_line() +
+ scale_colour_manual(values = exposure_colors) +
+ xlab('Registration Date') + ylab('senseBox count')
Outdoor boxes are growing fast! We can also see the introduction of mobile
sensor “stations” in 2017. While mobile boxes are still few, we can expect a quick rise in 2018 once the new senseBox MCU with GPS support is released.
Let’s have a quick summary:
+exposure_counts %>%
+ summarise(
+ oldest = min(createdAt),
+ newest = max(createdAt),
+ count = max(count)
+ ) %>%
+ arrange(desc(count))
exposure | +oldest | +newest | +count | +
---|---|---|---|
outdoor | +2015-02-18 16:53:41 | +2018-05-26 08:39:12 | +1416 | +
indoor | +2015-02-08 17:36:40 | +2018-05-26 10:29:27 | +290 | +
mobile | +2017-05-24 08:16:36 | +2018-05-24 07:08:32 | +55 | +
unknown | +2014-05-28 15:36:14 | +2016-06-25 15:11:11 | +20 | +
We can try to find out where the increases in growth came from, by analysing the box count by grouptag.
+Caveats: Only a small subset of boxes has a grouptag, and we should assume that these groups are actually bigger. Also, we can see that grouptag naming is inconsistent (Luftdaten
, luftdaten.info
, …)
grouptag_counts = boxes %>%
+  group_by(grouptag) %>%
+  # only include grouptags with 8 or more members (filter() ANDs its arguments row-wise)
+  filter(n() >= 8, !is.na(grouptag)) %>%
+  mutate(count = row_number(createdAt))
+
+# helper for sorting the grouptags by boxcount
+sortLvls = function(oldFactor, ascending = TRUE) {
+ lvls = table(oldFactor) %>% sort(., decreasing = !ascending) %>% names()
+ factor(oldFactor, levels = lvls)
+}
+grouptag_counts$grouptag = sortLvls(grouptag_counts$grouptag, ascending = FALSE)
+
+ggplot(grouptag_counts, aes(x = createdAt, y = count, colour = grouptag)) +
+ geom_line(aes(group = grouptag)) +
+ xlab('Registration Date') + ylab('senseBox count')
grouptag_counts %>%
+ summarise(
+ oldest = min(createdAt),
+ newest = max(createdAt),
+ count = max(count)
+ ) %>%
+ arrange(desc(count))
grouptag | +oldest | +newest | +count | +
---|---|---|---|
Luftdaten | +2017-03-14 17:01:16 | +2018-05-21 02:20:50 | +109 | +
ifgi | +2016-06-17 08:04:54 | +2018-05-15 10:27:02 | +35 | +
MakeLight | +2015-02-18 16:53:41 | +2018-02-02 13:50:21 | +15 | +
Bad_Hersfeld | +2017-07-18 13:32:03 | +2018-03-22 09:10:07 | +13 | +
luftdaten.info | +2017-05-01 10:15:44 | +2018-05-17 11:47:21 | +12 | +
dwih-sp | +2016-08-09 08:06:02 | +2016-11-23 10:16:04 | +11 | +
Che Aria Tira? | +2018-03-11 10:50:42 | +2018-03-11 23:11:20 | +10 | +
Luftdaten.info | +2017-04-03 14:10:20 | +2018-04-16 16:31:24 | +10 | +
Feinstaub | +2017-04-08 06:38:25 | +2018-03-29 17:27:55 | +9 | +
PGKN | +2018-04-08 07:01:57 | +2018-04-27 18:38:51 | +9 | +
Raumanmeri | +2017-03-13 11:35:39 | +2017-04-27 05:36:20 | +9 | +
Sofia | +2017-04-11 04:40:11 | +2018-03-15 13:26:56 | +9 | +
IKG | +2017-03-21 19:02:11 | +2017-12-18 14:30:21 | +8 | +
First we group the boxes by createdAt
into bins of one week:
bins = 'week'
+mvavg_bins = 6
+
+growth = boxes %>%
+ mutate(week = cut(as.Date(createdAt), breaks = bins)) %>%
+ group_by(week) %>%
+ summarize(count = length(week)) %>%
+ mutate(event = 'registered')
We can do the same for updatedAt
, which informs us about the last change to a box, including uploaded measurements. This method of determining inactive boxes is fairly inaccurate and should be considered an approximation, because we have no information about intermediate inactive phases. Also deleted boxes would probably have a big impact here.
inactive = boxes %>%
+ # remove boxes that were updated in the last two days,
+ # b/c any box becomes inactive at some point by definition of updatedAt
+ filter(updatedAt < now() - days(2)) %>%
+ mutate(week = cut(as.Date(updatedAt), breaks = bins)) %>%
+ group_by(week) %>%
+ summarize(count = length(week)) %>%
+ mutate(event = 'inactive')
Now we can combine both datasets for plotting:
+# moving average computed per event (grouped mutate), padded with NA at both ends to keep the length
+boxes_by_date = bind_rows(growth, inactive) %>% group_by(event) %>%
+  mutate(mvavg = rollmean(count, mvavg_bins, fill = list(NA, NULL, NA)))
+ggplot(boxes_by_date, aes(x = as.Date(week), colour = event)) +
+  xlab('Time') + ylab(paste('rate per', bins)) +
+  scale_x_date(date_breaks="years", date_labels="%Y") +
+  scale_colour_manual(values = c(registered = 'lightgreen', inactive = 'grey')) +
+  geom_point(aes(y = count), size = 0.5) +
+  geom_line(aes(y = mvavg))
We see a sudden rise in early 2017, which lines up with the fast growing grouptag Luftdaten
. This was enabled by an integration of openSenseMap.org into the firmware of the air quality monitoring project luftdaten.info. The dips in mid 2017 and early 2018 could possibly be explained by production/delivery issues of the senseBox hardware, but I have no data on the exact time frames to verify.
While we are looking at createdAt
and updatedAt
, we can also extract the duration of activity of each box, and look at metrics by exposure and grouptag once more:
duration = boxes %>%
+ group_by(exposure) %>%
+ filter(!is.na(updatedAt)) %>%
+ mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = exposure, y = duration)) +
+ geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days')
The time of activity averages at only 152 days, though there are boxes with 759 days of activity, spanning a large chunk of openSenseMap’s existence.
+duration = boxes %>%
+  group_by(grouptag) %>%
+  # only include grouptags with 8 or more members, and drop every box without updatedAt
+  filter(n() >= 8, !is.na(grouptag), !is.na(updatedAt)) %>%
+  mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+ggplot(duration, aes(x = grouptag, y = duration)) +
+ geom_boxplot() +
+ coord_flip() + ylab('Duration active in Days')
duration %>%
+ summarize(
+ duration_avg = round(mean(duration)),
+ duration_min = round(min(duration)),
+ duration_max = round(max(duration)),
+ oldest_box = round(max(difftime(now(), createdAt, units='days')))
+ ) %>%
+ arrange(desc(duration_avg))
grouptag | +duration_avg | +duration_min | +duration_max | +oldest_box | +
---|---|---|---|---|
dwih-sp | +627 days | +549 days | +655 days | +655 days | +
Feinstaub | +219 days | +4 days | +413 days | +413 days | +
ifgi | +207 days | +0 days | +622 days | +708 days | +
Sofia | +200 days | +15 days | +410 days | +410 days | +
Bad_Hersfeld | +197 days | +65 days | +312 days | +312 days | +
Luftdaten | +187 days | +0 days | +424 days | +438 days | +
luftdaten.info | +183 days | +9 days | +360 days | +390 days | +
IKG | +163 days | +70 days | +260 days | +431 days | +
Luftdaten.info | +86 days | +5 days | +376 days | +418 days | +
Che Aria Tira? | +75 days | +71 days | +76 days | +76 days | +
Raumanmeri | +45 days | +7 days | +318 days | +439 days | +
PGKN | +35 days | +29 days | +48 days | +48 days | +
The time of activity averages at only 191 days, though there are boxes with 655 days of activity, spanning a large chunk of openSenseMap’s existence.
+This is less useful, as older boxes are active for a longer time by definition. If you have an idea how to compensate for that, please send a Pull Request!
+# NOTE: boxes older than 2016 missing due to missing updatedAt in database
+duration = boxes %>%
+ mutate(year = cut(as.Date(createdAt), breaks = 'year')) %>%
+ group_by(year) %>%
+ filter(!is.na(updatedAt)) %>%
+ mutate(duration = difftime(updatedAt, createdAt, units='days'))
+
+# the labels produced by cut() start with the year: keep the first four characters (1-based)
+ggplot(duration, aes(x = substr(as.character(year), 1, 4), y = duration)) +
+  geom_boxplot() + coord_flip() + ylab('Duration active in Days') + xlab('Year of Registration')
Other visualisations come to mind, and are left as an exercise to the reader. If you implemented some, feel free to add them to this vignette via a Pull Request.
+This package provides data ingestion functions for almost any data stored on the -open data platform for environemental sensordata https://opensensemap.org. -Its main goals are to provide means for:
+This package provides data ingestion functions for almost any data stored on the open data platform for environmental sensor data https://opensensemap.org. Its main goals are to provide means for:
Before we look at actual observations, let’s get a grasp of the openSenseMap datasets’ structure.
+library(magrittr)
+library(opensensmapr)
-Before we look at actual observations, lets get a grasp of the openSenseMap
-datasets' structure.
-
-library(magrittr)
-library(opensensmapr)
-
-all_sensors = osem_boxes()
-
-
-summary(all_sensors)
-
-
-## boxes total: 1779
+all_sensors = osem_boxes()
summary(all_sensors)
## boxes total: 1781
##
## boxes by exposure:
## indoor mobile outdoor unknown
-## 288 55 1416 20
+## 290 55 1416 20
##
## boxes by model:
## custom homeEthernet homeEthernetFeinstaub
-## 335 92 49
+## 336 92 49
## homeWifi homeWifiFeinstaub luftdaten_pms1003
-## 192 144 1
+## 193 144 1
## luftdaten_pms1003_bme280 luftdaten_pms5003_bme280 luftdaten_pms7003_bme280
## 1 5 2
## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
@@ -246,52 +107,34 @@ all_sensors = osem_boxes()
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 921 960 1089 1427 235
+## 929 954 1091 1428 235
##
## oldest box: 2014-05-28 15:36:14 (CALIMERO)
-## newest box: 2018-05-24 20:29:50 (Stadthalle)
+## newest box: 2018-05-26 10:29:27 (UOS_DDI)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 1.000 4.000 4.000 4.601 5.000 33.000
-
-
-This gives a good overview already: As of writing this, there are more than 700 -sensor stations, of which ~50% are currently running. Most of them are placed -outdoors and have around 5 sensors each. -The oldest station is from May 2014, while the latest station was registered a -couple of minutes ago.
- -Another feature of interest is the spatial distribution of the boxes: plot()
-can help us out here. This function requires a bunch of optional dependencies though.
if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
-plot(all_sensors)
-
-
-
-
+## 1.0 4.0 4.0 4.6 5.0 33.0
+This gives a good overview already: As of writing this, there are more than 700 sensor stations, of which ~50% are currently running. Most of them are placed outdoors and have around 5 sensors each. The oldest station is from May 2014, while the latest station was registered a couple of minutes ago.
+Another feature of interest is the spatial distribution of the boxes: plot()
can help us out here. This function requires a bunch of optional dependencies though.
if (!require('maps')) install.packages('maps')
+if (!require('maptools')) install.packages('maptools')
+if (!require('rgeos')) install.packages('rgeos')
+
+plot(all_sensors)
It seems we have to reduce our area of interest to Germany.
- -But what do these sensor stations actually measure? Lets find out.
-osem_phenomena()
gives us a named list of of the counts of each observed
-phenomenon for the given set of sensor stations:
phenoms = osem_phenomena(all_sensors)
-str(phenoms)
-
-
-## List of 432
-## $ Temperatur : int 1607
-## $ rel. Luftfeuchte : int 1421
+But what do these sensor stations actually measure? Let’s find out. osem_phenomena()
 gives us a named list of the counts of each observed phenomenon for the given set of sensor stations:
+phenoms = osem_phenomena(all_sensors)
+str(phenoms)
+## List of 433
+## $ Temperatur : int 1608
+## $ rel. Luftfeuchte : int 1422
## $ PM10 : int 1200
## $ PM2.5 : int 1198
-## $ Luftdruck : int 824
-## $ Beleuchtungsstärke : int 480
-## $ UV-Intensität : int 471
+## $ Luftdruck : int 825
+## $ Beleuchtungsstärke : int 481
+## $ UV-Intensität : int 472
## $ Luftfeuchtigkeit : int 84
## $ Temperature : int 49
## $ Humidity : int 42
@@ -309,10 +152,10 @@ str(phenoms)
## $ Feinstaub PM10 : int 10
## $ Feinstaub PM2.5 : int 9
## $ Kosteus : int 8
+## $ Temperatur DHT22 : int 8
## $ Valonmäärä : int 8
## $ temperature : int 8
## $ PM01 : int 7
-## $ Temperatur DHT22 : int 7
## $ UV-säteily : int 7
## $ Niederschlag : int 6
## $ UV-Strahlung : int 6
@@ -330,6 +173,7 @@ str(phenoms)
## $ UV-Säteily : int 4
## $ lautstärke : int 4
## $ rel. Luftfeuchte 1 : int 4
+## $ rel. Luftfeuchte DHT22 : int 4
## $ relative Luftfeuchtigkeit : int 4
## $ Air pressure : int 3
## $ Batterie : int 3
@@ -352,7 +196,6 @@ str(phenoms)
## $ Valoisuus : int 3
## $ Wind Gust : int 3
## $ pressure : int 3
-## $ rel. Luftfeuchte DHT22 : int 3
## $ 1 : int 2
## $ 10 : int 2
## $ 2 : int 2
@@ -384,22 +227,14 @@ str(phenoms)
## $ Sound : int 2
## $ Temperatur (DHT22) : int 2
## $ Temperatur BMP180 : int 2
-## [list output truncated]
-
-
-Thats quite some noise there, with many phenomena being measured by a single
-sensor only, or many duplicated phenomena due to slightly different spellings.
-We should clean that up, but for now let's just filter out the noise and find
-those phenomena with high sensor numbers:
-
-phenoms[phenoms > 20]
-
-
+## [list output truncated]
+That’s quite some noise there, with many phenomena being measured by a single sensor only, or many duplicated phenomena due to slightly different spellings. We should clean that up, but for now let’s just filter out the noise and find those phenomena with high sensor numbers:
+phenoms[phenoms > 20]
## $Temperatur
-## [1] 1607
+## [1] 1608
##
## $`rel. Luftfeuchte`
-## [1] 1421
+## [1] 1422
##
## $PM10
## [1] 1200
@@ -408,13 +243,13 @@ those phenomena with high sensor numbers:
## [1] 1198
##
## $Luftdruck
-## [1] 824
+## [1] 825
##
## $Beleuchtungsstärke
-## [1] 480
+## [1] 481
##
## $`UV-Intensität`
-## [1] 471
+## [1] 472
##
## $Luftfeuchtigkeit
## [1] 84
@@ -429,159 +264,99 @@ those phenomena with high sensor numbers:
## [1] 25
##
## $Lautstärke
-## [1] 21
-
-
-Alright, temperature it is! Fine particulate matter (PM2.5) seems to be more -interesting to analyze though. -We should check how many sensor stations provide useful data: We want only those -boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting -measurements:
- -pm25_sensors = osem_boxes(
- exposure = 'outdoor',
- date = Sys.time(), # ±4 hours
- phenomenon = 'PM2.5'
-)
-
-
-summary(pm25_sensors)
-
-
-## boxes total: 788
+## [1] 21
+Alright, temperature it is! Fine particulate matter (PM2.5) seems to be more interesting to analyze though. We should check how many sensor stations provide useful data: We want only those boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting measurements:
+pm25_sensors = osem_boxes(
+ exposure = 'outdoor',
+ date = Sys.time(), # ±4 hours
+ phenomenon = 'PM2.5'
+)
summary(pm25_sensors)
## boxes total: 791
##
## boxes by exposure:
## outdoor
-## 788
+## 791
##
## boxes by model:
## custom homeEthernetFeinstaub homeWifi
-## 28 37 6
+## 29 37 6
## homeWifiFeinstaub luftdaten_pms1003_bme280 luftdaten_pms5003_bme280
-## 57 1 2
+## 57 1 1
## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
-## 2 33 135
+## 2 32 137
## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
-## 14 31 442
+## 14 32 443
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 764 777 780 785 3
+## 771 780 784 789 2
##
## oldest box: 2016-06-02 12:09:47 (BalkonBox Mindener Str.)
## newest box: 2018-05-24 20:29:50 (Stadthalle)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 2.000 4.000 4.000 4.615 5.000 12.000
-
-
-plot(pm25_sensors)
-
-
-
-
+## 2.000 4.000 4.000 4.617 5.000 12.000
+plot(pm25_sensors)
That's still more than 200 measuring stations; we can work with that.
- +Having analyzed the available data sources, let's finally get some measurements.
-We could call osem_measurements(pm25_sensors)
now, however we are focussing on
-a restricted area of interest, the city of Berlin.
-Luckily we can get the measurements filtered by a bounding box:
library(sf)
-
-
-## Linking to GEOS 3.6.1, GDAL 2.1.4, proj.4 4.9.3
-
-
-library(units)
-library(lubridate)
-
-
-##
-## Attaching package: 'lubridate'
-
-
-## The following object is masked from 'package:base':
-##
-## date
-
-
-library(dplyr)
-
-
+Having analyzed the available data sources, let’s finally get some measurements. We could call osem_measurements(pm25_sensors)
now, however we are focussing on a restricted area of interest, the city of Berlin. Luckily we can get the measurements filtered by a bounding box:
library(sf)
## Linking to GEOS 3.5.1, GDAL 2.2.2, proj.4 4.9.2
+library(units)
##
-## Attaching package: 'dplyr'
-
-
-## The following objects are masked from 'package:lubridate':
-##
-## intersect, setdiff, union
-
-
-## The following objects are masked from 'package:rgeos':
+## Attaching package: 'units'
+## The following object is masked from 'package:base':
##
-## intersect, setdiff, union
-
-
-## The following objects are masked from 'package:stats':
-##
-## filter, lag
-
-
-## The following objects are masked from 'package:base':
-##
-## intersect, setdiff, setequal, union
-
-
-# construct a bounding box: 12 kilometers around Berlin
-berlin = st_point(c(13.4034, 52.5120)) %>%
- st_sfc(crs = 4326) %>%
- st_transform(3857) %>% # allow setting a buffer in meters
- st_buffer(set_units(12, km)) %>%
- st_transform(4326) %>% # the opensensemap expects WGS 84
- st_bbox()
-
-
-pm25 = osem_measurements(
+## %*%
+library(lubridate)
+library(dplyr)
+
+# construct a bounding box: 12 kilometers around Berlin
+berlin = st_point(c(13.4034, 52.5120)) %>%
+ st_sfc(crs = 4326) %>%
+ st_transform(3857) %>% # allow setting a buffer in meters
+ st_buffer(set_units(12, km)) %>%
+ st_transform(4326) %>% # the opensensemap expects WGS 84
+ st_bbox()
pm25 = osem_measurements(
berlin,
- phenomenon = 'PM2.5',
- from = now() - days(20), # defaults to 2 days
- to = now()
+ phenomenon = 'PM2.5',
+ from = now() - days(20), # defaults to 2 days
+ to = now()
)
-plot(pm25)
-
-
-
-
-Now we can get started with actual spatiotemporal data analysis. -First, lets mask the seemingly uncalibrated sensors:
- -outliers = filter(pm25, value > 100)$sensorId
-bad_sensors = outliers[, drop = T] %>% levels()
-
-pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
-
+plot(pm25)
+Now we can get started with actual spatiotemporal data analysis. First, let’s mask the seemingly uncalibrated sensors:
+outliers = filter(pm25, value > 100)$sensorId
+bad_sensors = outliers[, drop = T] %>% levels()
+pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
Then plot the measuring locations, flagging the outliers:
- -st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
-
-
-
-
+st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
Removing these sensors yields a nicer time series plot:
+pm25 %>% filter(invalid == FALSE) %>% plot()
Further analysis: comparison with LANUV data TODO
pm25 %>% filter(invalid == FALSE) %>% plot()
-
-
-Further analysis: comparison with LANUV data TODO
It may be useful to download data from openSenseMap only once. -For reproducible results, the data could be saved to disk, and reloaded at a -later point.
+It may be useful to download data from openSenseMap only once. For reproducible results, the data could be saved to disk, and reloaded at a later point.
This avoids..
-This vignette shows how to use this built in opensensmapr
feature, and
-how to do it yourself, if you want to store to other data formats.
This vignette shows how to use this built in opensensmapr
feature, and how to do it yourself, if you want to store to other data formats.
All data retrieval functions of opensensmapr
have a built in caching feature,
-which serializes an API response to disk.
-Subsequent identical requests will then return the serialized data instead of making
-another request.
-To do so, each request is given a unique ID based on its parameters.
All data retrieval functions of opensensmapr
have a built in caching feature, which serializes an API response to disk. Subsequent identical requests will then return the serialized data instead of making another request. To do so, each request is given a unique ID based on its parameters.
To use this feature, just add a path to a directory to the cache
parameter:
b = osem_boxes(cache = tempdir())
-list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
-
-
-## [1] "osemcache.c54710f66b662e29dd86b089962b0f598e47eddb.rds"
-
-
-# the next identical request will hit the cache only!
-b = osem_boxes(cache = tempdir())
-
-# requests without the cache parameter will still be performed normally
-b = osem_boxes()
-
-
-You can maintain multiple caches simultaneously which allows to store only -serialized data related to a script in its directory:
- -cacheDir = getwd() # current working directory
-b = osem_boxes(cache = cacheDir)
-
-# the next identical request will hit the cache only!
-b = osem_boxes(cache = cacheDir)
-
-
+b = osem_boxes(cache = tempdir())
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
## [1] "osemcache.c54710f66b662e29dd86b089962b0f598e47eddb.rds"
+# the next identical request will hit the cache only!
+b = osem_boxes(cache = tempdir())
+
+# requests without the cache parameter will still be performed normally
+b = osem_boxes()
You can maintain multiple caches simultaneously, which allows you to store only serialized data related to a script in its directory:
+cacheDir = getwd() # current working directory
+b = osem_boxes(cache = cacheDir)
+
+# the next identical request will hit the cache only!
+b = osem_boxes(cache = cacheDir)
To get fresh results again, just call osem_clear_cache()
for the respective cache:
osem_clear_cache() # clears default cache
-
-
-## [1] TRUE
-
-
-osem_clear_cache(getwd()) # clears a custom cache
-
-
-## [1] TRUE
-
-
+osem_clear_cache() # clears default cache
## [1] TRUE
+osem_clear_cache(getwd()) # clears a custom cache
## [1] TRUE
+If you want to roll your own serialization method to support custom data formats, -here's how:
- -# this section requires:
-library(opensensmapr)
-library(jsonlite)
-library(readr)
-
-# first get our example data:
-boxes = osem_boxes(grouptag = 'ifgi')
-measurements = osem_measurements(boxes, phenomenon = 'PM10')
-
-
-If you are paranoid and worry about .rds
files not being decodable anymore
-in the (distant) future, you could serialize to a plain text format such as JSON.
-This of course comes at the cost of storage space and performance.
# serializing senseBoxes to JSON, and loading from file again:
-write(jsonlite::serializeJSON(measurements), 'boxes.json')
-boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
-
-
-Both methods also persist the R object metadata (classes, attributes). -If you were to use a serialization method that can't persist object metadata, you -could re-apply it with the following functions:
- -# note the toJSON call
-write(jsonlite::toJSON(measurements), 'boxes_bad.json')
-boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
-
-boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
-class(boxes_with_attrs)
-
-
-## [1] "sensebox" "data.frame"
-
-
+If you want to roll your own serialization method to support custom data formats, here’s how:
+# this section requires:
+library(opensensmapr)
+library(jsonlite)
+library(readr)
+
+# first get our example data:
+boxes = osem_boxes(grouptag = 'ifgi')
+measurements = osem_measurements(boxes, phenomenon = 'PM10')
If you are paranoid and worry about .rds
files not being decodable anymore in the (distant) future, you could serialize to a plain text format such as JSON. This of course comes at the cost of storage space and performance.
# serializing senseBoxes to JSON, and loading from file again:
+write(jsonlite::serializeJSON(measurements), 'boxes.json')
+boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
Both methods also persist the R object metadata (classes, attributes). If you were to use a serialization method that can’t persist object metadata, you could re-apply it with the following functions:
+# note the toJSON call
+write(jsonlite::toJSON(measurements), 'boxes_bad.json')
+boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
+
+boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
+class(boxes_with_attrs)
## [1] "sensebox" "data.frame"
The same goes for measurements via osem_as_measurements()
.
For truly reproducible code you want it to work and return the same results – -no matter if you run it the first time or a consecutive time, and without making -changes to it.
- -Therefore we need a wrapper around the save-to-file & load-from-file logic. -The following examples show a way to do just that, and where inspired by -this reproducible analysis by Daniel Nuest.
- -# offline logic
-osem_offline = function (func, file, format='rds', ...) {
- # deserialize if file exists, otherwise download and serialize
- if (file.exists(file)) {
- if (format == 'json')
- jsonlite::unserializeJSON(readr::read_file(file))
+For truly reproducible code you want it to work and return the same results – no matter if you run it the first time or a consecutive time, and without making changes to it.
+Therefore we need a wrapper around the save-to-file & load-from-file logic. The following examples show a way to do just that, and were inspired by this reproducible analysis by Daniel Nuest.
+# offline logic
+osem_offline = function (func, file, format='rds', ...) {
+ # deserialize if file exists, otherwise download and serialize
+ if (file.exists(file)) {
+ if (format == 'json')
+ jsonlite::unserializeJSON(readr::read_file(file))
else
- readRDS(file)
+ readRDS(file)
} else {
- data = func(...)
- if (format == 'json')
- write(jsonlite::serializeJSON(data), file = file)
+ data = func(...)
+ if (format == 'json')
+ write(jsonlite::serializeJSON(data), file = file)
else
- saveRDS(data, file)
+ saveRDS(data, file)
data
}
}
-# wrappers for each download function
-osem_measurements_offline = function (file, ...) {
- osem_offline(opensensmapr::osem_measurements, file, ...)
-}
-osem_boxes_offline = function (file, ...) {
- osem_offline(opensensmapr::osem_boxes, file, ...)
-}
-osem_box_offline = function (file, ...) {
- osem_offline(opensensmapr::osem_box, file, ...)
-}
-osem_counts_offline = function (file, ...) {
- osem_offline(opensensmapr::osem_counts, file, ...)
-}
-
-
-Thats it! Now let's try it out:
-
-# first run; will download and save to disk
-b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
-
-# consecutive runs; will read from disk
-b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
-class(b1) == class(b2)
-
-
-## [1] TRUE TRUE
-
-
-# we can even omit the arguments now (though thats not really the point here)
-b3 = osem_boxes_offline('mobileboxes.rds')
-nrow(b1) == nrow(b3)
-
-
-## [1] TRUE
-
-
-# verify that the custom sensebox methods are still working
-summary(b2)
-
-
+# wrappers for each download function
+osem_measurements_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_measurements, file, ...)
+}
+osem_boxes_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_boxes, file, ...)
+}
+osem_box_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_box, file, ...)
+}
+osem_counts_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_counts, file, ...)
+}
+That’s it! Now let’s try it out:
+# first run; will download and save to disk
+b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
+
+# consecutive runs; will read from disk
+b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
+class(b1) == class(b2)
+## [1] TRUE TRUE
+# we can even omit the arguments now (though thats not really the point here)
+b3 = osem_boxes_offline('mobileboxes.rds')
+nrow(b1) == nrow(b3)
+## [1] TRUE
+# verify that the custom sensebox methods are still working
+summary(b2)
## boxes total: 55
##
## boxes by exposure:
@@ -390,35 +197,34 @@ summary(b2)
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 16 18 24 43 12
+## 16 16 24 43 12
##
## oldest box: 2017-05-24 08:16:36 (Feinstaub Hauptstrasse Steampunk-Design)
## newest box: 2018-05-24 07:08:32 (Josi Test)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 1.000 4.000 4.000 4.618 5.000 22.000
-
-
-plot(b3)
-
-
-
-
+## 1.000 4.000 4.000 4.618 5.000 22.000
+plot(b3)
To re-download the data, just clear the files that were created in the process:
+file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
## Warning in file.remove("mobileboxes.rds", "boxes_bad.json", "boxes.json", :
+## cannot remove file 'measurements.rds', reason 'No such file or directory'
+A possible extension to this scheme comes to mind: Omit the specification of a filename, and assign a unique ID to the request instead. For example, one could calculate the SHA-1 hash of the parameters, and use it as filename.
+file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
-
-## Warning in file.remove("mobileboxes.rds", "boxes_bad.json", "boxes.json", :
-## cannot remove file 'measurements.rds', reason 'No such file or directory'
-
-A possible extension to this scheme comes to mind: Omit the specification of a -filename, and assign a unique ID to the request instead. -For example, one could calculate the SHA-1 hash of the parameters, and use it -as filename.
+ + - diff --git a/vignettes/osem-history.Rmd b/vignettes/osem-history.Rmd index ca7af72..ff9f3d3 100644 --- a/vignettes/osem-history.Rmd +++ b/vignettes/osem-history.Rmd @@ -14,8 +14,10 @@ output: theme: lumen toc: yes toc_float: yes -vignette: | - %\VignetteIndexEntry{Visualising the History of openSenseMap.org} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} +vignette: > + %\VignetteIndexEntry{Visualising the History of openSenseMap.org} + %\VignetteEngine{knitr::rmarkdown} + %\VignetteEncoding{UTF-8} --- > This vignette serves as an example on data wrangling & visualization with diff --git a/vignettes/osem-intro.Rmd b/vignettes/osem-intro.Rmd index 9906138..7a2ad09 100644 --- a/vignettes/osem-intro.Rmd +++ b/vignettes/osem-intro.Rmd @@ -1,5 +1,5 @@ --- -title: "Analyzing environmental sensor data from openSenseMap.org in R" +title: "Exploring the openSenseMap Dataset" author: "Norwin Roosen" date: "`r Sys.Date()`" output: @@ -8,7 +8,7 @@ output: fig_width: 6 fig_height: 4 vignette: > - %\VignetteIndexEntry{Analyzing environmental sensor data from openSenseMap.org in R} + %\VignetteIndexEntry{Exploring the openSenseMap Dataset} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- @@ -17,8 +17,6 @@ vignette: > knitr::opts_chunk$set(echo = TRUE) ``` -## Analyzing environmental sensor data from openSenseMap.org in R - This package provides data ingestion functions for almost any data stored on the open data platform for environemental sensordata