diff --git a/R/counts.R b/R/counts.R index fa0a1b0..4c341dd 100644 --- a/R/counts.R +++ b/R/counts.R @@ -7,6 +7,9 @@ #' @details Note that the API caches these values for 5 minutes. #' #' @param endpoint The URL of the openSenseMap API +#' @param cache Whether to cache the result, defaults to false. +#' If a valid path to a directory is given, the response will be cached there. +#' Subsequent identical requests will return the cached data instead. #' @return A named \code{list} containing the counts #' #' @export diff --git a/inst/doc/osem-intro.R b/inst/doc/osem-intro.R index 4cb7079..575f473 100644 --- a/inst/doc/osem-intro.R +++ b/inst/doc/osem-intro.R @@ -70,3 +70,4 @@ st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T) ## ------------------------------------------------------------------------ pm25 %>% filter(invalid == FALSE) %>% plot() + diff --git a/inst/doc/osem-intro.html b/inst/doc/osem-intro.html index 31bf7e2..32e4d1f 100644 --- a/inst/doc/osem-intro.html +++ b/inst/doc/osem-intro.html @@ -1,346 +1,587 @@ + +
+ - +This package provides data ingestion functions for almost any data stored on the open data platform for environemental sensordata https://opensensemap.org. Its main goals are to provide means for:
+ +This package provides data ingestion functions for almost any data stored on the +open data platform for environmental sensor data https://opensensemap.org. +Its main goals are to provide means for:
+Before we look at actual observations, lets get a grasp of the openSenseMap datasets’ structure.
-library(magrittr)
-library(opensensmapr)
-all_sensors = osem_boxes()
summary(all_sensors)
## boxes total: 704
+Before we look at actual observations, let's get a grasp of the openSenseMap
+datasets' structure.
+
+library(magrittr)
+library(opensensmapr)
+
+all_sensors = osem_boxes()
+
+
+summary(all_sensors)
+
+
+## boxes total: 1779
##
## boxes by exposure:
-## indoor outdoor unknown
-## 127 556 21
+## indoor mobile outdoor unknown
+## 288 55 1416 20
##
## boxes by model:
-## custom homeEthernet homeEthernetFeinstaub
-## 209 78 8
-## homeWifi homeWifiFeinstaub luftdaten_sds011
-## 106 34 22
-## luftdaten_sds011_bme280 luftdaten_sds011_bmp180 luftdaten_sds011_dht11
-## 41 3 14
-## luftdaten_sds011_dht22
-## 189
+## custom homeEthernet homeEthernetFeinstaub
+## 335 92 49
+## homeWifi homeWifiFeinstaub luftdaten_pms1003
+## 192 144 1
+## luftdaten_pms1003_bme280 luftdaten_pms5003_bme280 luftdaten_pms7003_bme280
+## 1 5 2
+## luftdaten_sds011 luftdaten_sds011_bme280 luftdaten_sds011_bmp180
+## 57 197 19
+## luftdaten_sds011_dht11 luftdaten_sds011_dht22
+## 46 639
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 0 0 0 0 704
+## 921 960 1089 1427 235
##
## oldest box: 2014-05-28 15:36:14 (CALIMERO)
-## newest box: 2017-08-24 17:38:44 (Burgweinting)
+## newest box: 2018-05-24 20:29:50 (Stadthalle)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 1.000 4.000 5.000 4.605 5.000 17.000
-This gives a good overview already: As of writing this, there are more than 700 sensor stations, of which ~50% are currently running. Most of them are placed outdoors and have around 5 sensors each. The oldest station is from May 2014, while the latest station was registered a couple of minutes ago.
-Another feature of interest is the spatial distribution of the boxes: plot()
can help us out here. This function requires a bunch of optional dependencies though.
-if (!require('maps')) install.packages('maps')
-if (!require('maptools')) install.packages('maptools')
-if (!require('rgeos')) install.packages('rgeos')
-
-plot(all_sensors)
-
+## 1.000 4.000 4.000 4.601 5.000 33.000
+
+
+This gives a good overview already: As of writing this, there are more than 1700 +sensor stations, of which ~50% are currently running. Most of them are placed +outdoors and have around 5 sensors each. +The oldest station is from May 2014, while the latest station was registered a +couple of minutes ago.
+ +Another feature of interest is the spatial distribution of the boxes: plot()
+can help us out here. This function requires a bunch of optional dependencies though.
if (!require('maps')) install.packages('maps')
+if (!require('maptools')) install.packages('maptools')
+if (!require('rgeos')) install.packages('rgeos')
+
+plot(all_sensors)
+
+
+
+
It seems we have to reduce our area of interest to Germany.
-But what do these sensor stations actually measure? Lets find out. osem_phenomena()
gives us a named list of of the counts of each observed phenomenon for the given set of sensor stations:
phenoms = osem_phenomena(all_sensors)
-str(phenoms)
## List of 189
-## $ Temperatur : int 647
-## $ rel. Luftfeuchte : int 534
-## $ Luftdruck : int 368
-## $ PM10 : int 347
-## $ PM2.5 : int 347
-## $ UV-Intensität : int 255
-## $ Beleuchtungsstärke : int 251
-## $ Luftfeuchtigkeit : int 83
-## $ Schall : int 26
-## $ Helligkeit : int 20
-## $ Licht : int 20
-## $ UV : int 15
-## $ Humidity : int 12
-## $ Temperature : int 11
-## $ Anderer : int 10
-## $ Ilmanpaine : int 9
-## $ Lämpötila : int 9
-## $ Licht (digital) : int 9
-## $ Valonmäärä : int 8
-## $ Windgeschwindigkeit : int 8
-## $ Kosteus : int 7
-## $ Luftfeuchte : int 7
-## $ Lautstärke : int 6
-## $ Signal : int 6
-## $ UV-säteily : int 6
-## $ Wind speed : int 5
-## $ Pressure : int 4
-## $ temperature : int 4
-## $ Windrichtung : int 4
-## $ DS18B20_Probe01 : int 3
-## $ DS18B20_Probe02 : int 3
-## $ DS18B20_Probe03 : int 3
-## $ DS18B20_Probe04 : int 3
-## $ DS18B20_Probe05 : int 3
-## $ Light : int 3
-## $ Niederschlag : int 3
-## $ UV Index : int 3
-## $ UV-Säteily : int 3
-## $ UV-Strahlung : int 3
-## $ C2H5OH : int 2
-## $ CO : int 2
-## $ CPU-Temp : int 2
-## $ Feinstaub : int 2
-## $ Feinstaub PM10 : int 2
-## $ Feinstaub PM2,5 : int 2
-## $ H2 : int 2
-## $ humidity : int 2
-## $ Ilmankosteus : int 2
-## $ NH3 : int 2
-## $ NO2 : int 2
-## $ Regen : int 2
-## $ rel. Luftfeuchtigkeit : int 2
-## $ Relative Humidity : int 2
-## $ Temperatur BMP280 : int 2
-## $ Temperatur DHT22 : int 2
-## $ Temperatur HDC1008 : int 2
-## $ TemperaturBME : int 2
-## $ test : int 2
-## $ UV-Index : int 2
-## $ Wassertemperatur : int 2
-## $ Wifi-Stärke : int 2
-## $ Windböen : int 2
-## $ Wolkenbedeckung : int 2
-## $ Air Preassure : int 1
-## $ Air pressure : int 1
-## $ Air Temperature : int 1
-## $ Akkuspannung Terrasse : int 1
-## $ Akkuspannung Unten Eingang : int 1
-## $ Attendance : int 1
-## $ Batterie : int 1
-## $ Batterieladung : int 1
-## $ Battery : int 1
-## $ Beleuchtungsstaerke : int 1
-## $ Beleuchtungsstärke des sichtbaren Lichts: int 1
-## $ Bodenfeuchte : int 1
-## $ Bodentemperatur : int 1
-## $ C3H8 : int 1
-## $ C4H10 : int 1
-## $ CH4 : int 1
-## $ CO2 : int 1
-## $ CO2-Konzentration : int 1
-## $ Dämmerung : int 1
-## $ dT : int 1
-## $ Dust Sensor : int 1
-## $ Dust_Concentration : int 1
-## $ Eingangsspannung : int 1
-## $ Feinstaub P10 : int 1
-## $ Feinstaub P2.5 : int 1
-## $ Feinstaubgehalt PM10 : int 1
-## $ Feinstaubgehalt PM2.5 : int 1
-## $ Feinstaubkonzentration : int 1
-## $ Feuchte : int 1
-## $ Feuchtigkeit : int 1
-## $ filedData : int 1
-## $ H2, LPG, CH4, CO, Alcohol : int 1
-## $ Höhe : int 1
-## $ Illuminance : int 1
-## $ Infrared light : int 1
-## $ Intensität der ultravioletten Strahlung : int 1
-## [list output truncated]
-Thats quite some noise there, with many phenomena being measured by a single sensor only, or many duplicated phenomena due to slightly different spellings. We should clean that up, but for now let’s just filter out the noise and find those phenomena with high sensor numbers:
-phenoms[phenoms > 20]
But what do these sensor stations actually measure? Lets find out.
+osem_phenomena()
gives us a named list of the counts of each observed
+phenomenon for the given set of sensor stations:
phenoms = osem_phenomena(all_sensors)
+str(phenoms)
+
+
+## List of 432
+## $ Temperatur : int 1607
+## $ rel. Luftfeuchte : int 1421
+## $ PM10 : int 1200
+## $ PM2.5 : int 1198
+## $ Luftdruck : int 824
+## $ Beleuchtungsstärke : int 480
+## $ UV-Intensität : int 471
+## $ Luftfeuchtigkeit : int 84
+## $ Temperature : int 49
+## $ Humidity : int 42
+## $ Helligkeit : int 25
+## $ Lautstärke : int 21
+## $ Schall : int 20
+## $ UV : int 20
+## $ Pressure : int 19
+## $ Licht : int 18
+## $ Luftfeuchte : int 14
+## $ Umgebungslautstärke : int 14
+## $ Lämpötila : int 13
+## $ Ilmanpaine : int 12
+## $ Signal : int 12
+## $ Feinstaub PM10 : int 10
+## $ Feinstaub PM2.5 : int 9
+## $ Kosteus : int 8
+## $ Valonmäärä : int 8
+## $ temperature : int 8
+## $ PM01 : int 7
+## $ Temperatur DHT22 : int 7
+## $ UV-säteily : int 7
+## $ Niederschlag : int 6
+## $ UV-Strahlung : int 6
+## $ Wind speed : int 6
+## $ Windgeschwindigkeit : int 6
+## $ humidity : int 6
+## $ Ilmankosteus : int 5
+## $ Wassertemperatur : int 5
+## $ Windrichtung : int 5
+## $ rel. Luftfeuchtigkeit : int 5
+## $ Druck : int 4
+## $ Light : int 4
+## $ Temperature 1 : int 4
+## $ UV Index : int 4
+## $ UV-Säteily : int 4
+## $ lautstärke : int 4
+## $ rel. Luftfeuchte 1 : int 4
+## $ relative Luftfeuchtigkeit : int 4
+## $ Air pressure : int 3
+## $ Batterie : int 3
+## $ Battery : int 3
+## $ DS18B20_Probe01 : int 3
+## $ DS18B20_Probe02 : int 3
+## $ DS18B20_Probe03 : int 3
+## $ DS18B20_Probe04 : int 3
+## $ DS18B20_Probe05 : int 3
+## $ Licht (digital) : int 3
+## $ Luftdruck (BME280) : int 3
+## $ PM 10 : int 3
+## $ PM 2.5 : int 3
+## $ Temp : int 3
+## $ Temperatur (BME280) : int 3
+## $ Temperatur HDC1008 : int 3
+## $ Temperatura : int 3
+## $ Temperature 2 : int 3
+## $ UV-Index : int 3
+## $ Valoisuus : int 3
+## $ Wind Gust : int 3
+## $ pressure : int 3
+## $ rel. Luftfeuchte DHT22 : int 3
+## $ 1 : int 2
+## $ 10 : int 2
+## $ 2 : int 2
+## $ 3 : int 2
+## $ 4 : int 2
+## $ 5 : int 2
+## $ 6 : int 2
+## $ 7 : int 2
+## $ 8 : int 2
+## $ 9 : int 2
+## $ Air Pressure : int 2
+## $ Anderer : int 2
+## $ Battery voltage : int 2
+## $ CO2 : int 2
+## $ Feuchte : int 2
+## $ Illuminance : int 2
+## $ Intensity : int 2
+## $ Leitfähigkeit : int 2
+## $ Lichtintensität : int 2
+## $ Luftdruck BMP180 : int 2
+## $ Luftfeuchte (BME280) : int 2
+## $ Luftqualität : int 2
+## $ Lufttemperatur : int 2
+## $ PM25 : int 2
+## $ Radioactivity : int 2
+## $ Radioaktivität : int 2
+## $ Regen : int 2
+## $ Relative Humidity : int 2
+## $ Sound : int 2
+## $ Temperatur (DHT22) : int 2
+## $ Temperatur BMP180 : int 2
+## [list output truncated]
+
+
+That's quite some noise there, with many phenomena being measured by a single +sensor only, or many duplicated phenomena due to slightly different spellings. +We should clean that up, but for now let's just filter out the noise and find +those phenomena with high sensor numbers:
+ +phenoms[phenoms > 20]
+
+
## $Temperatur
-## [1] 647
+## [1] 1607
##
## $`rel. Luftfeuchte`
-## [1] 534
-##
-## $Luftdruck
-## [1] 368
+## [1] 1421
##
## $PM10
-## [1] 347
+## [1] 1200
##
## $PM2.5
-## [1] 347
+## [1] 1198
##
-## $`UV-Intensität`
-## [1] 255
+## $Luftdruck
+## [1] 824
##
## $Beleuchtungsstärke
-## [1] 251
+## [1] 480
+##
+## $`UV-Intensität`
+## [1] 471
##
## $Luftfeuchtigkeit
-## [1] 83
+## [1] 84
+##
+## $Temperature
+## [1] 49
##
-## $Schall
-## [1] 26
-Alright, temperature it is! Fine particulate matter (PM2.5) seems to be more interesting to analyze though. We should check how many sensor stations provide useful data: We want only those boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting measurements:
-pm25_sensors = osem_boxes(
- exposure = 'outdoor',
- date = Sys.time(), # ±4 hours
- phenomenon = 'PM2.5'
-)
summary(pm25_sensors)
## boxes total: 240
+## $Humidity
+## [1] 42
+##
+## $Helligkeit
+## [1] 25
+##
+## $Lautstärke
+## [1] 21
+
+
+Alright, temperature it is! Fine particulate matter (PM2.5) seems to be more +interesting to analyze though. +We should check how many sensor stations provide useful data: We want only those +boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting +measurements:
+ +pm25_sensors = osem_boxes(
+ exposure = 'outdoor',
+ date = Sys.time(), # ±4 hours
+ phenomenon = 'PM2.5'
+)
+
+
+summary(pm25_sensors)
+
+
+## boxes total: 788
##
## boxes by exposure:
## outdoor
-## 240
+## 788
##
## boxes by model:
-## custom homeEthernetFeinstaub homeWifi
-## 18 4 5
-## homeWifiFeinstaub luftdaten_sds011 luftdaten_sds011_bme280
-## 12 14 29
-## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
-## 1 11 146
+## custom homeEthernetFeinstaub homeWifi
+## 28 37 6
+## homeWifiFeinstaub luftdaten_pms1003_bme280 luftdaten_pms5003_bme280
+## 57 1 2
+## luftdaten_pms7003_bme280 luftdaten_sds011 luftdaten_sds011_bme280
+## 2 33 135
+## luftdaten_sds011_bmp180 luftdaten_sds011_dht11 luftdaten_sds011_dht22
+## 14 31 442
##
## $last_measurement_within
## 1h 1d 30d 365d never
-## 0 0 0 0 240
+## 764 777 780 785 3
##
-## oldest box: 2016-09-11 08:17:17 (Balkon Gasselstiege)
-## newest box: 2017-08-24 17:38:44 (Burgweinting)
+## oldest box: 2016-06-02 12:09:47 (BalkonBox Mindener Str.)
+## newest box: 2018-05-24 20:29:50 (Stadthalle)
##
## sensors per box:
## Min. 1st Qu. Median Mean 3rd Qu. Max.
-## 2.000 4.000 4.000 4.275 4.000 10.000
-plot(pm25_sensors)
plot(pm25_sensors)
+
+
+
+
Thats still more than 200 measuring stations, we can work with that.
-Having analyzed the available data sources, let’s finally get some measurements. We could call osem_measurements(pm25_sensors)
now, however we are focussing on a restricted area of interest, the city of Berlin. Luckily we can get the measurements filtered by a bounding box:
library(sf)
-library(units)
-library(lubridate)
-library(dplyr)
-
-# construct a bounding box: 12 kilometers around Berlin
-berlin = st_point(c(13.4034, 52.5120)) %>%
- st_sfc(crs = 4326) %>%
- st_transform(3857) %>% # allow setting a buffer in meters
- st_buffer(set_units(12, km)) %>%
- st_transform(4326) %>% # the opensensemap expects WGS 84
- st_bbox()
pm25 = osem_measurements(
+
+Having analyzed the available data sources, let's finally get some measurements.
+We could call osem_measurements(pm25_sensors)
now, however we are focussing on
+a restricted area of interest, the city of Berlin.
+Luckily we can get the measurements filtered by a bounding box:
+
+library(sf)
+
+
+## Linking to GEOS 3.6.1, GDAL 2.1.4, proj.4 4.9.3
+
+
+library(units)
+library(lubridate)
+
+
+##
+## Attaching package: 'lubridate'
+
+
+## The following object is masked from 'package:base':
+##
+## date
+
+
+library(dplyr)
+
+
+##
+## Attaching package: 'dplyr'
+
+
+## The following objects are masked from 'package:lubridate':
+##
+## intersect, setdiff, union
+
+
+## The following objects are masked from 'package:rgeos':
+##
+## intersect, setdiff, union
+
+
+## The following objects are masked from 'package:stats':
+##
+## filter, lag
+
+
+## The following objects are masked from 'package:base':
+##
+## intersect, setdiff, setequal, union
+
+
+# construct a bounding box: 12 kilometers around Berlin
+berlin = st_point(c(13.4034, 52.5120)) %>%
+ st_sfc(crs = 4326) %>%
+ st_transform(3857) %>% # allow setting a buffer in meters
+ st_buffer(set_units(12, km)) %>%
+ st_transform(4326) %>% # the opensensemap expects WGS 84
+ st_bbox()
+
+
+pm25 = osem_measurements(
berlin,
- phenomenon = 'PM2.5',
- from = now() - days(20), # defaults to 2 days
- to = now()
+ phenomenon = 'PM2.5',
+ from = now() - days(20), # defaults to 2 days
+ to = now()
)
-plot(pm25)
Now we can get started with actual spatiotemporal data analysis. First, lets mask the seemingly uncalibrated sensors:
-outliers = filter(pm25, value > 100)$sensorId
-bad_sensors = outliers[, drop = T] %>% levels()
+plot(pm25)
+
+
+
+
+Now we can get started with actual spatiotemporal data analysis. +First, let's mask the seemingly uncalibrated sensors:
+ +outliers = filter(pm25, value > 100)$sensorId
+bad_sensors = outliers[, drop = T] %>% levels()
+
+pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
+
-pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)Then plot the measuring locations, flagging the outliers:
-st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
+
+
+
+
Removing these sensors yields a nicer time series plot:
-pm25 %>% filter(invalid == FALSE) %>% plot()
Further analysis: comparison with LANUV data TODO
pm25 %>% filter(invalid == FALSE) %>% plot()
+
+
-
-
+Further analysis: comparison with LANUV data TODO
It may be useful to download data from openSenseMap only once. +For reproducible results, the data could be saved to disk, and reloaded at a +later point.
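+ +This avoids repeated downloads, saving time and bandwidth, and keeps results reproducible even when the data on openSenseMap changes.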
+ +This vignette shows how to use this built in opensensmapr
feature, and
+how to do it yourself, if you want to store to other data formats.
All data retrieval functions of opensensmapr
have a built in caching feature,
+which serializes an API response to disk.
+Subsequent identical requests will then return the serialized data instead of making
+another request.
+To do so, each request is given a unique ID based on its parameters.
To use this feature, just add a path to a directory to the cache
parameter:
b = osem_boxes(cache = tempdir())
+list.files(tempdir(), pattern = 'osemcache\\..*\\.rds')
+
+
+## [1] "osemcache.c54710f66b662e29dd86b089962b0f598e47eddb.rds"
+
+
+# the next identical request will hit the cache only!
+b = osem_boxes(cache = tempdir())
+
+# requests without the cache parameter will still be performed normally
+b = osem_boxes()
+
+
+You can maintain multiple caches simultaneously, which allows you to store only +serialized data related to a script in its directory:
+ +cacheDir = getwd() # current working directory
+b = osem_boxes(cache = cacheDir)
+
+# the next identical request will hit the cache only!
+b = osem_boxes(cache = cacheDir)
+
+
+To get fresh results again, just call osem_clear_cache()
for the respective cache:
osem_clear_cache() # clears default cache
+
+
+## [1] TRUE
+
+
+osem_clear_cache(getwd()) # clears a custom cache
+
+
+## [1] TRUE
+
+
+If you want to roll your own serialization method to support custom data formats, +here's how:
+ +# this section requires:
+library(opensensmapr)
+library(jsonlite)
+library(readr)
+
+# first get our example data:
+boxes = osem_boxes(grouptag = 'ifgi')
+measurements = osem_measurements(boxes, phenomenon = 'PM10')
+
+
+If you are paranoid and worry about .rds
files not being decodable anymore
+in the (distant) future, you could serialize to a plain text format such as JSON.
+This of course comes at the cost of storage space and performance.
# serializing senseBoxes to JSON, and loading from file again:
+write(jsonlite::serializeJSON(boxes), 'boxes.json')
+boxes_from_file = jsonlite::unserializeJSON(readr::read_file('boxes.json'))
+
+
+Both methods also persist the R object metadata (classes, attributes). +If you were to use a serialization method that can't persist object metadata, you +could re-apply it with the following functions:
+ +# note the toJSON call
+write(jsonlite::toJSON(boxes), 'boxes_bad.json')
+boxes_without_attrs = jsonlite::fromJSON('boxes_bad.json')
+
+# re-apply the sensebox class that the toJSON / fromJSON round trip did not persist
+boxes_with_attrs = osem_as_sensebox(boxes_without_attrs)
+class(boxes_with_attrs)
+
+
+## [1] "sensebox" "data.frame"
+
+
+The same goes for measurements via osem_as_measurements()
.
For truly reproducible code you want it to work and return the same results – +no matter if you run it the first time or a consecutive time, and without making +changes to it.
+ +Therefore we need a wrapper around the save-to-file & load-from-file logic. +The following examples show a way to do just that, and were inspired by +this reproducible analysis by Daniel Nuest.
+ +# offline logic
+osem_offline = function (func, file, format='rds', ...) { # func: download function, called as func(...); file: path of the cache file
+ # returns the contents of `file` if it already exists; otherwise calls func(...), writes the result to `file` and returns it. format 'json' uses jsonlite (un)serializeJSON, any other value uses readRDS / saveRDS
+ if (file.exists(file)) { # cache hit: func is not called, so the arguments in ... are ignored
+ if (format == 'json')
+ jsonlite::unserializeJSON(readr::read_file(file))
+ else
+ readRDS(file)
+ } else { # cache miss: download, store on disk, then return the fresh data
+ data = func(...) # NOTE(review): `format` precedes `...`, so a third positional argument binds to `format` instead of being forwarded; pass download arguments by name
+ if (format == 'json')
+ write(jsonlite::serializeJSON(data), file = file)
+ else
+ saveRDS(data, file)
+ data
+ }
+}
+
+# wrappers for each download function: `file` is the cache file, everything in ... (including a named `format`) is handed on to osem_offline
+osem_measurements_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_measurements, file, ...)
+}
+osem_boxes_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_boxes, file, ...)
+}
+osem_box_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_box, file, ...)
+}
+osem_counts_offline = function (file, ...) {
+ osem_offline(opensensmapr::osem_counts, file, ...)
+}
+
+
+That's it! Now let's try it out:
+ +# first run; will download and save to disk
+b1 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
+
+# consecutive runs; will read from disk
+b2 = osem_boxes_offline('mobileboxes.rds', exposure='mobile')
+class(b1) == class(b2)
+
+
+## [1] TRUE TRUE
+
+
+# we can even omit the arguments now (though thats not really the point here)
+b3 = osem_boxes_offline('mobileboxes.rds')
+nrow(b1) == nrow(b3)
+
+
+## [1] TRUE
+
+
+# verify that the custom sensebox methods are still working
+summary(b2)
+
+
+## boxes total: 55
+##
+## boxes by exposure:
+## mobile
+## 55
+##
+## boxes by model:
+## custom homeEthernet homeWifi
+## 7 2 8
+## homeWifiFeinstaub luftdaten_pms5003_bme280 luftdaten_sds011_bme280
+## 6 2 9
+## luftdaten_sds011_dht11 luftdaten_sds011_dht22
+## 1 20
+##
+## $last_measurement_within
+## 1h 1d 30d 365d never
+## 16 18 24 43 12
+##
+## oldest box: 2017-05-24 08:16:36 (Feinstaub Hauptstrasse Steampunk-Design)
+## newest box: 2018-05-24 07:08:32 (Josi Test)
+##
+## sensors per box:
+## Min. 1st Qu. Median Mean 3rd Qu. Max.
+## 1.000 4.000 4.000 4.618 5.000 22.000
+
+
+plot(b3)
+
+
+
+
+To re-download the data, just clear the files that were created in the process:
+ +file.remove('mobileboxes.rds', 'boxes_bad.json', 'boxes.json', 'measurements.rds')
+
+
+## Warning in file.remove("mobileboxes.rds", "boxes_bad.json", "boxes.json", :
+## cannot remove file 'measurements.rds', reason 'No such file or directory'
+
+
+A possible extension to this scheme comes to mind: Omit the specification of a +filename, and assign a unique ID to the request instead. +For example, one could calculate the SHA-1 hash of the parameters, and use it +as filename.
+ + + + diff --git a/man/osem_box.Rd b/man/osem_box.Rd index f891654..cb6c629 100644 --- a/man/osem_box.Rd +++ b/man/osem_box.Rd @@ -4,12 +4,15 @@ \alias{osem_box} \title{Get a single senseBox by its ID} \usage{ -osem_box(boxId, endpoint = osem_endpoint()) +osem_box(boxId, endpoint = osem_endpoint(), cache = NA) } \arguments{ \item{boxId}{A string containing a senseBox ID} \item{endpoint}{The URL of the openSenseMap API instance} + +\item{cache}{Whether to cache the result, defaults to false. +If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.} } \value{ A \code{sensebox data.frame} containing a box in each row @@ -18,9 +21,17 @@ A \code{sensebox data.frame} containing a box in each row Get a single senseBox by its ID } \examples{ -# get a specific box by ID -b = osem_box('57000b8745fd40c8196ad04c') +\donttest{ + # get a specific box by ID + b = osem_box('57000b8745fd40c8196ad04c') + + # get a specific box by ID from a custom (selfhosted) openSenseMap API + b = osem_box('51030b8725fd30c2196277da', 'http://api.my-custom-osem.com') + # get a specific box by ID and cache the response, in order to provide + # reproducible results in the future. 
+ b = osem_box('51030b8725fd30c2196277da', cache = tempdir()) +} } \seealso{ \href{https://docs.opensensemap.org/#api-Measurements-findAllBoxes}{openSenseMap API documentation (web)} @@ -28,4 +39,6 @@ b = osem_box('57000b8745fd40c8196ad04c') \code{\link{osem_phenomena}} \code{\link{osem_boxes}} + +\code{\link{osem_clear_cache}} } diff --git a/man/osem_boxes.Rd b/man/osem_boxes.Rd index d8b5c9b..a7b13ad 100644 --- a/man/osem_boxes.Rd +++ b/man/osem_boxes.Rd @@ -6,7 +6,7 @@ \usage{ osem_boxes(exposure = NA, model = NA, grouptag = NA, date = NA, from = NA, to = NA, phenomenon = NA, endpoint = osem_endpoint(), - progress = TRUE) + progress = TRUE, cache = NA) } \arguments{ \item{exposure}{Only return boxes with the given exposure ('indoor', 'outdoor', 'mobile')} @@ -26,7 +26,11 @@ time interval as specified through \code{date} or \code{from / to}} \item{endpoint}{The URL of the openSenseMap API instance} -\item{progress}{Whether to print download progress information defaults to \code{TRUE}} +\item{progress}{Whether to print download progress information, defaults to \code{TRUE}} + +\item{cache}{Whether to cache the result, defaults to false. +If a valid path to a directory is given, the response will be cached there. 
+Subsequent identical requests will return the cached data instead.} } \value{ A \code{sensebox data.frame} containing a box in each row @@ -41,6 +45,7 @@ Note that some filters do not work together: } } \examples{ + \donttest{ # get *all* boxes available on the API b = osem_boxes() @@ -48,8 +53,31 @@ Note that some filters do not work together: # get all boxes with grouptag 'ifgi' that are placed outdoors b = osem_boxes(grouptag = 'ifgi', exposure = 'outdoor') + # get all boxes with model 'luftdaten_sds011_dht22' + b = osem_boxes(model = 'luftdaten_sds011_dht22') + # get all boxes that have measured PM2.5 in the last 4 hours b = osem_boxes(date = Sys.time(), phenomenon = 'PM2.5') + + # get all boxes that have measured PM2.5 between Jan & Feb 2018 + library(lubridate) + b = osem_boxes( + from = date('2018-01-01'), + to = date('2018-02-01'), + phenomenon = 'PM2.5' + ) + + # get all boxes from a custom (selfhosted) openSenseMap API + b = osem_boxes(endpoint = 'http://api.my-custom-osem.com') + + # get all boxes and cache the response, in order to provide + # reproducible results in the future. Also useful for development + # to avoid repeated loading times! 
+ b = osem_boxes(cache = getwd()) + b = osem_boxes(cache = getwd()) + + # get *all* boxes available on the API, without showing download progress + b = osem_boxes(progress = FALSE) } } \seealso{ @@ -58,4 +86,6 @@ Note that some filters do not work together: \code{\link{osem_phenomena}} \code{\link{osem_box}} + +\code{\link{osem_clear_cache}} } diff --git a/man/osem_clear_cache.Rd b/man/osem_clear_cache.Rd new file mode 100644 index 0000000..83bb123 --- /dev/null +++ b/man/osem_clear_cache.Rd @@ -0,0 +1,28 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/api.R +\name{osem_clear_cache} +\alias{osem_clear_cache} +\title{Purge cached responses from the given cache directory} +\usage{ +osem_clear_cache(location = tempdir()) +} +\arguments{ +\item{location}{A path to the cache directory, defaults to the +sessions' \code{tempdir()}} +} +\value{ +Boolean whether the deletion was successful +} +\description{ +Purge cached responses from the given cache directory +} +\examples{ +\donttest{ + osem_boxes(cache = tempdir()) + osem_clear_cache() + + cachedir = paste(getwd(), 'osemcache', sep = '/') + osem_boxes(cache = cachedir) + osem_clear_cache(cachedir) +} +} diff --git a/man/osem_counts.Rd b/man/osem_counts.Rd index cc7c9ef..0ea3151 100644 --- a/man/osem_counts.Rd +++ b/man/osem_counts.Rd @@ -4,10 +4,14 @@ \alias{osem_counts} \title{Get count statistics of the openSenseMap Instance} \usage{ -osem_counts(endpoint = osem_endpoint()) +osem_counts(endpoint = osem_endpoint(), cache = NA) } \arguments{ \item{endpoint}{The URL of the openSenseMap API} + +\item{cache}{Whether to cache the result, defaults to false. +If a valid path to a directory is given, the response will be cached there. 
+Subsequent identical requests will return the cached data instead.} } \value{ A named \code{list} containing the counts diff --git a/man/osem_measurements.Rd b/man/osem_measurements.Rd index f10d9c5..b26eec2 100644 --- a/man/osem_measurements.Rd +++ b/man/osem_measurements.Rd @@ -12,11 +12,12 @@ osem_measurements(x, ...) \method{osem_measurements}{default}(x, ...) \method{osem_measurements}{bbox}(x, phenomenon, exposure = NA, from = NA, - to = NA, columns = NA, ..., endpoint = osem_endpoint(), progress = T) + to = NA, columns = NA, ..., endpoint = osem_endpoint(), progress = T, + cache = NA) \method{osem_measurements}{sensebox}(x, phenomenon, exposure = NA, from = NA, to = NA, columns = NA, ..., endpoint = osem_endpoint(), - progress = T) + progress = T, cache = NA) } \arguments{ \item{x}{Depending on the method, either @@ -42,6 +43,9 @@ osem_measurements(x, ...) \item{endpoint}{The URL of the openSenseMap API} \item{progress}{Whether to print download progress information} + +\item{cache}{Whether to cache the result, defaults to false. +If a valid path to a directory is given, the response will be cached there. Subsequent identical requests will return the cached data instead.} } \value{ An \code{osem_measurements data.frame} containing the @@ -63,17 +67,49 @@ a bounding box spanning the whole world. \examples{ \donttest{ - # get measurements from all boxes - m1 = osem_measurements('Windrichtung') + # get measurements from all boxes on the phenomenon 'PM10' from the last 48h + m = osem_measurements('PM10') + + # get measurements from all mobile boxes on the phenomenon 'PM10' from the last 48h + m = osem_measurements('PM10', exposure = 'mobile') + + # get measurements and cache them locally in the working directory. + # subsequent identical requests will load from the cache instead, ensuring + # reproducibility and saving time and bandwidth! 
+ m = osem_measurements('PM10', exposure = 'mobile', cache = getwd()) + m = osem_measurements('PM10', exposure = 'mobile', cache = getwd()) + + # get measurements returning a custom selection of columns + m = osem_measurements('PM10', exposure = 'mobile', columns = c( + 'value', + 'boxId', + 'sensorType', + 'lat', + 'lon', + 'height' + )) } \donttest{ - # get measurements from sensors within a bounding box + # get measurements from sensors within a custom WGS84 bounding box bbox = structure(c(7, 51, 8, 52), class = 'bbox') - m2 = osem_measurements(bbox, 'Temperatur') - - points = sf::st_multipoint(matrix(c(7.5, 7.8, 51.7, 52), 2, 2)) - bbox2 = sf::st_bbox(points) - m3 = osem_measurements(bbox2, 'Temperatur', exposure = 'outdoor') + m = osem_measurements(bbox, 'Temperatur') + + # construct a bounding box 12km around berlin using the sf package, + # and get measurements from stations within that box + library(sf) + library(units) + bbox2 = st_point(c(13.4034, 52.5120)) \%>\% + st_sfc(crs = 4326) \%>\% + st_transform(3857) \%>\% # allow setting a buffer in meters + st_buffer(set_units(12, km)) \%>\% + st_transform(4326) \%>\% # the opensensemap expects WGS 84 + st_bbox() + m = osem_measurements(bbox2, 'Temperatur', exposure = 'outdoor') + + # construct a bounding box from two points, + # and get measurements from stations within that box + points = st_multipoint(matrix(c(7.5, 7.8, 51.7, 52), 2, 2)) + bbox3 = st_bbox(points) + m = osem_measurements(bbox3, 'Temperatur', exposure = 'outdoor') } \donttest{ # get measurements from a set of boxes @@ -83,10 +119,25 @@ a bounding box spanning the whole world. # ...or a single box b = osem_box('57000b8745fd40c8196ad04c') m5 = osem_measurements(b, phenomenon = 'Temperatur') + + # get measurements from a single box from the last 40 days. + # requests are paged for long time frames, so the API's limitation + # does not apply! 
+ library(lubridate) + m1 = osem_measurements( + b, + 'Temperatur', + to = now(), + from = now() - days(40) + ) } } \seealso{ \href{https://docs.opensensemap.org/#api-Measurements-getDataMulti}{openSenseMap API documentation (web)} +\code{\link{osem_box}} + \code{\link{osem_boxes}} + +\code{\link{osem_clear_cache}} }