mirror of
https://github.com/sensebox/opensensmapr
synced 2025-02-18 17:23:57 +01:00
update inst/doc
This commit is contained in:
parent
8d0746b263
commit
8ef52f8f59
12 changed files with 714 additions and 708 deletions
|
@ -9,7 +9,10 @@ library(zoo) # rollmean()
|
|||
## ----download-----------------------------------------------------------------
|
||||
# if you want to see results for a specific subset of boxes,
|
||||
# just specify a filter such as grouptag='ifgi' here
|
||||
boxes = osem_boxes()
|
||||
|
||||
# boxes = osem_boxes(cache = '.')
|
||||
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
|
||||
|
||||
|
||||
## ----exposure_counts, message=FALSE-------------------------------------------
|
||||
exposure_counts = boxes %>%
|
||||
|
|
|
@ -43,7 +43,10 @@ So the first step is to retrieve *all the boxes*:
|
|||
```{r download}
|
||||
# if you want to see results for a specific subset of boxes,
|
||||
# just specify a filter such as grouptag='ifgi' here
|
||||
boxes = osem_boxes()
|
||||
|
||||
# boxes = osem_boxes(cache = '.')
|
||||
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
|
||||
|
||||
```
|
||||
|
||||
# Plot count of boxes by time {.tabset}
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -9,18 +9,15 @@ library(zoo) # rollmean()
|
|||
## ----download, results='hide', message=FALSE, warning=FALSE-------------------
|
||||
# if you want to see results for a specific subset of boxes,
|
||||
# just specify a filter such as grouptag='ifgi' here
|
||||
boxes_all = osem_boxes()
|
||||
boxes = boxes_all
|
||||
|
||||
# boxes = osem_boxes(cache = '.')
|
||||
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31")
|
||||
summary(boxes) -> summary.data.frame
|
||||
|
||||
## ----message=F, warning=F-----------------------------------------------------
|
||||
if (!require('maps')) install.packages('maps')
|
||||
if (!require('maptools')) install.packages('maptools')
|
||||
if (!require('rgeos')) install.packages('rgeos')
|
||||
|
||||
## ---- message=FALSE, warning=FALSE--------------------------------------------
|
||||
plot(boxes)
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
|
|
|
@ -45,8 +45,9 @@ So the first step is to retrieve *all the boxes*.
|
|||
```{r download, results='hide', message=FALSE, warning=FALSE}
|
||||
# if you want to see results for a specific subset of boxes,
|
||||
# just specify a filter such as grouptag='ifgi' here
|
||||
boxes_all = osem_boxes()
|
||||
boxes = boxes_all
|
||||
|
||||
# boxes = osem_boxes(cache = '.')
|
||||
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
|
||||
```
|
||||
# Introduction
|
||||
In the following we just want to have a look at the boxes created in 2022, so we filter for them.
|
||||
|
@ -65,11 +66,7 @@ summary(boxes) -> summary.data.frame
|
|||
Another feature of interest is the spatial distribution of the boxes: `plot()`
|
||||
can help us out here. This function requires a bunch of optional dependencies though.
|
||||
|
||||
```{r message=F, warning=F}
|
||||
if (!require('maps')) install.packages('maps')
|
||||
if (!require('maptools')) install.packages('maptools')
|
||||
if (!require('rgeos')) install.packages('rgeos')
|
||||
|
||||
```{r, message=FALSE, warning=FALSE}
|
||||
plot(boxes)
|
||||
```
|
||||
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -1,20 +1,17 @@
|
|||
## ----setup, include=FALSE-----------------------------------------------------
|
||||
knitr::opts_chunk$set(echo = TRUE)
|
||||
|
||||
## ----results = F--------------------------------------------------------------
|
||||
## ----results = FALSE----------------------------------------------------------
|
||||
library(magrittr)
|
||||
library(opensensmapr)
|
||||
|
||||
all_sensors = osem_boxes()
|
||||
# all_sensors = osem_boxes(cache = '.')
|
||||
all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
summary(all_sensors)
|
||||
|
||||
## ----message=F, warning=F-----------------------------------------------------
|
||||
if (!require('maps')) install.packages('maps')
|
||||
if (!require('maptools')) install.packages('maptools')
|
||||
if (!require('rgeos')) install.packages('rgeos')
|
||||
|
||||
## ---- message=FALSE, warning=FALSE--------------------------------------------
|
||||
plot(all_sensors)
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
|
@ -24,49 +21,54 @@ str(phenoms)
|
|||
## -----------------------------------------------------------------------------
|
||||
phenoms[phenoms > 20]
|
||||
|
||||
## ----results = F--------------------------------------------------------------
|
||||
pm25_sensors = osem_boxes(
|
||||
exposure = 'outdoor',
|
||||
date = Sys.time(), # ±4 hours
|
||||
phenomenon = 'PM2.5'
|
||||
)
|
||||
## ----results = FALSE, eval=FALSE----------------------------------------------
|
||||
# pm25_sensors = osem_boxes(
|
||||
# exposure = 'outdoor',
|
||||
# date = Sys.time(), # ±4 hours
|
||||
# phenomenon = 'PM2.5'
|
||||
# )
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
|
||||
|
||||
summary(pm25_sensors)
|
||||
plot(pm25_sensors)
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
## ---- results=FALSE, message=FALSE--------------------------------------------
|
||||
library(sf)
|
||||
library(units)
|
||||
library(lubridate)
|
||||
library(dplyr)
|
||||
|
||||
# construct a bounding box: 12 kilometers around Berlin
|
||||
berlin = st_point(c(13.4034, 52.5120)) %>%
|
||||
st_sfc(crs = 4326) %>%
|
||||
st_transform(3857) %>% # allow setting a buffer in meters
|
||||
st_buffer(set_units(12, km)) %>%
|
||||
st_transform(4326) %>% # the opensensemap expects WGS 84
|
||||
st_bbox()
|
||||
|
||||
## ----results = F--------------------------------------------------------------
|
||||
pm25 = osem_measurements(
|
||||
berlin,
|
||||
phenomenon = 'PM2.5',
|
||||
from = now() - days(3), # defaults to 2 days
|
||||
to = now()
|
||||
)
|
||||
|
||||
plot(pm25)
|
||||
## ----bbox, results = FALSE, eval=FALSE----------------------------------------
|
||||
# # construct a bounding box: 12 kilometers around Berlin
|
||||
# berlin = st_point(c(13.4034, 52.5120)) %>%
|
||||
# st_sfc(crs = 4326) %>%
|
||||
# st_transform(3857) %>% # allow setting a buffer in meters
|
||||
# st_buffer(set_units(12, km)) %>%
|
||||
# st_transform(4326) %>% # the opensensemap expects WGS 84
|
||||
# st_bbox()
|
||||
# pm25 = osem_measurements(
|
||||
# berlin,
|
||||
# phenomenon = 'PM2.5',
|
||||
# from = now() - days(3), # defaults to 2 days
|
||||
# to = now()
|
||||
# )
|
||||
#
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
|
||||
plot(pm25)
|
||||
|
||||
## ---- warning=FALSE-----------------------------------------------------------
|
||||
outliers = filter(pm25, value > 100)$sensorId
|
||||
bad_sensors = outliers[, drop = T] %>% levels()
|
||||
bad_sensors = outliers[, drop = TRUE] %>% levels()
|
||||
|
||||
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
|
||||
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
|
||||
|
||||
## -----------------------------------------------------------------------------
|
||||
pm25 %>% filter(invalid == FALSE) %>% plot()
|
||||
|
|
|
@ -28,11 +28,12 @@ Its main goals are to provide means for:
|
|||
Before we look at actual observations, let's get a grasp of the openSenseMap
|
||||
datasets' structure.
|
||||
|
||||
```{r results = F}
|
||||
```{r results = FALSE}
|
||||
library(magrittr)
|
||||
library(opensensmapr)
|
||||
|
||||
all_sensors = osem_boxes()
|
||||
# all_sensors = osem_boxes(cache = '.')
|
||||
all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
|
||||
```
|
||||
```{r}
|
||||
summary(all_sensors)
|
||||
|
@ -47,11 +48,7 @@ couple of minutes ago.
|
|||
Another feature of interest is the spatial distribution of the boxes: `plot()`
|
||||
can help us out here. This function requires a bunch of optional dependencies though.
|
||||
|
||||
```{r message=F, warning=F}
|
||||
if (!require('maps')) install.packages('maps')
|
||||
if (!require('maptools')) install.packages('maptools')
|
||||
if (!require('rgeos')) install.packages('rgeos')
|
||||
|
||||
```{r, message=FALSE, warning=FALSE}
|
||||
plot(all_sensors)
|
||||
```
|
||||
|
||||
|
@ -81,7 +78,7 @@ We should check how many sensor stations provide useful data: We want only those
|
|||
boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting
|
||||
measurements:
|
||||
|
||||
```{r results = F}
|
||||
```{r results = FALSE, eval=FALSE}
|
||||
pm25_sensors = osem_boxes(
|
||||
exposure = 'outdoor',
|
||||
date = Sys.time(), # ±4 hours
|
||||
|
@ -89,6 +86,8 @@ pm25_sensors = osem_boxes(
|
|||
)
|
||||
```
|
||||
```{r}
|
||||
pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
|
||||
|
||||
summary(pm25_sensors)
|
||||
plot(pm25_sensors)
|
||||
```
|
||||
|
@ -101,12 +100,16 @@ We could call `osem_measurements(pm25_sensors)` now, however we are focusing on
|
|||
a restricted area of interest, the city of Berlin.
|
||||
Luckily we can get the measurements filtered by a bounding box:
|
||||
|
||||
```{r}
|
||||
```{r, results=FALSE, message=FALSE}
|
||||
library(sf)
|
||||
library(units)
|
||||
library(lubridate)
|
||||
library(dplyr)
|
||||
|
||||
```
|
||||
|
||||
Since the API takes quite long to respond with measurements, especially when filtered by space and time, we do not run the following chunks when publishing the package on CRAN.
|
||||
```{r bbox, results = FALSE, eval=FALSE}
|
||||
# construct a bounding box: 12 kilometers around Berlin
|
||||
berlin = st_point(c(13.4034, 52.5120)) %>%
|
||||
st_sfc(crs = 4326) %>%
|
||||
|
@ -114,8 +117,6 @@ berlin = st_point(c(13.4034, 52.5120)) %>%
|
|||
st_buffer(set_units(12, km)) %>%
|
||||
st_transform(4326) %>% # the opensensemap expects WGS 84
|
||||
st_bbox()
|
||||
```
|
||||
```{r results = F}
|
||||
pm25 = osem_measurements(
|
||||
berlin,
|
||||
phenomenon = 'PM2.5',
|
||||
|
@ -123,15 +124,19 @@ pm25 = osem_measurements(
|
|||
to = now()
|
||||
)
|
||||
|
||||
```
|
||||
|
||||
```{r}
|
||||
pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
|
||||
plot(pm25)
|
||||
```
|
||||
|
||||
Now we can get started with actual spatiotemporal data analysis.
|
||||
First, let's mask the seemingly uncalibrated sensors:
|
||||
|
||||
```{r}
|
||||
```{r, warning=FALSE}
|
||||
outliers = filter(pm25, value > 100)$sensorId
|
||||
bad_sensors = outliers[, drop = T] %>% levels()
|
||||
bad_sensors = outliers[, drop = TRUE] %>% levels()
|
||||
|
||||
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
|
||||
```
|
||||
|
@ -139,7 +144,7 @@ pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
|
|||
Then plot the measuring locations, flagging the outliers:
|
||||
|
||||
```{r}
|
||||
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
|
||||
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
|
||||
```
|
||||
|
||||
Removing these sensors yields a nicer time series plot:
|
||||
|
|
File diff suppressed because one or more lines are too long
|
@ -27,25 +27,25 @@ b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
|
|||
osem_clear_cache() # clears default cache
|
||||
osem_clear_cache(getwd()) # clears a custom cache
|
||||
|
||||
## ----data, results='hide'-----------------------------------------------------
|
||||
# first get our example data:
|
||||
measurements = osem_measurements('Windgeschwindigkeit')
|
||||
## ----data, results='hide', eval=FALSE-----------------------------------------
|
||||
# # first get our example data:
|
||||
# measurements = osem_measurements('Windgeschwindigkeit')
|
||||
|
||||
## ----serialize_json-----------------------------------------------------------
|
||||
# serializing senseBoxes to JSON, and loading from file again:
|
||||
write(jsonlite::serializeJSON(measurements), 'measurements.json')
|
||||
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
|
||||
class(measurements_from_file)
|
||||
## ----serialize_json, eval=FALSE-----------------------------------------------
|
||||
# # serializing senseBoxes to JSON, and loading from file again:
|
||||
# write(jsonlite::serializeJSON(measurements), 'measurements.json')
|
||||
# measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
|
||||
# class(measurements_from_file)
|
||||
|
||||
## ----serialize_attrs----------------------------------------------------------
|
||||
# note the toJSON call instead of serializeJSON
|
||||
write(jsonlite::toJSON(measurements), 'measurements_bad.json')
|
||||
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
|
||||
class(measurements_without_attrs)
|
||||
## ----serialize_attrs, eval=FALSE----------------------------------------------
|
||||
# # note the toJSON call instead of serializeJSON
|
||||
# write(jsonlite::toJSON(measurements), 'measurements_bad.json')
|
||||
# measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
|
||||
# class(measurements_without_attrs)
|
||||
#
|
||||
# measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
|
||||
# class(measurements_with_attrs)
|
||||
|
||||
measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
|
||||
class(measurements_with_attrs)
|
||||
|
||||
## ----cleanup, include=FALSE---------------------------------------------------
|
||||
file.remove('measurements.json', 'measurements_bad.json')
|
||||
## ----cleanup, include=FALSE, eval=FALSE---------------------------------------
|
||||
# file.remove('measurements.json', 'measurements_bad.json')
|
||||
|
||||
|
|
|
@ -71,7 +71,7 @@ osem_clear_cache(getwd()) # clears a custom cache
|
|||
If you want to roll your own serialization method to support custom data formats,
|
||||
here's how:
|
||||
|
||||
```{r data, results='hide'}
|
||||
```{r data, results='hide', eval=FALSE}
|
||||
# first get our example data:
|
||||
measurements = osem_measurements('Windgeschwindigkeit')
|
||||
```
|
||||
|
@ -79,7 +79,7 @@ measurements = osem_measurements('Windgeschwindigkeit')
|
|||
If you are paranoid and worry about `.rds` files not being decodable anymore
|
||||
in the (distant) future, you could serialize to a plain text format such as JSON.
|
||||
This of course comes at the cost of storage space and performance.
|
||||
```{r serialize_json}
|
||||
```{r serialize_json, eval=FALSE}
|
||||
# serializing senseBoxes to JSON, and loading from file again:
|
||||
write(jsonlite::serializeJSON(measurements), 'measurements.json')
|
||||
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
|
||||
|
@ -90,7 +90,7 @@ This method also persists the R object metadata (classes, attributes).
|
|||
If you were to use a serialization method that can't persist object metadata, you
|
||||
could re-apply it with the following functions:
|
||||
|
||||
```{r serialize_attrs}
|
||||
```{r serialize_attrs, eval=FALSE}
|
||||
# note the toJSON call instead of serializeJSON
|
||||
write(jsonlite::toJSON(measurements), 'measurements_bad.json')
|
||||
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
|
||||
|
@ -101,6 +101,6 @@ class(measurements_with_attrs)
|
|||
```
|
||||
The same goes for boxes via `osem_as_sensebox()`.
|
||||
|
||||
```{r cleanup, include=FALSE}
|
||||
```{r cleanup, include=FALSE, eval=FALSE}
|
||||
file.remove('measurements.json', 'measurements_bad.json')
|
||||
```
|
||||
|
|
|
@ -12,7 +12,7 @@
|
|||
|
||||
<meta name="author" content="Norwin Roosen" />
|
||||
|
||||
<meta name="date" content="2023-02-23" />
|
||||
<meta name="date" content="2023-03-08" />
|
||||
|
||||
<title>Caching openSenseMap Data for Reproducibility</title>
|
||||
|
||||
|
@ -341,7 +341,7 @@ code > span.er { color: #a61717; background-color: #e3d2d2; }
|
|||
<h1 class="title toc-ignore">Caching openSenseMap Data for
|
||||
Reproducibility</h1>
|
||||
<h4 class="author">Norwin Roosen</h4>
|
||||
<h4 class="date">2023-02-23</h4>
|
||||
<h4 class="date">2023-03-08</h4>
|
||||
|
||||
|
||||
|
||||
|
@ -407,21 +407,17 @@ storage space and performance.</p>
|
|||
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">serializeJSON</span>(measurements), <span class="st">'measurements.json'</span>)</span>
|
||||
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>measurements_from_file <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">unserializeJSON</span>(readr<span class="sc">::</span><span class="fu">read_file</span>(<span class="st">'measurements.json'</span>))</span>
|
||||
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_from_file)</span></code></pre></div>
|
||||
<pre><code>## [1] "osem_measurements" "tbl_df" "tbl"
|
||||
## [4] "data.frame"</code></pre>
|
||||
<p>This method also persists the R object metadata (classes,
|
||||
attributes). If you were to use a serialization method that can’t
|
||||
persist object metadata, you could re-apply it with the following
|
||||
functions:</p>
|
||||
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span>
|
||||
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">'measurements_bad.json'</span>)</span>
|
||||
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">'measurements_bad.json'</span>)</span>
|
||||
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span></code></pre></div>
|
||||
<pre><code>## [1] "data.frame"</code></pre>
|
||||
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span>
|
||||
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div>
|
||||
<pre><code>## [1] "osem_measurements" "tbl_df" "tbl"
|
||||
## [4] "data.frame"</code></pre>
|
||||
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span>
|
||||
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">'measurements_bad.json'</span>)</span>
|
||||
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">'measurements_bad.json'</span>)</span>
|
||||
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span>
|
||||
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a></span>
|
||||
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span>
|
||||
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div>
|
||||
<p>The same goes for boxes via <code>osem_as_sensebox()</code>.</p>
|
||||
</div>
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue