update inst/doc

master v0.6.0
jan 1 year ago
parent 8d0746b263
commit 8ef52f8f59

@ -9,7 +9,10 @@ library(zoo) # rollmean()
## ----download-----------------------------------------------------------------
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
boxes = osem_boxes()
# boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## ----exposure_counts, message=FALSE-------------------------------------------
exposure_counts = boxes %>%

@ -43,7 +43,10 @@ So the first step is to retrieve *all the boxes*:
```{r download}
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
boxes = osem_boxes()
# boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
```
# Plot count of boxes by time {.tabset}

File diff suppressed because one or more lines are too long

@ -9,18 +9,15 @@ library(zoo) # rollmean()
## ----download, results='hide', message=FALSE, warning=FALSE-------------------
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
boxes_all = osem_boxes()
boxes = boxes_all
# boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## -----------------------------------------------------------------------------
boxes = filter(boxes, locationtimestamp >= "2022-01-01" & locationtimestamp <="2022-12-31")
summary(boxes) -> summary.data.frame
## ----message=F, warning=F-----------------------------------------------------
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
## ---- message=FALSE, warning=FALSE--------------------------------------------
plot(boxes)
## -----------------------------------------------------------------------------

@ -45,8 +45,9 @@ So the first step is to retrieve *all the boxes*.
```{r download, results='hide', message=FALSE, warning=FALSE}
# if you want to see results for a specific subset of boxes,
# just specify a filter such as grouptag='ifgi' here
boxes_all = osem_boxes()
boxes = boxes_all
# boxes = osem_boxes(cache = '.')
boxes = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
```
# Introduction
In the following we just want to have a look at the boxes created in 2022, so we filter for them.
@ -65,11 +66,7 @@ summary(boxes) -> summary.data.frame
Another feature of interest is the spatial distribution of the boxes: `plot()`
can help us out here. This function requires a bunch of optional dependencies though.
```{r message=F, warning=F}
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
```{r, message=FALSE, warning=FALSE}
plot(boxes)
```

File diff suppressed because one or more lines are too long

@ -1,20 +1,17 @@
## ----setup, include=FALSE-----------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)
## ----results = F--------------------------------------------------------------
## ----results = FALSE----------------------------------------------------------
library(magrittr)
library(opensensmapr)
all_sensors = osem_boxes()
# all_sensors = osem_boxes(cache = '.')
all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
## -----------------------------------------------------------------------------
summary(all_sensors)
## ----message=F, warning=F-----------------------------------------------------
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
## ---- message=FALSE, warning=FALSE--------------------------------------------
plot(all_sensors)
## -----------------------------------------------------------------------------
@ -24,49 +21,54 @@ str(phenoms)
## -----------------------------------------------------------------------------
phenoms[phenoms > 20]
## ----results = F--------------------------------------------------------------
pm25_sensors = osem_boxes(
exposure = 'outdoor',
date = Sys.time(), # ±4 hours
phenomenon = 'PM2.5'
)
## ----results = FALSE, eval=FALSE----------------------------------------------
# pm25_sensors = osem_boxes(
# exposure = 'outdoor',
# date = Sys.time(), # ±4 hours
# phenomenon = 'PM2.5'
# )
## -----------------------------------------------------------------------------
pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
summary(pm25_sensors)
plot(pm25_sensors)
## -----------------------------------------------------------------------------
## ---- results=FALSE, message=FALSE--------------------------------------------
library(sf)
library(units)
library(lubridate)
library(dplyr)
# construct a bounding box: 12 kilometers around Berlin
berlin = st_point(c(13.4034, 52.5120)) %>%
st_sfc(crs = 4326) %>%
st_transform(3857) %>% # allow setting a buffer in meters
st_buffer(set_units(12, km)) %>%
st_transform(4326) %>% # the opensensemap expects WGS 84
st_bbox()
## ----results = F--------------------------------------------------------------
pm25 = osem_measurements(
berlin,
phenomenon = 'PM2.5',
from = now() - days(3), # defaults to 2 days
to = now()
)
plot(pm25)
## ----bbox, results = FALSE, eval=FALSE----------------------------------------
# # construct a bounding box: 12 kilometers around Berlin
# berlin = st_point(c(13.4034, 52.5120)) %>%
# st_sfc(crs = 4326) %>%
# st_transform(3857) %>% # allow setting a buffer in meters
# st_buffer(set_units(12, km)) %>%
# st_transform(4326) %>% # the opensensemap expects WGS 84
# st_bbox()
# pm25 = osem_measurements(
# berlin,
# phenomenon = 'PM2.5',
# from = now() - days(3), # defaults to 2 days
# to = now()
# )
#
## -----------------------------------------------------------------------------
pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
plot(pm25)
## ---- warning=FALSE-----------------------------------------------------------
outliers = filter(pm25, value > 100)$sensorId
bad_sensors = outliers[, drop = T] %>% levels()
bad_sensors = outliers[, drop = TRUE] %>% levels()
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
## -----------------------------------------------------------------------------
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
## -----------------------------------------------------------------------------
pm25 %>% filter(invalid == FALSE) %>% plot()

@ -28,11 +28,12 @@ Its main goals are to provide means for:
Before we look at actual observations, let's get a grasp of the openSenseMap
datasets' structure.
```{r results = F}
```{r results = FALSE}
library(magrittr)
library(opensensmapr)
all_sensors = osem_boxes()
# all_sensors = osem_boxes(cache = '.')
all_sensors = readRDS('boxes_precomputed.rds') # read precomputed file to save resources
```
```{r}
summary(all_sensors)
@ -47,11 +48,7 @@ couple of minutes ago.
Another feature of interest is the spatial distribution of the boxes: `plot()`
can help us out here. This function requires a bunch of optional dependencies though.
```{r message=F, warning=F}
if (!require('maps')) install.packages('maps')
if (!require('maptools')) install.packages('maptools')
if (!require('rgeos')) install.packages('rgeos')
```{r, message=FALSE, warning=FALSE}
plot(all_sensors)
```
@ -81,7 +78,7 @@ We should check how many sensor stations provide useful data: We want only those
boxes with a PM2.5 sensor, that are placed outdoors and are currently submitting
measurements:
```{r results = F}
```{r results = FALSE, eval=FALSE}
pm25_sensors = osem_boxes(
exposure = 'outdoor',
date = Sys.time(), # ±4 hours
@ -89,6 +86,8 @@ pm25_sensors = osem_boxes(
)
```
```{r}
pm25_sensors = readRDS('pm25_sensors.rds') # read precomputed file to save resources
summary(pm25_sensors)
plot(pm25_sensors)
```
@ -101,12 +100,16 @@ We could call `osem_measurements(pm25_sensors)` now, however we are focusing on
a restricted area of interest, the city of Berlin.
Luckily we can get the measurements filtered by a bounding box:
```{r}
```{r, results=FALSE, message=FALSE}
library(sf)
library(units)
library(lubridate)
library(dplyr)
```
Since the API takes quite long to respond with measurements, especially when filtered by space and time, we do not run the following chunks when building the package for publication on CRAN.
```{r bbox, results = FALSE, eval=FALSE}
# construct a bounding box: 12 kilometers around Berlin
berlin = st_point(c(13.4034, 52.5120)) %>%
st_sfc(crs = 4326) %>%
@ -114,8 +117,6 @@ berlin = st_point(c(13.4034, 52.5120)) %>%
st_buffer(set_units(12, km)) %>%
st_transform(4326) %>% # the opensensemap expects WGS 84
st_bbox()
```
```{r results = F}
pm25 = osem_measurements(
berlin,
phenomenon = 'PM2.5',
@ -123,15 +124,19 @@ pm25 = osem_measurements(
to = now()
)
```
```{r}
pm25 = readRDS('pm25_berlin.rds') # read precomputed file to save resources
plot(pm25)
```
Now we can get started with actual spatiotemporal data analysis.
First, let's mask the seemingly uncalibrated sensors:
```{r}
```{r, warning=FALSE}
outliers = filter(pm25, value > 100)$sensorId
bad_sensors = outliers[, drop = T] %>% levels()
bad_sensors = outliers[, drop = TRUE] %>% levels()
pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
```
@ -139,7 +144,7 @@ pm25 = mutate(pm25, invalid = sensorId %in% bad_sensors)
Then plot the measuring locations, flagging the outliers:
```{r}
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = T)
st_as_sf(pm25) %>% st_geometry() %>% plot(col = factor(pm25$invalid), axes = TRUE)
```
Removing these sensors yields a nicer time series plot:

File diff suppressed because one or more lines are too long

@ -27,25 +27,25 @@ b = osem_boxes(grouptag = 'ifgi', cache = cacheDir)
osem_clear_cache() # clears default cache
osem_clear_cache(getwd()) # clears a custom cache
## ----data, results='hide'-----------------------------------------------------
# first get our example data:
measurements = osem_measurements('Windgeschwindigkeit')
## ----serialize_json-----------------------------------------------------------
# serializing senseBoxes to JSON, and loading from file again:
write(jsonlite::serializeJSON(measurements), 'measurements.json')
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
class(measurements_from_file)
## ----serialize_attrs----------------------------------------------------------
# note the toJSON call instead of serializeJSON
write(jsonlite::toJSON(measurements), 'measurements_bad.json')
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
class(measurements_without_attrs)
measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
class(measurements_with_attrs)
## ----cleanup, include=FALSE---------------------------------------------------
file.remove('measurements.json', 'measurements_bad.json')
## ----data, results='hide', eval=FALSE-----------------------------------------
# # first get our example data:
# measurements = osem_measurements('Windgeschwindigkeit')
## ----serialize_json, eval=FALSE-----------------------------------------------
# # serializing senseBoxes to JSON, and loading from file again:
# write(jsonlite::serializeJSON(measurements), 'measurements.json')
# measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
# class(measurements_from_file)
## ----serialize_attrs, eval=FALSE----------------------------------------------
# # note the toJSON call instead of serializeJSON
# write(jsonlite::toJSON(measurements), 'measurements_bad.json')
# measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
# class(measurements_without_attrs)
#
# measurements_with_attrs = osem_as_measurements(measurements_without_attrs)
# class(measurements_with_attrs)
## ----cleanup, include=FALSE, eval=FALSE---------------------------------------
# file.remove('measurements.json', 'measurements_bad.json')

@ -71,7 +71,7 @@ osem_clear_cache(getwd()) # clears a custom cache
If you want to roll your own serialization method to support custom data formats,
here's how:
```{r data, results='hide'}
```{r data, results='hide', eval=FALSE}
# first get our example data:
measurements = osem_measurements('Windgeschwindigkeit')
```
@ -79,7 +79,7 @@ measurements = osem_measurements('Windgeschwindigkeit')
If you are paranoid and worry about `.rds` files not being decodable anymore
in the (distant) future, you could serialize to a plain text format such as JSON.
This of course comes at the cost of storage space and performance.
```{r serialize_json}
```{r serialize_json, eval=FALSE}
# serializing senseBoxes to JSON, and loading from file again:
write(jsonlite::serializeJSON(measurements), 'measurements.json')
measurements_from_file = jsonlite::unserializeJSON(readr::read_file('measurements.json'))
@ -90,7 +90,7 @@ This method also persists the R object metadata (classes, attributes).
If you were to use a serialization method that can't persist object metadata, you
could re-apply it with the following functions:
```{r serialize_attrs}
```{r serialize_attrs, eval=FALSE}
# note the toJSON call instead of serializeJSON
write(jsonlite::toJSON(measurements), 'measurements_bad.json')
measurements_without_attrs = jsonlite::fromJSON('measurements_bad.json')
@ -101,6 +101,6 @@ class(measurements_with_attrs)
```
The same goes for boxes via `osem_as_sensebox()`.
```{r cleanup, include=FALSE}
```{r cleanup, include=FALSE, eval=FALSE}
file.remove('measurements.json', 'measurements_bad.json')
```

@ -12,7 +12,7 @@
<meta name="author" content="Norwin Roosen" />
<meta name="date" content="2023-02-23" />
<meta name="date" content="2023-03-08" />
<title>Caching openSenseMap Data for Reproducibility</title>
@ -341,7 +341,7 @@ code > span.er { color: #a61717; background-color: #e3d2d2; }
<h1 class="title toc-ignore">Caching openSenseMap Data for
Reproducibility</h1>
<h4 class="author">Norwin Roosen</h4>
<h4 class="date">2023-02-23</h4>
<h4 class="date">2023-03-08</h4>
@ -407,21 +407,17 @@ storage space and performance.</p>
<span id="cb8-2"><a href="#cb8-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">serializeJSON</span>(measurements), <span class="st">&#39;measurements.json&#39;</span>)</span>
<span id="cb8-3"><a href="#cb8-3" aria-hidden="true" tabindex="-1"></a>measurements_from_file <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">unserializeJSON</span>(readr<span class="sc">::</span><span class="fu">read_file</span>(<span class="st">&#39;measurements.json&#39;</span>))</span>
<span id="cb8-4"><a href="#cb8-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_from_file)</span></code></pre></div>
<pre><code>## [1] &quot;osem_measurements&quot; &quot;tbl_df&quot; &quot;tbl&quot;
## [4] &quot;data.frame&quot;</code></pre>
<p>This method also persists the R object metadata (classes,
attributes). If you were to use a serialization method that can't
persist object metadata, you could re-apply it with the following
functions:</p>
<div class="sourceCode" id="cb10"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb10-1"><a href="#cb10-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span>
<span id="cb10-2"><a href="#cb10-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">&#39;measurements_bad.json&#39;</span>)</span>
<span id="cb10-3"><a href="#cb10-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">&#39;measurements_bad.json&#39;</span>)</span>
<span id="cb10-4"><a href="#cb10-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span></code></pre></div>
<pre><code>## [1] &quot;data.frame&quot;</code></pre>
<div class="sourceCode" id="cb12"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb12-1"><a href="#cb12-1" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span>
<span id="cb12-2"><a href="#cb12-2" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div>
<pre><code>## [1] &quot;osem_measurements&quot; &quot;tbl_df&quot; &quot;tbl&quot;
## [4] &quot;data.frame&quot;</code></pre>
<div class="sourceCode" id="cb9"><pre class="sourceCode r"><code class="sourceCode r"><span id="cb9-1"><a href="#cb9-1" aria-hidden="true" tabindex="-1"></a><span class="co"># note the toJSON call instead of serializeJSON</span></span>
<span id="cb9-2"><a href="#cb9-2" aria-hidden="true" tabindex="-1"></a><span class="fu">write</span>(jsonlite<span class="sc">::</span><span class="fu">toJSON</span>(measurements), <span class="st">&#39;measurements_bad.json&#39;</span>)</span>
<span id="cb9-3"><a href="#cb9-3" aria-hidden="true" tabindex="-1"></a>measurements_without_attrs <span class="ot">=</span> jsonlite<span class="sc">::</span><span class="fu">fromJSON</span>(<span class="st">&#39;measurements_bad.json&#39;</span>)</span>
<span id="cb9-4"><a href="#cb9-4" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_without_attrs)</span>
<span id="cb9-5"><a href="#cb9-5" aria-hidden="true" tabindex="-1"></a></span>
<span id="cb9-6"><a href="#cb9-6" aria-hidden="true" tabindex="-1"></a>measurements_with_attrs <span class="ot">=</span> <span class="fu">osem_as_measurements</span>(measurements_without_attrs)</span>
<span id="cb9-7"><a href="#cb9-7" aria-hidden="true" tabindex="-1"></a><span class="fu">class</span>(measurements_with_attrs)</span></code></pre></div>
<p>The same goes for boxes via <code>osem_as_sensebox()</code>.</p>
</div>

Loading…
Cancel
Save