You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
opensensmapR/inst/doc/osem-history.html

502 lines
188 KiB
HTML

<!DOCTYPE html>
<html xmlns="http://www.w3.org/1999/xhtml" lang="en">
<head>
<meta charset="utf-8" />
<meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
<meta name="generator" content="pandoc" />
<meta name="viewport" content="width=device-width, initial-scale=1">
<meta name="author" content="Norwin Roosen" />
<meta name="date" content="2018-05-26" />
<title>Visualising the History of openSenseMap.org</title>
<style type="text/css">code{white-space: pre;}</style>
<style type="text/css">
/* Highlighted code blocks scroll horizontally instead of overflowing. */
div.sourceCode {
  overflow-x: auto;
}

/* Table-based code layout: reset spacing, alignment and borders. */
table.sourceCode,
tr.sourceCode,
td.lineNumbers,
td.sourceCode {
  margin: 0;
  padding: 0;
  vertical-align: baseline;
  border: none;
}
table.sourceCode {
  width: 100%;
  line-height: 100%;
}

/* Line-number column and the code column next to it. */
td.lineNumbers {
  text-align: right;
  padding-right: 4px;
  padding-left: 4px;
  color: #aaaaaa;
  border-right: 1px solid #aaaaaa;
}
td.sourceCode {
  padding-left: 5px;
}

/* Token styles: one rule per highlighting class, labelled by token type. */
/* Keyword */
code > span.kw { color: #007020; font-weight: bold; }
/* DataType */
code > span.dt { color: #902000; }
/* DecVal */
code > span.dv { color: #40a070; }
/* BaseN */
code > span.bn { color: #40a070; }
/* Float */
code > span.fl { color: #40a070; }
/* Char */
code > span.ch { color: #4070a0; }
/* String */
code > span.st { color: #4070a0; }
/* Comment */
code > span.co { color: #60a0b0; font-style: italic; }
/* Other */
code > span.ot { color: #007020; }
/* Alert */
code > span.al { color: #ff0000; font-weight: bold; }
/* Function */
code > span.fu { color: #06287e; }
/* Error */
code > span.er { color: #ff0000; font-weight: bold; }
/* Warning */
code > span.wa { color: #60a0b0; font-weight: bold; font-style: italic; }
/* Constant */
code > span.cn { color: #880000; }
/* SpecialChar */
code > span.sc { color: #4070a0; }
/* VerbatimString */
code > span.vs { color: #4070a0; }
/* SpecialString */
code > span.ss { color: #bb6688; }
/* Import (no styling) */
code > span.im {}
/* Variable */
code > span.va { color: #19177c; }
/* ControlFlow */
code > span.cf { color: #007020; font-weight: bold; }
/* Operator */
code > span.op { color: #666666; }
/* BuiltIn (no styling) */
code > span.bu {}
/* Extension (no styling) */
code > span.ex {}
/* Preprocessor */
code > span.pp { color: #bc7a00; }
/* Attribute */
code > span.at { color: #7d9029; }
/* Documentation */
code > span.do { color: #ba2121; font-style: italic; }
/* Annotation */
code > span.an { color: #60a0b0; font-weight: bold; font-style: italic; }
/* CommentVar */
code > span.cv { color: #60a0b0; font-weight: bold; font-style: italic; }
/* Information */
code > span.in { color: #60a0b0; font-weight: bold; font-style: italic; }
</style>
<link href="data:text/css;charset=utf-8,body%20%7B%0Abackground%2Dcolor%3A%20%23fff%3B%0Amargin%3A%201em%20auto%3B%0Amax%2Dwidth%3A%20700px%3B%0Aoverflow%3A%20visible%3B%0Apadding%2Dleft%3A%202em%3B%0Apadding%2Dright%3A%202em%3B%0Afont%2Dfamily%3A%20%22Open%20Sans%22%2C%20%22Helvetica%20Neue%22%2C%20Helvetica%2C%20Arial%2C%20sans%2Dserif%3B%0Afont%2Dsize%3A%2014px%3B%0Aline%2Dheight%3A%201%2E35%3B%0A%7D%0A%23header%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0A%23TOC%20%7B%0Aclear%3A%20both%3B%0Amargin%3A%200%200%2010px%2010px%3B%0Apadding%3A%204px%3B%0Awidth%3A%20400px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Aborder%2Dradius%3A%205px%3B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Afont%2Dsize%3A%2013px%3B%0Aline%2Dheight%3A%201%2E3%3B%0A%7D%0A%23TOC%20%2Etoctitle%20%7B%0Afont%2Dweight%3A%20bold%3B%0Afont%2Dsize%3A%2015px%3B%0Amargin%2Dleft%3A%205px%3B%0A%7D%0A%23TOC%20ul%20%7B%0Apadding%2Dleft%3A%2040px%3B%0Amargin%2Dleft%3A%20%2D1%2E5em%3B%0Amargin%2Dtop%3A%205px%3B%0Amargin%2Dbottom%3A%205px%3B%0A%7D%0A%23TOC%20ul%20ul%20%7B%0Amargin%2Dleft%3A%20%2D2em%3B%0A%7D%0A%23TOC%20li%20%7B%0Aline%2Dheight%3A%2016px%3B%0A%7D%0Atable%20%7B%0Amargin%3A%201em%20auto%3B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dcolor%3A%20%23DDDDDD%3B%0Aborder%2Dstyle%3A%20outset%3B%0Aborder%2Dcollapse%3A%20collapse%3B%0A%7D%0Atable%20th%20%7B%0Aborder%2Dwidth%3A%202px%3B%0Apadding%3A%205px%3B%0Aborder%2Dstyle%3A%20inset%3B%0A%7D%0Atable%20td%20%7B%0Aborder%2Dwidth%3A%201px%3B%0Aborder%2Dstyle%3A%20inset%3B%0Aline%2Dheight%3A%2018px%3B%0Apadding%3A%205px%205px%3B%0A%7D%0Atable%2C%20table%20th%2C%20table%20td%20%7B%0Aborder%2Dleft%2Dstyle%3A%20none%3B%0Aborder%2Dright%2Dstyle%3A%20none%3B%0A%7D%0Atable%20thead%2C%20table%20tr%2Eeven%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Ap%20%7B%0Amargin%3A%200%2E5em%200%3B%0A%7D%0Ablockquote%20%7B%0Abackground%2Dcolor%3A%20%23f6f6f6%3B%0Apadding%3A%200%2E25em%200%2E75em%3B%0A%7D%0Ahr%20%7B%0Aborder%2Dstyle%3A%20solid%3B%0Aborder%3A%20none%3B%0Aborde
r%2Dtop%3A%201px%20solid%20%23777%3B%0Amargin%3A%2028px%200%3B%0A%7D%0Adl%20%7B%0Amargin%2Dleft%3A%200%3B%0A%7D%0Adl%20dd%20%7B%0Amargin%2Dbottom%3A%2013px%3B%0Amargin%2Dleft%3A%2013px%3B%0A%7D%0Adl%20dt%20%7B%0Afont%2Dweight%3A%20bold%3B%0A%7D%0Aul%20%7B%0Amargin%2Dtop%3A%200%3B%0A%7D%0Aul%20li%20%7B%0Alist%2Dstyle%3A%20circle%20outside%3B%0A%7D%0Aul%20ul%20%7B%0Amargin%2Dbottom%3A%200%3B%0A%7D%0Apre%2C%20code%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0Aborder%2Dradius%3A%203px%3B%0Acolor%3A%20%23333%3B%0Awhite%2Dspace%3A%20pre%2Dwrap%3B%20%0A%7D%0Apre%20%7B%0Aborder%2Dradius%3A%203px%3B%0Amargin%3A%205px%200px%2010px%200px%3B%0Apadding%3A%2010px%3B%0A%7D%0Apre%3Anot%28%5Bclass%5D%29%20%7B%0Abackground%2Dcolor%3A%20%23f7f7f7%3B%0A%7D%0Acode%20%7B%0Afont%2Dfamily%3A%20Consolas%2C%20Monaco%2C%20%27Courier%20New%27%2C%20monospace%3B%0Afont%2Dsize%3A%2085%25%3B%0A%7D%0Ap%20%3E%20code%2C%20li%20%3E%20code%20%7B%0Apadding%3A%202px%200px%3B%0A%7D%0Adiv%2Efigure%20%7B%0Atext%2Dalign%3A%20center%3B%0A%7D%0Aimg%20%7B%0Abackground%2Dcolor%3A%20%23FFFFFF%3B%0Apadding%3A%202px%3B%0Aborder%3A%201px%20solid%20%23DDDDDD%3B%0Aborder%2Dradius%3A%203px%3B%0Aborder%3A%201px%20solid%20%23CCCCCC%3B%0Amargin%3A%200%205px%3B%0A%7D%0Ah1%20%7B%0Amargin%2Dtop%3A%200%3B%0Afont%2Dsize%3A%2035px%3B%0Aline%2Dheight%3A%2040px%3B%0A%7D%0Ah2%20%7B%0Aborder%2Dbottom%3A%204px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Apadding%2Dbottom%3A%202px%3B%0Afont%2Dsize%3A%20145%25%3B%0A%7D%0Ah3%20%7B%0Aborder%2Dbottom%3A%202px%20solid%20%23f7f7f7%3B%0Apadding%2Dtop%3A%2010px%3B%0Afont%2Dsize%3A%20120%25%3B%0A%7D%0Ah4%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23f7f7f7%3B%0Amargin%2Dleft%3A%208px%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Ah5%2C%20h6%20%7B%0Aborder%2Dbottom%3A%201px%20solid%20%23ccc%3B%0Afont%2Dsize%3A%20105%25%3B%0A%7D%0Aa%20%7B%0Acolor%3A%20%230033dd%3B%0Atext%2Ddecoration%3A%20none%3B%0A%7D%0Aa%3Ahover%20%7B%0Acolor%3A%20%236666ff%3B%20%7D%0Aa%3Avisited%20%7B%0Acolor%3A%20%238000
80%3B%20%7D%0Aa%3Avisited%3Ahover%20%7B%0Acolor%3A%20%23BB00BB%3B%20%7D%0Aa%5Bhref%5E%3D%22http" rel="stylesheet" type="text/css" />
</head>
<body>
<h1 class="title toc-ignore">Visualising the History of openSenseMap.org</h1>
<h4 class="author"><em>Norwin Roosen</em></h4>
<h4 class="date"><em>2018-05-26</em></h4>
<div id="TOC">
<ul>
<li><a href="#plot-count-of-boxes-by-time">Plot count of boxes by time</a><ul>
<li><a href="#and-exposure">…and exposure</a></li>
<li><a href="#and-grouptag">…and grouptag</a></li>
</ul></li>
<li><a href="#plot-rate-of-growth-and-inactivity-per-week">Plot rate of growth and inactivity per week</a></li>
<li><a href="#plot-duration-of-boxes-being-active">Plot duration of boxes being active</a><ul>
<li><a href="#by-exposure">…by exposure</a></li>
<li><a href="#by-grouptag">…by grouptag</a></li>
<li><a href="#by-year-of-registration">…by year of registration</a></li>
</ul></li>
<li><a href="#more-visualisations">More Visualisations</a></li>
</ul>
</div>
<blockquote>
<p>This vignette serves as an example on data wrangling &amp; visualization with <code>opensensmapr</code>, <code>dplyr</code> and <code>ggplot2</code>.</p>
</blockquote>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># required packages:</span>
<span class="kw">library</span>(opensensmapr) <span class="co"># data download</span>
<span class="kw">library</span>(dplyr) <span class="co"># data wrangling</span>
<span class="kw">library</span>(ggplot2) <span class="co"># plotting</span>
<span class="kw">library</span>(lubridate) <span class="co"># date arithmetic</span>
<span class="kw">library</span>(zoo) <span class="co"># rollmean()</span></code></pre></div>
<p>openSenseMap.org has grown quite a bit in recent years; it would be interesting to see how we got to the current 1781 sensor stations, split up by various attributes of the boxes.</p>
<p>While <code>opensensmapr</code> provides extensive methods of filtering boxes by attributes on the server, we do the filtering within R to save time and gain flexibility. So the first step is to retrieve <em>all the boxes</em>:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># if you want to see results for a specific subset of boxes,</span>
<span class="co"># just specify a filter such as grouptag='ifgi' here</span>
boxes =<span class="st"> </span><span class="kw">osem_boxes</span>()</code></pre></div>
<div id="plot-count-of-boxes-by-time" class="section level1 tabset">
<h1>Plot count of boxes by time</h1>
<p>By looking at the <code>createdAt</code> attribute of each box we know the exact time a box was registered. With this approach we have no information about boxes that were deleted in the meantime, but that's okay for now.</p>
<div id="and-exposure" class="section level2">
<h2>…and exposure</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">exposure_counts =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(exposure) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">count =</span> <span class="kw">row_number</span>(createdAt))
exposure_colors =<span class="st"> </span><span class="kw">c</span>(<span class="dt">indoor =</span> <span class="st">'red'</span>, <span class="dt">outdoor =</span> <span class="st">'lightgreen'</span>, <span class="dt">mobile =</span> <span class="st">'blue'</span>, <span class="dt">unknown =</span> <span class="st">'darkgrey'</span>)
<span class="kw">ggplot</span>(exposure_counts, <span class="kw">aes</span>(<span class="dt">x =</span> createdAt, <span class="dt">y =</span> count, <span class="dt">colour =</span> exposure)) +
<span class="st"> </span><span class="kw">geom_line</span>() +
<span class="st"> </span><span class="kw">scale_colour_manual</span>(<span class="dt">values =</span> exposure_colors) +
<span class="st"> </span><span class="kw">xlab</span>(<span class="st">'Registration Date'</span>) +<span class="st"> </span><span class="kw">ylab</span>(<span class="st">'senseBox count'</span>)</code></pre></div>
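<p><img src="" alt="Line plot of senseBox count by registration date, one line per exposure" /></p><!-- embedded plot data is missing from this file; re-render the vignette to restore it -->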
<p>Outdoor boxes are growing <em>fast</em>! We can also see the introduction of <code>mobile</code> sensor “stations” in 2017. While mobile boxes are still few, we can expect a quick rise in 2018 once the new <a href="https://sensebox.de/blog/2018-03-06-senseBox_MCU">senseBox MCU with GPS support is released</a>.</p>
<p>Let's have a quick summary:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">exposure_counts %&gt;%
<span class="st"> </span><span class="kw">summarise</span>(
<span class="dt">oldest =</span> <span class="kw">min</span>(createdAt),
<span class="dt">newest =</span> <span class="kw">max</span>(createdAt),
<span class="dt">count =</span> <span class="kw">max</span>(count)
) %&gt;%
<span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(count))</code></pre></div>
<div class="kable-table">
<table>
<thead>
<tr class="header">
<th align="left">exposure</th>
<th align="left">oldest</th>
<th align="left">newest</th>
<th align="right">count</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left">outdoor</td>
<td align="left">2015-02-18 16:53:41</td>
<td align="left">2018-05-26 08:39:12</td>
<td align="right">1416</td>
</tr>
<tr class="even">
<td align="left">indoor</td>
<td align="left">2015-02-08 17:36:40</td>
<td align="left">2018-05-26 10:29:27</td>
<td align="right">290</td>
</tr>
<tr class="odd">
<td align="left">mobile</td>
<td align="left">2017-05-24 08:16:36</td>
<td align="left">2018-05-24 07:08:32</td>
<td align="right">55</td>
</tr>
<tr class="even">
<td align="left">unknown</td>
<td align="left">2014-05-28 15:36:14</td>
<td align="left">2016-06-25 15:11:11</td>
<td align="right">20</td>
</tr>
</tbody>
</table>
</div>
</div>
<div id="and-grouptag" class="section level2">
<h2>…and grouptag</h2>
<p>We can try to find out where the increases in growth came from, by analysing the box count by grouptag.</p>
<p>Caveats: Only a small subset of boxes has a grouptag, and we should assume that these groups are actually bigger. Also, we can see that grouptag naming is inconsistent (<code>Luftdaten</code>, <code>luftdaten.info</code>, …)</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">grouptag_counts =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(grouptag) %&gt;%
<span class="st"> </span><span class="co"># only include grouptags with 8 or more members</span>
<span class="st"> </span><span class="kw">filter</span>(<span class="kw">length</span>(grouptag) &gt;=<span class="st"> </span><span class="dv">8</span> &amp;&amp;<span class="st"> </span>!<span class="kw">is.na</span>(grouptag)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">count =</span> <span class="kw">row_number</span>(createdAt))
<span class="co"># helper for sorting the grouptags by boxcount</span>
sortLvls =<span class="st"> </span>function(oldFactor, <span class="dt">ascending =</span> <span class="ot">TRUE</span>) {
lvls =<span class="st"> </span><span class="kw">table</span>(oldFactor) %&gt;%<span class="st"> </span><span class="kw">sort</span>(., <span class="dt">decreasing =</span> !ascending) %&gt;%<span class="st"> </span><span class="kw">names</span>()
<span class="kw">factor</span>(oldFactor, <span class="dt">levels =</span> lvls)
}
grouptag_counts$grouptag =<span class="st"> </span><span class="kw">sortLvls</span>(grouptag_counts$grouptag, <span class="dt">ascending =</span> <span class="ot">FALSE</span>)
<span class="kw">ggplot</span>(grouptag_counts, <span class="kw">aes</span>(<span class="dt">x =</span> createdAt, <span class="dt">y =</span> count, <span class="dt">colour =</span> grouptag)) +
<span class="st"> </span><span class="kw">geom_line</span>(<span class="kw">aes</span>(<span class="dt">group =</span> grouptag)) +
<span class="st"> </span><span class="kw">xlab</span>(<span class="st">'Registration Date'</span>) +<span class="st"> </span><span class="kw">ylab</span>(<span class="st">'senseBox count'</span>)</code></pre></div>
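<p><img src="" alt="Line plot of senseBox count by registration date, one line per grouptag" /></p><!-- embedded plot data is missing from this file; re-render the vignette to restore it -->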
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">grouptag_counts %&gt;%
<span class="st"> </span><span class="kw">summarise</span>(
<span class="dt">oldest =</span> <span class="kw">min</span>(createdAt),
<span class="dt">newest =</span> <span class="kw">max</span>(createdAt),
<span class="dt">count =</span> <span class="kw">max</span>(count)
) %&gt;%
<span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(count))</code></pre></div>
<div class="kable-table">
<table>
<thead>
<tr class="header">
<th align="left">grouptag</th>
<th align="left">oldest</th>
<th align="left">newest</th>
<th align="right">count</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left">Luftdaten</td>
<td align="left">2017-03-14 17:01:16</td>
<td align="left">2018-05-21 02:20:50</td>
<td align="right">109</td>
</tr>
<tr class="even">
<td align="left">ifgi</td>
<td align="left">2016-06-17 08:04:54</td>
<td align="left">2018-05-15 10:27:02</td>
<td align="right">35</td>
</tr>
<tr class="odd">
<td align="left">MakeLight</td>
<td align="left">2015-02-18 16:53:41</td>
<td align="left">2018-02-02 13:50:21</td>
<td align="right">15</td>
</tr>
<tr class="even">
<td align="left">Bad_Hersfeld</td>
<td align="left">2017-07-18 13:32:03</td>
<td align="left">2018-03-22 09:10:07</td>
<td align="right">13</td>
</tr>
<tr class="odd">
<td align="left">luftdaten.info</td>
<td align="left">2017-05-01 10:15:44</td>
<td align="left">2018-05-17 11:47:21</td>
<td align="right">12</td>
</tr>
<tr class="even">
<td align="left">dwih-sp</td>
<td align="left">2016-08-09 08:06:02</td>
<td align="left">2016-11-23 10:16:04</td>
<td align="right">11</td>
</tr>
<tr class="odd">
<td align="left">Che Aria Tira?</td>
<td align="left">2018-03-11 10:50:42</td>
<td align="left">2018-03-11 23:11:20</td>
<td align="right">10</td>
</tr>
<tr class="even">
<td align="left">Luftdaten.info</td>
<td align="left">2017-04-03 14:10:20</td>
<td align="left">2018-04-16 16:31:24</td>
<td align="right">10</td>
</tr>
<tr class="odd">
<td align="left">Feinstaub</td>
<td align="left">2017-04-08 06:38:25</td>
<td align="left">2018-03-29 17:27:55</td>
<td align="right">9</td>
</tr>
<tr class="even">
<td align="left">PGKN</td>
<td align="left">2018-04-08 07:01:57</td>
<td align="left">2018-04-27 18:38:51</td>
<td align="right">9</td>
</tr>
<tr class="odd">
<td align="left">Raumanmeri</td>
<td align="left">2017-03-13 11:35:39</td>
<td align="left">2017-04-27 05:36:20</td>
<td align="right">9</td>
</tr>
<tr class="even">
<td align="left">Sofia</td>
<td align="left">2017-04-11 04:40:11</td>
<td align="left">2018-03-15 13:26:56</td>
<td align="right">9</td>
</tr>
<tr class="odd">
<td align="left">IKG</td>
<td align="left">2017-03-21 19:02:11</td>
<td align="left">2017-12-18 14:30:21</td>
<td align="right">8</td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<div id="plot-rate-of-growth-and-inactivity-per-week" class="section level1">
<h1>Plot rate of growth and inactivity per week</h1>
<p>First we group the boxes by <code>createdAt</code> into bins of one week:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">bins =<span class="st"> 'week'</span>
mvavg_bins =<span class="st"> </span><span class="dv">6</span>
growth =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">week =</span> <span class="kw">cut</span>(<span class="kw">as.Date</span>(createdAt), <span class="dt">breaks =</span> bins)) %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(week) %&gt;%
<span class="st"> </span><span class="kw">summarize</span>(<span class="dt">count =</span> <span class="kw">length</span>(week)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">event =</span> <span class="st">'registered'</span>)</code></pre></div>
<p>We can do the same for <code>updatedAt</code>, which informs us about the last change to a box, including uploaded measurements. This method of determining inactive boxes is fairly inaccurate and should be considered an approximation, because we have no information about intermediate inactive phases. Also deleted boxes would probably have a big impact here.</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">inactive =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="co"># remove boxes that were updated in the last two days,</span>
<span class="st"> </span><span class="co"># b/c any box becomes inactive at some point by definition of updatedAt</span>
<span class="st"> </span><span class="kw">filter</span>(updatedAt &lt;<span class="st"> </span><span class="kw">now</span>() -<span class="st"> </span><span class="kw">days</span>(<span class="dv">2</span>)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">week =</span> <span class="kw">cut</span>(<span class="kw">as.Date</span>(updatedAt), <span class="dt">breaks =</span> bins)) %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(week) %&gt;%
<span class="st"> </span><span class="kw">summarize</span>(<span class="dt">count =</span> <span class="kw">length</span>(week)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">event =</span> <span class="st">'inactive'</span>)</code></pre></div>
<p>Now we can combine both datasets for plotting:</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">boxes_by_date =<span class="st"> </span><span class="kw">bind_rows</span>(growth, inactive) %&gt;%<span class="st"> </span><span class="kw">group_by</span>(event)
<span class="kw">ggplot</span>(boxes_by_date, <span class="kw">aes</span>(<span class="dt">x =</span> <span class="kw">as.Date</span>(week), <span class="dt">colour =</span> event)) +
<span class="st"> </span><span class="kw">xlab</span>(<span class="st">'Time'</span>) +<span class="st"> </span><span class="kw">ylab</span>(<span class="kw">paste</span>(<span class="st">'rate per '</span>, bins)) +
<span class="st"> </span><span class="kw">scale_x_date</span>(<span class="dt">date_breaks=</span><span class="st">&quot;years&quot;</span>, <span class="dt">date_labels=</span><span class="st">&quot;%Y&quot;</span>) +
<span class="st"> </span><span class="kw">scale_colour_manual</span>(<span class="dt">values =</span> <span class="kw">c</span>(<span class="dt">registered =</span> <span class="st">'lightgreen'</span>, <span class="dt">inactive =</span> <span class="st">'grey'</span>)) +
<span class="st"> </span><span class="kw">geom_point</span>(<span class="kw">aes</span>(<span class="dt">y =</span> count), <span class="dt">size =</span> <span class="fl">0.5</span>) +
<span class="st"> </span><span class="co"># moving average, make first and last value NA (to ensure identical length of vectors)</span>
<span class="st"> </span><span class="kw">geom_line</span>(<span class="kw">aes</span>(<span class="dt">y =</span> <span class="kw">rollmean</span>(count, mvavg_bins, <span class="dt">fill =</span> <span class="kw">list</span>(<span class="ot">NA</span>, <span class="ot">NULL</span>, <span class="ot">NA</span>))))</code></pre></div>
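<p><img src="" alt="Weekly counts of registered and inactive boxes over time, shown as points with a moving-average line per event" /></p><!-- embedded plot data is missing from this file; re-render the vignette to restore it -->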
<p>We see a sudden rise in early 2017, which lines up with the fast-growing grouptag <code>Luftdaten</code>. This was enabled by an integration of openSenseMap.org into the firmware of the air quality monitoring project <a href="https://luftdaten.info">luftdaten.info</a>. The dips in mid 2017 and early 2018 could possibly be explained by production/delivery issues of the senseBox hardware, but I have no data on the exact time frames to verify.</p>
</div>
<div id="plot-duration-of-boxes-being-active" class="section level1 tabset">
<h1>Plot duration of boxes being active</h1>
<p>While we are looking at <code>createdAt</code> and <code>updatedAt</code>, we can also extract the duration of activity of each box, and look at metrics by exposure and grouptag once more:</p>
<div id="by-exposure" class="section level2">
<h2>…by exposure</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">duration =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(exposure) %&gt;%
<span class="st"> </span><span class="kw">filter</span>(!<span class="kw">is.na</span>(updatedAt)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">duration =</span> <span class="kw">difftime</span>(updatedAt, createdAt, <span class="dt">units=</span><span class="st">'days'</span>))
<span class="kw">ggplot</span>(duration, <span class="kw">aes</span>(<span class="dt">x =</span> exposure, <span class="dt">y =</span> duration)) +
<span class="st"> </span><span class="kw">geom_boxplot</span>() +
<span class="st"> </span><span class="kw">coord_flip</span>() +<span class="st"> </span><span class="kw">ylab</span>(<span class="st">'Duration active in Days'</span>)</code></pre></div>
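<p><img src="" alt="Box plot of duration active in days, by exposure" /></p><!-- embedded plot data is missing from this file; re-render the vignette to restore it -->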
<p>The time of activity averages at only 152 days, though there are boxes with 759 days of activity, spanning a large chunk of openSenseMap's existence.</p>
</div>
<div id="by-grouptag" class="section level2">
<h2>…by grouptag</h2>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">duration =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(grouptag) %&gt;%
<span class="st"> </span><span class="co"># only include grouptags with 8 or more members</span>
<span class="st"> </span><span class="kw">filter</span>(<span class="kw">length</span>(grouptag) &gt;=<span class="st"> </span><span class="dv">8</span> &amp;&amp;<span class="st"> </span>!<span class="kw">is.na</span>(grouptag) &amp;&amp;<span class="st"> </span>!<span class="kw">is.na</span>(updatedAt)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">duration =</span> <span class="kw">difftime</span>(updatedAt, createdAt, <span class="dt">units=</span><span class="st">'days'</span>))
<span class="kw">ggplot</span>(duration, <span class="kw">aes</span>(<span class="dt">x =</span> grouptag, <span class="dt">y =</span> duration)) +
<span class="st"> </span><span class="kw">geom_boxplot</span>() +
<span class="st"> </span><span class="kw">coord_flip</span>() +<span class="st"> </span><span class="kw">ylab</span>(<span class="st">'Duration active in Days'</span>)</code></pre></div>
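<p><img src="" alt="Box plot of duration active in days, by grouptag" /></p><!-- embedded plot data is missing from this file; re-render the vignette to restore it -->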
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r">duration %&gt;%
<span class="st"> </span><span class="kw">summarize</span>(
<span class="dt">duration_avg =</span> <span class="kw">round</span>(<span class="kw">mean</span>(duration)),
<span class="dt">duration_min =</span> <span class="kw">round</span>(<span class="kw">min</span>(duration)),
<span class="dt">duration_max =</span> <span class="kw">round</span>(<span class="kw">max</span>(duration)),
<span class="dt">oldest_box =</span> <span class="kw">round</span>(<span class="kw">max</span>(<span class="kw">difftime</span>(<span class="kw">now</span>(), createdAt, <span class="dt">units=</span><span class="st">'days'</span>)))
) %&gt;%
<span class="st"> </span><span class="kw">arrange</span>(<span class="kw">desc</span>(duration_avg))</code></pre></div>
<div class="kable-table">
<table>
<thead>
<tr class="header">
<th align="left">grouptag</th>
<th align="left">duration_avg</th>
<th align="left">duration_min</th>
<th align="left">duration_max</th>
<th align="left">oldest_box</th>
</tr>
</thead>
<tbody>
<tr class="odd">
<td align="left">dwih-sp</td>
<td align="left">627 days</td>
<td align="left">549 days</td>
<td align="left">655 days</td>
<td align="left">655 days</td>
</tr>
<tr class="even">
<td align="left">Feinstaub</td>
<td align="left">219 days</td>
<td align="left">4 days</td>
<td align="left">413 days</td>
<td align="left">413 days</td>
</tr>
<tr class="odd">
<td align="left">ifgi</td>
<td align="left">207 days</td>
<td align="left">0 days</td>
<td align="left">622 days</td>
<td align="left">708 days</td>
</tr>
<tr class="even">
<td align="left">Sofia</td>
<td align="left">200 days</td>
<td align="left">15 days</td>
<td align="left">410 days</td>
<td align="left">410 days</td>
</tr>
<tr class="odd">
<td align="left">Bad_Hersfeld</td>
<td align="left">197 days</td>
<td align="left">65 days</td>
<td align="left">312 days</td>
<td align="left">312 days</td>
</tr>
<tr class="even">
<td align="left">Luftdaten</td>
<td align="left">187 days</td>
<td align="left">0 days</td>
<td align="left">424 days</td>
<td align="left">438 days</td>
</tr>
<tr class="odd">
<td align="left">luftdaten.info</td>
<td align="left">183 days</td>
<td align="left">9 days</td>
<td align="left">360 days</td>
<td align="left">390 days</td>
</tr>
<tr class="even">
<td align="left">IKG</td>
<td align="left">163 days</td>
<td align="left">70 days</td>
<td align="left">260 days</td>
<td align="left">431 days</td>
</tr>
<tr class="odd">
<td align="left">Luftdaten.info</td>
<td align="left">86 days</td>
<td align="left">5 days</td>
<td align="left">376 days</td>
<td align="left">418 days</td>
</tr>
<tr class="even">
<td align="left">Che Aria Tira?</td>
<td align="left">75 days</td>
<td align="left">71 days</td>
<td align="left">76 days</td>
<td align="left">76 days</td>
</tr>
<tr class="odd">
<td align="left">Raumanmeri</td>
<td align="left">45 days</td>
<td align="left">7 days</td>
<td align="left">318 days</td>
<td align="left">439 days</td>
</tr>
<tr class="even">
<td align="left">PGKN</td>
<td align="left">35 days</td>
<td align="left">29 days</td>
<td align="left">48 days</td>
<td align="left">48 days</td>
</tr>
</tbody>
</table>
</div>
<p>The time of activity averages at only 191 days, though there are boxes with 655 days of activity, spanning a large chunk of openSenseMap's existence.</p>
</div>
<div id="by-year-of-registration" class="section level2">
<h2>…by year of registration</h2>
<p>This is less useful, as older boxes are active for a longer time by definition. If you have an idea how to compensate for that, please send a <a href="https://github.com/sensebox/opensensmapr/pulls">Pull Request</a>!</p>
<div class="sourceCode"><pre class="sourceCode r"><code class="sourceCode r"><span class="co"># NOTE: boxes older than 2016 missing due to missing updatedAt in database</span>
duration =<span class="st"> </span>boxes %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">year =</span> <span class="kw">cut</span>(<span class="kw">as.Date</span>(createdAt), <span class="dt">breaks =</span> <span class="st">'year'</span>)) %&gt;%
<span class="st"> </span><span class="kw">group_by</span>(year) %&gt;%
<span class="st"> </span><span class="kw">filter</span>(!<span class="kw">is.na</span>(updatedAt)) %&gt;%
<span class="st"> </span><span class="kw">mutate</span>(<span class="dt">duration =</span> <span class="kw">difftime</span>(updatedAt, createdAt, <span class="dt">units=</span><span class="st">'days'</span>))
<span class="kw">ggplot</span>(duration, <span class="kw">aes</span>(<span class="dt">x =</span> <span class="kw">substr</span>(<span class="kw">as.character</span>(year), <span class="dv">0</span>, <span class="dv">4</span>), <span class="dt">y =</span> duration)) +
<span class="st"> </span><span class="kw">geom_boxplot</span>() +
<span class="st"> </span><span class="kw">coord_flip</span>() +<span class="st"> </span><span class="kw">ylab</span>(<span class="st">'Duration active in Days'</span>) +<span class="st"> </span><span class="kw">xlab</span>(<span class="st">'Year of Registration'</span>)</code></pre></div>
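<p><img src="" alt="Box plot of duration active in days, by year of registration" /></p><!-- embedded plot data is missing from this file; re-render the vignette to restore it -->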
</div>
</div>
<div id="more-visualisations" class="section level1">
<h1>More Visualisations</h1>
<p>Other visualisations come to mind, and are left as an exercise to the reader. If you implemented some, feel free to add them to this vignette via a <a href="https://github.com/sensebox/opensensmapr/pulls">Pull Request</a>.</p>
<ul>
<li>growth by phenomenon</li>
<li>growth by location -&gt; (interactive) map</li>
<li>set inactive rate in relation to total box count</li>
<li>filter timespans with big dips in growth rate, and extrapolate the number of senseBoxes that could be on the platform today, assuming there were no production issues ;)</li>
</ul>
</div>
<!-- dynamically load mathjax for compatibility with self-contained -->
<script>
(function () {
  // Create a <script> element for the hosted MathJax bundle and attach it
  // to the document's first <head> element so it loads at runtime.
  var headElement = document.getElementsByTagName("head")[0];
  var mathJaxLoader = document.createElement("script");
  mathJaxLoader.type = "text/javascript";
  mathJaxLoader.src = "https://mathjax.rstudio.com/latest/MathJax.js?config=TeX-AMS-MML_HTMLorMML";
  headElement.appendChild(mathJaxLoader);
})();
</script>
</body>
</html>