GeographicLib 2.3
NearestNeighbor.hpp
Go to the documentation of this file.
1/**
2 * \file NearestNeighbor.hpp
3 * \brief Header for GeographicLib::NearestNeighbor class
4 *
5 * Copyright (c) Charles Karney (2016-2020) <karney@alum.mit.edu> and licensed
6 * under the MIT/X11 License. For more information, see
7 * https://geographiclib.sourceforge.io/
8 **********************************************************************/
9
10#if !defined(GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP)
11#define GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP 1
12
13#include <algorithm> // for nth_element, max_element, etc.
14#include <vector>
15#include <queue> // for priority_queue
16#include <utility> // for swap + pair
17#include <cstring>
18#include <limits>
19#include <cmath>
20#include <sstream>
21// Only for GeographicLib::GeographicErr
23
24#if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
25 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
26#include <boost/serialization/nvp.hpp>
27#include <boost/serialization/split_member.hpp>
28#include <boost/serialization/array.hpp>
29#include <boost/serialization/vector.hpp>
30#endif
31
32#if defined(_MSC_VER)
33// Squelch warnings about constant conditional expressions
34# pragma warning (push)
35# pragma warning (disable: 4127)
36#endif
37
38namespace GeographicLib {
39
40 /**
41 * \brief Nearest-neighbor calculations
42 *
43 * This class solves the nearest-neighbor problm using a vantage-point tree
44 * as described in \ref nearest.
45 *
46 * This class is templated so that it can handle arbitrary metric spaces as
47 * follows:
48 *
49 * @tparam dist_t the type used for measuring distances; it can be a real or
50 * signed integer type; in typical geodetic applications, \e dist_t might
51 * be <code>double</code>.
52 * @tparam pos_t the type for specifying the positions of points; geodetic
53 * application might bundle the latitude and longitude into a
54 * <code>std::pair<dist_t, dist_t></code>.
55 * @tparam distfun_t the type of a function object which takes takes two
56 * positions (of type \e pos_t) and returns the distance (of type \e
57 * dist_t); in geodetic applications, this might be a class which is
58 * constructed with a Geodesic object and which implements a member
59 * function with a signature <code>dist_t operator() (const pos_t&, const
60 * pos_t&) const</code>, which returns the geodesic distance between two
61 * points.
62 *
63 * \note The distance measure must satisfy the triangle inequality, \f$
64 * d(a,c) \le d(a,b) + d(b,c) \f$ for all points \e a, \e b, \e c. The
65 * geodesic distance (given by Geodesic::Inverse) does, while the great
66 * ellipse distance and the rhumb line distance <i>do not</i>. If you use
67 * the ordinary Euclidean distance, i.e., \f$ \sqrt{(x_a-x_b)^2 +
68 * (y_a-y_b)^2} \f$ for two dimensions, don't be tempted to leave out the
69 * square root in the interests of "efficiency"; the squared distance does
70 * not satisfy the triangle inequality!
71 *
72 * \note This is a "header-only" implementation and, as such, depends in a
73 * minimal way on the rest of GeographicLib (the only dependency is through
74 * the use of GeographicLib::GeographicErr for handling and run-time
75 * exceptions). Therefore, it is easy to extract this class from the rest of
76 * GeographicLib and use it as a stand-alone facility.
77 *
78 * The \e dist_t type must support numeric_limits queries (specifically:
79 * is_signed, is_integer, max(), digits).
80 *
81 * The NearestNeighbor object is constructed with a vector of points (type \e
82 * pos_t) and a distance function (type \e distfun_t). However the object
83 * does \e not store the points. When querying the object with Search(),
84 * it's necessary to supply the same vector of points and the same distance
85 * function.
86 *
87 * There's no capability in this implementation to add or remove points from
88 * the set. Instead Initialize() should be called to re-initialize the
89 * object with the modified vector of points.
90 *
91 * Because of the overhead in constructing a NearestNeighbor object for a
92 * large set of points, functions Save() and Load() are provided to save the
93 * object to an external file. operator<<(), operator>>() and <a
94 * href="https://www.boost.org/libs/serialization/doc"> Boost
95 * serialization</a> can also be used to save and restore a NearestNeighbor
96 * object. This is illustrated in the example.
97 *
98 * Example of use:
99 * \include example-NearestNeighbor.cpp
100 **********************************************************************/
101 template<typename dist_t, typename pos_t, class distfun_t>
103 // For tracking changes to the I/O format
104 static const int version = 1;
105 // This is what we get "free"; but if sizeof(dist_t) = 1 (unlikely), allow
106 // 4 slots (and this accommodates the default value bucket = 4).
107 static const int maxbucket =
108 (2 + ((4 * sizeof(dist_t)) / sizeof(int) >= 2 ?
109 (4 * sizeof(dist_t)) / sizeof(int) : 2));
110 public:
111
112 /**
113 * Default constructor for NearestNeighbor.
114 *
115 * This is equivalent to specifying an empty set of points.
116 **********************************************************************/
117 NearestNeighbor() : _numpoints(0), _bucket(0), _cost(0) {}
118
119 /**
120 * Constructor for NearestNeighbor.
121 *
122 * @param[in] pts a vector of points to include in the set.
123 * @param[in] dist the distance function object.
124 * @param[in] bucket the size of the buckets at the leaf nodes; this must
125 * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
126 * @exception GeographicErr if the value of \e bucket is out of bounds or
127 * the size of \e pts is too big for an int.
128 * @exception std::bad_alloc if memory for the tree can't be allocated.
129 *
130 * \e pts may contain coincident points (i.e., the distance between them
131 * vanishes); these are treated as distinct.
132 *
133 * The choice of \e bucket is a tradeoff between space and efficiency. A
134 * larger \e bucket decreases the size of the NearestNeighbor object which
135 * scales as pts.size() / max(1, bucket) and reduces the number of distance
136 * calculations to construct the object by log2(bucket) * pts.size().
137 * However each search then requires about bucket additional distance
138 * calculations.
139 *
140 * \warning The distances computed by \e dist must satisfy the standard
141 * metric conditions. If not, the results are undefined. Neither the data
142 * in \e pts nor the query points should contain NaNs or infinities because
143 * such data violates the metric conditions.
144 *
145 * \warning The same arguments \e pts and \e dist must be provided
146 * to the Search() function.
147 **********************************************************************/
148 NearestNeighbor(const std::vector<pos_t>& pts, const distfun_t& dist,
149 int bucket = 4) {
150 Initialize(pts, dist, bucket);
151 }
152
153 /**
154 * Initialize or re-initialize NearestNeighbor.
155 *
156 * @param[in] pts a vector of points to include in the tree.
157 * @param[in] dist the distance function object.
158 * @param[in] bucket the size of the buckets at the leaf nodes; this must
159 * lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)] (default 4).
160 * @exception GeographicErr if the value of \e bucket is out of bounds or
161 * the size of \e pts is too big for an int.
162 * @exception std::bad_alloc if memory for the tree can't be allocated.
163 *
164 * See also the documentation on the constructor.
165 *
166 * If an exception is thrown, the state of the NearestNeighbor is
167 * unchanged.
168 **********************************************************************/
169 void Initialize(const std::vector<pos_t>& pts, const distfun_t& dist,
170 int bucket = 4) {
171 static_assert(std::numeric_limits<dist_t>::is_signed,
172 "dist_t must be a signed type");
173 if (!( 0 <= bucket && bucket <= maxbucket ))
175 ("bucket must lie in [0, 2 + 4*sizeof(dist_t)/sizeof(int)]");
176 if (pts.size() > size_t(std::numeric_limits<int>::max()))
177 throw GeographicLib::GeographicErr("pts array too big");
178 // the pair contains distance+id
179 std::vector<item> ids(pts.size());
180 for (int k = int(ids.size()); k--;)
181 ids[k] = std::make_pair(dist_t(0), k);
182 int cost = 0;
183 std::vector<Node> tree;
184 init(pts, dist, bucket, tree, ids, cost,
185 0, int(ids.size()), int(ids.size()/2));
186 _tree.swap(tree);
187 _numpoints = int(pts.size());
188 _bucket = bucket;
189 _mc = _sc = 0;
190 _cost = cost; _c1 = _k = _cmax = 0;
191 _cmin = std::numeric_limits<int>::max();
192 }
193
194 /**
195 * Search the NearestNeighbor.
196 *
197 * @param[in] pts the vector of points used for initialization.
198 * @param[in] dist the distance function object used for initialization.
199 * @param[in] query the query point.
200 * @param[out] ind a vector of indices to the closest points found.
201 * @param[in] k the number of points to search for (default = 1).
202 * @param[in] maxdist only return points with distances of \e maxdist or
203 * less from \e query (default is the maximum \e dist_t).
204 * @param[in] mindist only return points with distances of more than
205 * \e mindist from \e query (default = &minus;1).
206 * @param[in] exhaustive whether to do an exhaustive search (default true).
207 * @param[in] tol the tolerance on the results (default 0).
208 * @return the distance to the closest point found (&minus;1 if no points
209 * are found).
210 * @exception GeographicErr if \e pts has a different size from that used
211 * to construct the object.
212 *
213 * The indices returned in \e ind are sorted by distance from \e query
214 * (closest first).
215 *
216 * The simplest invocation is with just the 4 non-optional arguments. This
217 * returns the closest distance and the index to the closest point in
218 * <i>ind</i><sub>0</sub>. If there are several points equally close, then
219 * <i>ind</i><sub>0</sub> gives the index of an arbirary one of them. If
220 * there's no closest point (because the set of points is empty), then \e
221 * ind is empty and &minus;1 is returned.
222 *
223 * With \e exhaustive = true and \e tol = 0 (their default values), this
224 * finds the indices of \e k closest neighbors to \e query whose distances
225 * to \e query are in (\e mindist, \e maxdist]. If \e mindist and \e
226 * maxdist have their default values, then these bounds have no effect. If
227 * \e query is one of the points in the tree, then set \e mindist = 0 to
228 * prevent this point (and other coincident points) from being returned.
229 *
230 * If \e exhaustive = false, exit as soon as \e k results satisfying the
231 * distance criteria are found. If less than \e k results are returned
232 * then the search was exhaustive even if \e exhaustive = false.
233 *
234 * If \e tol is positive, do an approximate search; in this case the
235 * results are to be interpreted as follows: if the <i>k</i>'th distance is
236 * \e dk, then all results with distances less than or equal \e dk &minus;
237 * \e tol are correct; all others are suspect &mdash; there may be other
238 * closer results with distances greater or equal to \e dk &minus; \e tol.
239 * If less than \e k results are found, then the search is exact.
240 *
241 * \e mindist should be used to exclude a "small" neighborhood of the query
242 * point (relative to the average spacing of the data). If \e mindist is
243 * large, the efficiency of the search deteriorates.
244 *
245 * \note Only the shortest distance is returned (as as the function value).
246 * The distances to other points (indexed by <i>ind</i><sub><i>j</i></sub>
247 * for \e j > 0) can be found by invoking \e dist again.
248 *
249 * \warning The arguments \e pts and \e dist must be identical to those
250 * used to initialize the NearestNeighbor; if not, this function will
251 * return some meaningless result (however, if the size of \e pts is wrong,
252 * this function throw an exception).
253 *
254 * \warning The query point cannot be a NaN or infinite because then the
255 * metric conditions are violated.
256 **********************************************************************/
257 dist_t Search(const std::vector<pos_t>& pts, const distfun_t& dist,
258 const pos_t& query,
259 std::vector<int>& ind,
260 int k = 1,
261 dist_t maxdist = std::numeric_limits<dist_t>::max(),
262 dist_t mindist = -1,
263 bool exhaustive = true,
264 dist_t tol = 0) const {
265 if (_numpoints != int(pts.size()))
266 throw GeographicLib::GeographicErr("pts array has wrong size");
267 std::priority_queue<item> results;
268 if (_numpoints > 0 && k > 0 && maxdist > mindist) {
269 // distance to the kth closest point so far
270 dist_t tau = maxdist;
271 // first is negative of how far query is outside boundary of node
272 // +1 if on boundary or inside
273 // second is node index
274 std::priority_queue<item> todo;
275 todo.push(std::make_pair(dist_t(1), int(_tree.size()) - 1));
276 int c = 0;
277 while (!todo.empty()) {
278 int n = todo.top().second;
279 dist_t d = -todo.top().first;
280 todo.pop();
281 dist_t tau1 = tau - tol;
282 // compare tau and d again since tau may have become smaller.
283 if (!( n >= 0 && tau1 >= d )) continue;
284 const Node& current = _tree[n];
285 dist_t dst = 0; // to suppress warning about uninitialized variable
286 bool exitflag = false, leaf = current.index < 0;
287 for (int i = 0; i < (leaf ? _bucket : 1); ++i) {
288 int index = leaf ? current.leaves[i] : current.index;
289 if (index < 0) break;
290 dst = dist(pts[index], query);
291 ++c;
292
293 if (dst > mindist && dst <= tau) {
294 if (int(results.size()) == k) results.pop();
295 results.push(std::make_pair(dst, index));
296 if (int(results.size()) == k) {
297 if (exhaustive)
298 tau = results.top().first;
299 else {
300 exitflag = true;
301 break;
302 }
303 if (tau <= tol) {
304 exitflag = true;
305 break;
306 }
307 }
308 }
309 }
310 if (exitflag) break;
311
312 if (current.index < 0) continue;
313 tau1 = tau - tol;
314 for (int l = 0; l < 2; ++l) {
315 if (current.data.child[l] >= 0 &&
316 dst + current.data.upper[l] >= mindist) {
317 if (dst < current.data.lower[l]) {
318 d = current.data.lower[l] - dst;
319 if (tau1 >= d)
320 todo.push(std::make_pair(-d, current.data.child[l]));
321 } else if (dst > current.data.upper[l]) {
322 d = dst - current.data.upper[l];
323 if (tau1 >= d)
324 todo.push(std::make_pair(-d, current.data.child[l]));
325 } else
326 todo.push(std::make_pair(dist_t(1), current.data.child[l]));
327 }
328 }
329 }
330 ++_k;
331 _c1 += c;
332 double omc = _mc;
333 _mc += (c - omc) / _k;
334 _sc += (c - omc) * (c - _mc);
335 if (c > _cmax) _cmax = c;
336 if (c < _cmin) _cmin = c;
337 }
338
339 dist_t d = -1;
340 ind.resize(results.size());
341
342 for (int i = int(ind.size()); i--;) {
343 ind[i] = int(results.top().second);
344 if (i == 0) d = results.top().first;
345 results.pop();
346 }
347 return d;
348
349 }
350
351 /**
352 * @return the total number of points in the set.
353 **********************************************************************/
354 int NumPoints() const { return _numpoints; }
355
356 /**
357 * Write the object to an I/O stream.
358 *
359 * @param[in,out] os the stream to write to.
360 * @param[in] bin if true (the default) save in binary mode.
361 * @exception std::bad_alloc if memory for the string representation of the
362 * object can't be allocated.
363 *
364 * The counters tracking the statistics of searches are not saved; however
365 * the initializtion cost is saved. The format of the binary saves is \e
366 * not portable.
367 *
368 * \note <a href="https://www.boost.org/libs/serialization/doc">
369 * Boost serialization</a> can also be used to save and restore a
370 * NearestNeighbor object. This requires that the
371 * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
372 **********************************************************************/
373 void Save(std::ostream& os, bool bin = true) const {
374 int realspec = std::numeric_limits<dist_t>::digits *
375 (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
376 if (bin) {
377 char id[] = "NearestNeighbor_";
378 os.write(id, 16);
379 int buf[6];
380 buf[0] = version;
381 buf[1] = realspec;
382 buf[2] = _bucket;
383 buf[3] = _numpoints;
384 buf[4] = int(_tree.size());
385 buf[5] = _cost;
386 os.write(reinterpret_cast<const char *>(buf), 6 * sizeof(int));
387 for (int i = 0; i < int(_tree.size()); ++i) {
388 const Node& node = _tree[i];
389 os.write(reinterpret_cast<const char *>(&node.index), sizeof(int));
390 if (node.index >= 0) {
391 os.write(reinterpret_cast<const char *>(node.data.lower),
392 2 * sizeof(dist_t));
393 os.write(reinterpret_cast<const char *>(node.data.upper),
394 2 * sizeof(dist_t));
395 os.write(reinterpret_cast<const char *>(node.data.child),
396 2 * sizeof(int));
397 } else {
398 os.write(reinterpret_cast<const char *>(node.leaves),
399 _bucket * sizeof(int));
400 }
401 }
402 } else {
403 std::stringstream ostring;
404 // Ensure enough precision for type dist_t. With C++11, max_digits10
405 // can be used instead.
406 if (!std::numeric_limits<dist_t>::is_integer) {
407 static const int prec
408 = int(std::ceil(std::numeric_limits<dist_t>::digits *
409 std::log10(2.0) + 1));
410 ostring.precision(prec);
411 }
412 ostring << version << " " << realspec << " " << _bucket << " "
413 << _numpoints << " " << _tree.size() << " " << _cost;
414 for (int i = 0; i < int(_tree.size()); ++i) {
415 const Node& node = _tree[i];
416 ostring << "\n" << node.index;
417 if (node.index >= 0) {
418 for (int l = 0; l < 2; ++l)
419 ostring << " " << node.data.lower[l] << " " << node.data.upper[l]
420 << " " << node.data.child[l];
421 } else {
422 for (int l = 0; l < _bucket; ++l)
423 ostring << " " << node.leaves[l];
424 }
425 }
426 os << ostring.str();
427 }
428 }
429
430 /**
431 * Read the object from an I/O stream.
432 *
433 * @param[in,out] is the stream to read from
434 * @param[in] bin if true (the default) load in binary mode.
435 * @exception GeographicErr if the state read from \e is is illegal.
436 * @exception std::bad_alloc if memory for the tree can't be allocated.
437 *
438 * The counters tracking the statistics of searches are reset by this
439 * operation. Binary data must have been saved on a machine with the same
440 * architecture. If an exception is thrown, the state of the
441 * NearestNeighbor is unchanged.
442 *
443 * \note <a href="https://www.boost.org/libs/serialization/doc">
444 * Boost serialization</a> can also be used to save and restore a
445 * NearestNeighbor object. This requires that the
446 * GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION macro be defined.
447 *
448 * \warning The same arguments \e pts and \e dist used for
449 * initialization must be provided to the Search() function.
450 **********************************************************************/
451 void Load(std::istream& is, bool bin = true) {
452 int version1, realspec, bucket, numpoints, treesize, cost;
453 if (bin) {
454 char id[17];
455 is.read(id, 16);
456 id[16] = '\0';
457 if (!(std::strcmp(id, "NearestNeighbor_") == 0))
458 throw GeographicLib::GeographicErr("Bad ID");
459 is.read(reinterpret_cast<char *>(&version1), sizeof(int));
460 is.read(reinterpret_cast<char *>(&realspec), sizeof(int));
461 is.read(reinterpret_cast<char *>(&bucket), sizeof(int));
462 is.read(reinterpret_cast<char *>(&numpoints), sizeof(int));
463 is.read(reinterpret_cast<char *>(&treesize), sizeof(int));
464 is.read(reinterpret_cast<char *>(&cost), sizeof(int));
465 } else {
466 if (!( is >> version1 >> realspec >> bucket >> numpoints >> treesize
467 >> cost ))
468 throw GeographicLib::GeographicErr("Bad header");
469 }
470 if (!( version1 == version ))
471 throw GeographicLib::GeographicErr("Incompatible version");
472 if (!( realspec == std::numeric_limits<dist_t>::digits *
473 (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
474 throw GeographicLib::GeographicErr("Different dist_t types");
475 if (!( 0 <= bucket && bucket <= maxbucket ))
476 throw GeographicLib::GeographicErr("Bad bucket size");
477 if (!( 0 <= treesize && treesize <= numpoints ))
478 throw
479 GeographicLib::GeographicErr("Bad number of points or tree size");
480 if (!( 0 <= cost ))
481 throw GeographicLib::GeographicErr("Bad value for cost");
482 std::vector<Node> tree;
483 tree.reserve(treesize);
484 for (int i = 0; i < treesize; ++i) {
485 Node node;
486 if (bin) {
487 is.read(reinterpret_cast<char *>(&node.index), sizeof(int));
488 if (node.index >= 0) {
489 is.read(reinterpret_cast<char *>(node.data.lower),
490 2 * sizeof(dist_t));
491 is.read(reinterpret_cast<char *>(node.data.upper),
492 2 * sizeof(dist_t));
493 is.read(reinterpret_cast<char *>(node.data.child),
494 2 * sizeof(int));
495 } else {
496 is.read(reinterpret_cast<char *>(node.leaves),
497 bucket * sizeof(int));
498 for (int l = bucket; l < maxbucket; ++l)
499 node.leaves[l] = 0;
500 }
501 } else {
502 if (!( is >> node.index ))
503 throw GeographicLib::GeographicErr("Bad index");
504 if (node.index >= 0) {
505 for (int l = 0; l < 2; ++l) {
506 if (!( is >> node.data.lower[l] >> node.data.upper[l]
507 >> node.data.child[l] ))
508 throw GeographicLib::GeographicErr("Bad node data");
509 }
510 } else {
511 // Must be at least one valid leaf followed by a sequence end
512 // markers (-1).
513 for (int l = 0; l < bucket; ++l) {
514 if (!( is >> node.leaves[l] ))
515 throw GeographicLib::GeographicErr("Bad leaf data");
516 }
517 for (int l = bucket; l < maxbucket; ++l)
518 node.leaves[l] = 0;
519 }
520 }
521 node.Check(numpoints, treesize, bucket);
522 tree.push_back(node);
523 }
524 _tree.swap(tree);
525 _numpoints = numpoints;
526 _bucket = bucket;
527 _mc = _sc = 0;
528 _cost = cost; _c1 = _k = _cmax = 0;
529 _cmin = std::numeric_limits<int>::max();
530 }
531
532 /**
533 * Write the object to stream \e os as text.
534 *
535 * @param[in,out] os the output stream.
536 * @param[in] t the NearestNeighbor object to be saved.
537 * @exception std::bad_alloc if memory for the string representation of the
538 * object can't be allocated.
539 **********************************************************************/
540 friend std::ostream& operator<<(std::ostream& os, const NearestNeighbor& t)
541 { t.Save(os, false); return os; }
542
543 /**
544 * Read the object from stream \e is as text.
545 *
546 * @param[in,out] is the input stream.
547 * @param[out] t the NearestNeighbor object to be loaded.
548 * @exception GeographicErr if the state read from \e is is illegal.
549 * @exception std::bad_alloc if memory for the tree can't be allocated.
550 **********************************************************************/
551 friend std::istream& operator>>(std::istream& is, NearestNeighbor& t)
552 { t.Load(is, false); return is; }
553
554 /**
555 * Swap with another NearestNeighbor object.
556 *
557 * @param[in,out] t the NearestNeighbor object to swap with.
558 **********************************************************************/
560 std::swap(_numpoints, t._numpoints);
561 std::swap(_bucket, t._bucket);
562 std::swap(_cost, t._cost);
563 _tree.swap(t._tree);
564 std::swap(_mc, t._mc);
565 std::swap(_sc, t._sc);
566 std::swap(_c1, t._c1);
567 std::swap(_k, t._k);
568 std::swap(_cmin, t._cmin);
569 std::swap(_cmax, t._cmax);
570 }
571
572 /**
573 * The accumulated statistics on the searches so far.
574 *
575 * @param[out] setupcost the cost of initializing the NearestNeighbor.
576 * @param[out] numsearches the number of calls to Search().
577 * @param[out] searchcost the total cost of the calls to Search().
578 * @param[out] mincost the minimum cost of a Search().
579 * @param[out] maxcost the maximum cost of a Search().
580 * @param[out] mean the mean cost of a Search().
581 * @param[out] sd the standard deviation in the cost of a Search().
582 *
583 * Here "cost" measures the number of distance calculations needed. Note
584 * that the accumulation of statistics is \e not thread safe.
585 **********************************************************************/
586 void Statistics(int& setupcost, int& numsearches, int& searchcost,
587 int& mincost, int& maxcost,
588 double& mean, double& sd) const {
589 setupcost = _cost; numsearches = _k; searchcost = _c1;
590 mincost = _cmin; maxcost = _cmax;
591 mean = _mc; sd = std::sqrt(_sc / (_k - 1));
592 }
593
594 /**
595 * Reset the counters for the accumulated statistics on the searches so
596 * far.
597 **********************************************************************/
598 void ResetStatistics() const {
599 _mc = _sc = 0;
600 _c1 = _k = _cmax = 0;
601 _cmin = std::numeric_limits<int>::max();
602 }
603
604 private:
605 // Package up a dist_t and an int. We will want to sort on the dist_t so
606 // put it first.
607 typedef std::pair<dist_t, int> item;
608 // \cond SKIP
609 class Node {
610 public:
611 struct bounds {
612 dist_t lower[2], upper[2]; // bounds on inner/outer distances
613 int child[2];
614 };
615 union {
616 bounds data;
617 int leaves[maxbucket];
618 };
619 int index;
620
621 Node()
622 : index(-1)
623 {
624 for (int i = 0; i < 2; ++i) {
625 data.lower[i] = data.upper[i] = 0;
626 data.child[i] = -1;
627 }
628 }
629
630 // Sanity check on a Node
631 void Check(int numpoints, int treesize, int bucket) const {
632 if (!( -1 <= index && index < numpoints ))
633 throw GeographicLib::GeographicErr("Bad index");
634 if (index >= 0) {
635 if (!( -1 <= data.child[0] && data.child[0] < treesize &&
636 -1 <= data.child[1] && data.child[1] < treesize ))
637 throw GeographicLib::GeographicErr("Bad child pointers");
638 if (!( 0 <= data.lower[0] && data.lower[0] <= data.upper[0] &&
639 data.upper[0] <= data.lower[1] &&
640 data.lower[1] <= data.upper[1] ))
641 throw GeographicLib::GeographicErr("Bad bounds");
642 } else {
643 // Must be at least one valid leaf followed by a sequence end markers
644 // (-1).
645 bool start = true;
646 for (int l = 0; l < bucket; ++l) {
647 if (!( (start ?
648 ((l == 0 ? 0 : -1) <= leaves[l] && leaves[l] < numpoints) :
649 leaves[l] == -1) ))
650 throw GeographicLib::GeographicErr("Bad leaf data");
651 start = leaves[l] >= 0;
652 }
653 for (int l = bucket; l < maxbucket; ++l) {
654 if (leaves[l] != 0)
655 throw GeographicLib::GeographicErr("Bad leaf data");
656 }
657 }
658 }
659
660#if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
661 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
662 friend class boost::serialization::access;
663 template<class Archive>
664 void save(Archive& ar, const unsigned int) const {
665 ar & boost::serialization::make_nvp("index", index);
666 if (index < 0)
667 ar & boost::serialization::make_nvp("leaves", leaves);
668 else
669 ar & boost::serialization::make_nvp("lower", data.lower)
670 & boost::serialization::make_nvp("upper", data.upper)
671 & boost::serialization::make_nvp("child", data.child);
672 }
673 template<class Archive>
674 void load(Archive& ar, const unsigned int) {
675 ar & boost::serialization::make_nvp("index", index);
676 if (index < 0)
677 ar & boost::serialization::make_nvp("leaves", leaves);
678 else
679 ar & boost::serialization::make_nvp("lower", data.lower)
680 & boost::serialization::make_nvp("upper", data.upper)
681 & boost::serialization::make_nvp("child", data.child);
682 }
683 template<class Archive>
684 void serialize(Archive& ar, const unsigned int file_version)
685 { boost::serialization::split_member(ar, *this, file_version); }
686#endif
687 };
688 // \endcond
689#if defined(GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION) && \
690 GEOGRAPHICLIB_HAVE_BOOST_SERIALIZATION
691 friend class boost::serialization::access;
692 template<class Archive> void save(Archive& ar, const unsigned) const {
693 int realspec = std::numeric_limits<dist_t>::digits *
694 (std::numeric_limits<dist_t>::is_integer ? -1 : 1);
695 // Need to use version1, otherwise load error in debug mode on Linux:
696 // undefined reference to GeographicLib::NearestNeighbor<...>::version.
697 int version1 = version;
698 ar & boost::serialization::make_nvp("version", version1)
699 & boost::serialization::make_nvp("realspec", realspec)
700 & boost::serialization::make_nvp("bucket", _bucket)
701 & boost::serialization::make_nvp("numpoints", _numpoints)
702 & boost::serialization::make_nvp("cost", _cost)
703 & boost::serialization::make_nvp("tree", _tree);
704 }
705 template<class Archive> void load(Archive& ar, const unsigned) {
706 int version1, realspec, bucket, numpoints, cost;
707 ar & boost::serialization::make_nvp("version", version1);
708 if (version1 != version)
709 throw GeographicLib::GeographicErr("Incompatible version");
710 std::vector<Node> tree;
711 ar & boost::serialization::make_nvp("realspec", realspec);
712 if (!( realspec == std::numeric_limits<dist_t>::digits *
713 (std::numeric_limits<dist_t>::is_integer ? -1 : 1) ))
714 throw GeographicLib::GeographicErr("Different dist_t types");
715 ar & boost::serialization::make_nvp("bucket", bucket);
716 if (!( 0 <= bucket && bucket <= maxbucket ))
717 throw GeographicLib::GeographicErr("Bad bucket size");
718 ar & boost::serialization::make_nvp("numpoints", numpoints)
719 & boost::serialization::make_nvp("cost", cost)
720 & boost::serialization::make_nvp("tree", tree);
721 if (!( 0 <= int(tree.size()) && int(tree.size()) <= numpoints ))
722 throw
723 GeographicLib::GeographicErr("Bad number of points or tree size");
724 for (int i = 0; i < int(tree.size()); ++i)
725 tree[i].Check(numpoints, int(tree.size()), bucket);
726 _tree.swap(tree);
727 _numpoints = numpoints;
728 _bucket = bucket;
729 _mc = _sc = 0;
730 _cost = cost; _c1 = _k = _cmax = 0;
731 _cmin = std::numeric_limits<int>::max();
732 }
733 template<class Archive>
734 void serialize(Archive& ar, const unsigned int file_version)
735 { boost::serialization::split_member(ar, *this, file_version); }
736#endif
737
738 int _numpoints, _bucket, _cost;
739 std::vector<Node> _tree;
740 // Counters to track stastistics on the cost of searches
741 mutable double _mc, _sc;
742 mutable int _c1, _k, _cmin, _cmax;
743
744 int init(const std::vector<pos_t>& pts, const distfun_t& dist, int bucket,
745 std::vector<Node>& tree, std::vector<item>& ids, int& cost,
746 int l, int u, int vp) {
747
748 if (u == l)
749 return -1;
750 Node node;
751
752 if (u - l > (bucket == 0 ? 1 : bucket)) {
753
754 // choose a vantage point and move it to the start
755 int i = vp;
756 std::swap(ids[l], ids[i]);
757
758 int m = (u + l + 1) / 2;
759
760 for (int k = l + 1; k < u; ++k) {
761 ids[k].first = dist(pts[ids[l].second], pts[ids[k].second]);
762 ++cost;
763 }
764 // partition around the median distance
765 std::nth_element(ids.begin() + l + 1,
766 ids.begin() + m,
767 ids.begin() + u);
768 node.index = ids[l].second;
769 if (m > l + 1) { // node.child[0] is possibly empty
770 typename std::vector<item>::iterator
771 t = std::min_element(ids.begin() + l + 1, ids.begin() + m);
772 node.data.lower[0] = t->first;
773 t = std::max_element(ids.begin() + l + 1, ids.begin() + m);
774 node.data.upper[0] = t->first;
775 // Use point with max distance as vantage point; this point act as a
776 // "corner" point and leads to a good partition.
777 node.data.child[0] = init(pts, dist, bucket, tree, ids, cost,
778 l + 1, m, int(t - ids.begin()));
779 }
780 typename std::vector<item>::iterator
781 t = std::max_element(ids.begin() + m, ids.begin() + u);
782 node.data.lower[1] = ids[m].first;
783 node.data.upper[1] = t->first;
784 // Use point with max distance as vantage point here too
785 node.data.child[1] = init(pts, dist, bucket, tree, ids, cost,
786 m, u, int(t - ids.begin()));
787 } else {
788 if (bucket == 0)
789 node.index = ids[l].second;
790 else {
791 node.index = -1;
792 // Sort the bucket entries so that the tree is independent of the
793 // implementation of nth_element.
794 std::sort(ids.begin() + l, ids.begin() + u);
795 for (int i = l; i < u; ++i)
796 node.leaves[i-l] = ids[i].second;
797 for (int i = u - l; i < bucket; ++i)
798 node.leaves[i] = -1;
799 for (int i = bucket; i < maxbucket; ++i)
800 node.leaves[i] = 0;
801 }
802 }
803
804 tree.push_back(node);
805 return int(tree.size()) - 1;
806 }
807
808 };
809
810} // namespace GeographicLib
811
812namespace std {
813
814 /**
815 * Swap two GeographicLib::NearestNeighbor objects.
816 *
817 * @tparam dist_t the type used for measuring distances.
818 * @tparam pos_t the type for specifying the positions of points.
819 * @tparam distfun_t the type for a function object which calculates
820 * distances between points.
821 * @param[in,out] a the first GeographicLib::NearestNeighbor to swap.
822 * @param[in,out] b the second GeographicLib::NearestNeighbor to swap.
823 **********************************************************************/
824 template<typename dist_t, typename pos_t, class distfun_t>
827 a.swap(b);
828 }
829
830} // namespace std
831
832#if defined(_MSC_VER)
833# pragma warning (pop)
834#endif
835
836#endif // GEOGRAPHICLIB_NEARESTNEIGHBOR_HPP
Header for GeographicLib::Constants class.
Exception handling for GeographicLib.
Definition: Constants.hpp:316
Nearest-neighbor calculations.
void Statistics(int &setupcost, int &numsearches, int &searchcost, int &mincost, int &maxcost, double &mean, double &sd) const
friend std::istream & operator>>(std::istream &is, NearestNeighbor &t)
dist_t Search(const std::vector< pos_t > &pts, const distfun_t &dist, const pos_t &query, std::vector< int > &ind, int k=1, dist_t maxdist=std::numeric_limits< dist_t >::max(), dist_t mindist=-1, bool exhaustive=true, dist_t tol=0) const
void Initialize(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void Load(std::istream &is, bool bin=true)
void Save(std::ostream &os, bool bin=true) const
NearestNeighbor(const std::vector< pos_t > &pts, const distfun_t &dist, int bucket=4)
void swap(NearestNeighbor &t)
friend std::ostream & operator<<(std::ostream &os, const NearestNeighbor &t)
Namespace for GeographicLib.
Definition: Accumulator.cpp:12
void swap(GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &a, GeographicLib::NearestNeighbor< dist_t, pos_t, distfun_t > &b)