Explorar el Código

Use BTreeSet to check node IDs in IndexedReader

There is a performance penalty (+13%), but this collection enables cheap
deletions.
Johannes Hofmann hace 5 años
padre
commit
bd0846d555
Se ha modificado 1 fichero con 28 adiciones y 49 borrados
  1. 28
    49
      src/indexed.rs

+ 28
- 49
src/indexed.rs Ver fichero

@@ -1,6 +1,7 @@
1 1
 //! Speed up searches by using an index
2 2
 
3 3
 use error::Result;
4
+use std::collections::BTreeSet;
4 5
 use std::fs::File;
5 6
 use std::io::{Read, Seek};
6 7
 use std::ops::RangeInclusive;
@@ -15,23 +16,9 @@ pub struct IdRanges {
15 16
     relation_ids: Option<RangeInclusive<i64>>,
16 17
 }
17 18
 
18
-/// Checks if `sorted_slice` contains some values from the given `range`.
19
-/// Assumes that `sorted_slice` is sorted.
20
-/// Returns the range of indices into `sorted_slice` that needs to be checked.
21
-/// Returns `None` if it is guaranteed that no values from `sorted_slice` are inside `range`.
22
-fn range_included(range: &RangeInclusive<i64>, sorted_slice: &[i64]) -> Option<RangeInclusive<usize>> {
23
-    match (sorted_slice.binary_search(&range.start()), sorted_slice.binary_search(&range.end())) {
24
-        (Ok(start), Ok(end)) => Some(RangeInclusive::new(start, end)),
25
-        (Ok(start), Err(end)) => Some(RangeInclusive::new(start, end.saturating_sub(1))),
26
-        (Err(start), Ok(end)) => Some(RangeInclusive::new(start, end)),
27
-        (Err(start), Err(end)) => {
28
-            if start == end {
29
-                None
30
-            } else {
31
-                Some(RangeInclusive::new(start, end.saturating_sub(1)))
32
-            }
33
-        },
34
-    }
19
+/// Returns true if the given set contains at least one value that is inside the given range.
20
+fn range_included(range: RangeInclusive<i64>, node_ids: &BTreeSet<i64>) -> bool {
21
+    node_ids.range(range).next().is_some()
35 22
 }
36 23
 
37 24
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
@@ -156,7 +143,7 @@ impl<R: Read + Seek> IndexedReader<R> {
156 143
             self.create_index()?;
157 144
         }
158 145
 
159
-        let mut node_ids: Vec<i64> = vec![];
146
+        let mut node_ids: BTreeSet<i64> = BTreeSet::new();
160 147
 
161 148
         // First pass:
162 149
         //   * Filter ways and store their dependencies as node IDs
@@ -180,10 +167,7 @@ impl<R: Read + Seek> IndexedReader<R> {
180 167
                         if filter(&way) {
181 168
                             let refs = way.refs();
182 169
 
183
-                            node_ids.reserve(refs.size_hint().0);
184
-                            for node_id in refs {
185
-                                node_ids.push(node_id);
186
-                            }
170
+                            node_ids.extend(refs);
187 171
 
188 172
                             // Return way
189 173
                             element_callback(&Element::Way(way));
@@ -214,20 +198,12 @@ impl<R: Read + Seek> IndexedReader<R> {
214 198
             }
215 199
         }
216 200
 
217
-        // Sort, to enable binary search
218
-        node_ids.sort_unstable();
219
-
220
-        // Remove duplicate node IDs
221
-        node_ids.dedup();
222
-
223 201
         // Second pass:
224 202
         //   * Iterate only over blobs that may include the node IDs we're searching for
225 203
         for info in &mut self.index {
226 204
             if info.blob_type == SimpleBlobType::Primitive {
227 205
                 if let Some(node_id_range) = info.id_ranges.as_ref().and_then(|r| r.node_ids.as_ref()) {
228
-                    if let Some(slice_range) = range_included(node_id_range, &node_ids) {
229
-                        let ids_subslice = &node_ids.as_slice()[slice_range];
230
-
206
+                    if range_included(node_id_range.clone(), &node_ids) {
231 207
                         self.reader.seek(info.offset)?;
232 208
                         let blob = self.reader.next().ok_or_else(|| {
233 209
                             ::std::io::Error::new(
@@ -238,15 +214,13 @@ impl<R: Read + Seek> IndexedReader<R> {
238 214
                         let block = blob.to_primitiveblock()?;
239 215
                         for group in block.groups() {
240 216
                             for node in group.nodes() {
241
-                                let id = node.id();
242
-                                if ids_subslice.binary_search(&id).is_ok() {
217
+                                if node_ids.contains(&node.id()) {
243 218
                                     // ID found, return node
244 219
                                     element_callback(&Element::Node(node));
245 220
                                 }
246 221
                             }
247 222
                             for node in group.dense_nodes() {
248
-                                let id = node.id;
249
-                                if ids_subslice.binary_search(&id).is_ok() {
223
+                                if node_ids.contains(&node.id) {
250 224
                                     // ID found, return dense node
251 225
                                     element_callback(&Element::DenseNode(node));
252 226
                                 }
@@ -287,19 +261,24 @@ mod tests {
287 261
     use super::*;
288 262
 
289 263
     #[test]
290
-    fn test_range_included() {
291
-        assert_eq!(range_included(&RangeInclusive::new(0, 0), &[1,2,3]), None);
292
-        assert_eq!(range_included(&RangeInclusive::new(1, 1), &[1,2,3]), Some(RangeInclusive::new(0, 0)));
293
-        assert_eq!(range_included(&RangeInclusive::new(2, 2), &[1,2,3]), Some(RangeInclusive::new(1, 1)));
294
-        assert_eq!(range_included(&RangeInclusive::new(3, 3), &[1,2,3]), Some(RangeInclusive::new(2, 2)));
295
-        assert_eq!(range_included(&RangeInclusive::new(4, 4), &[1,2,3]), None);
296
-        assert_eq!(range_included(&RangeInclusive::new(0, 1), &[1,2,3]), Some(RangeInclusive::new(0, 0)));
297
-        assert_eq!(range_included(&RangeInclusive::new(3, 4), &[1,2,3]), Some(RangeInclusive::new(2, 2)));
298
-        assert_eq!(range_included(&RangeInclusive::new(4, 4), &[1,2,6]), None);
299
-        assert_eq!(range_included(&RangeInclusive::new(2, 3), &[1,2,6]), Some(RangeInclusive::new(1, 1)));
300
-        assert_eq!(range_included(&RangeInclusive::new(5, 6), &[1,2,6]), Some(RangeInclusive::new(2, 2)));
301
-        assert_eq!(range_included(&RangeInclusive::new(5, 8), &[1,2,6]), Some(RangeInclusive::new(2, 2)));
302
-        assert_eq!(range_included(&RangeInclusive::new(0, 8), &[1,2,6]), Some(RangeInclusive::new(0, 2)));
303
-        assert_eq!(range_included(&RangeInclusive::new(0, 4), &[1,2,6]), Some(RangeInclusive::new(0, 1)));
264
+    fn test_range_included_set() {
265
+        let mut set = BTreeSet::<i64>::new();
266
+        set.extend(&[1,2,6]);
267
+
268
+        assert_eq!(range_included(RangeInclusive::new(0, 0), &set), false);
269
+        assert_eq!(range_included(RangeInclusive::new(1, 1), &set), true);
270
+        assert_eq!(range_included(RangeInclusive::new(2, 2), &set), true);
271
+        assert_eq!(range_included(RangeInclusive::new(3, 3), &set), false);
272
+        assert_eq!(range_included(RangeInclusive::new(3, 5), &set), false);
273
+        assert_eq!(range_included(RangeInclusive::new(3, 6), &set), true);
274
+        assert_eq!(range_included(RangeInclusive::new(6, 6), &set), true);
275
+        assert_eq!(range_included(RangeInclusive::new(7, 7), &set), false);
276
+        assert_eq!(range_included(RangeInclusive::new(0, 1), &set), true);
277
+        assert_eq!(range_included(RangeInclusive::new(6, 7), &set), true);
278
+        assert_eq!(range_included(RangeInclusive::new(2, 3), &set), true);
279
+        assert_eq!(range_included(RangeInclusive::new(5, 6), &set), true);
280
+        assert_eq!(range_included(RangeInclusive::new(5, 8), &set), true);
281
+        assert_eq!(range_included(RangeInclusive::new(0, 8), &set), true);
282
+        assert_eq!(range_included(RangeInclusive::new(0, 4), &set), true);
304 283
     }
305 284
 }