浏览代码

Use BTreeSet to check node IDs in IndexedReader

There is a performance penalty (+13%), but this collection enables cheap
deletions.
Johannes Hofmann 5 年前
父节点
当前提交
bd0846d555
共有 1 个文件被更改,包括 28 次插入和 49 次删除
  1. 28
    49
      src/indexed.rs

+ 28
- 49
src/indexed.rs 查看文件

1
 //! Speed up searches by using an index
1
 //! Speed up searches by using an index
2
 
2
 
3
 use error::Result;
3
 use error::Result;
4
+use std::collections::BTreeSet;
4
 use std::fs::File;
5
 use std::fs::File;
5
 use std::io::{Read, Seek};
6
 use std::io::{Read, Seek};
6
 use std::ops::RangeInclusive;
7
 use std::ops::RangeInclusive;
15
     relation_ids: Option<RangeInclusive<i64>>,
16
     relation_ids: Option<RangeInclusive<i64>>,
16
 }
17
 }
17
 
18
 
18
-/// Checks if `sorted_slice` contains some values from the given `range`.
19
-/// Assumes that `sorted_slice` is sorted.
20
-/// Returns the range of indices into `sorted_slice` that needs to be checked.
21
-/// Returns `None` if it is guaranteed that no values from `sorted_slice` are inside `range`.
22
-fn range_included(range: &RangeInclusive<i64>, sorted_slice: &[i64]) -> Option<RangeInclusive<usize>> {
23
-    match (sorted_slice.binary_search(&range.start()), sorted_slice.binary_search(&range.end())) {
24
-        (Ok(start), Ok(end)) => Some(RangeInclusive::new(start, end)),
25
-        (Ok(start), Err(end)) => Some(RangeInclusive::new(start, end.saturating_sub(1))),
26
-        (Err(start), Ok(end)) => Some(RangeInclusive::new(start, end)),
27
-        (Err(start), Err(end)) => {
28
-            if start == end {
29
-                None
30
-            } else {
31
-                Some(RangeInclusive::new(start, end.saturating_sub(1)))
32
-            }
33
-        },
34
-    }
19
+/// Returns true if the given set contains at least one value that is inside the given range.
20
+fn range_included(range: RangeInclusive<i64>, node_ids: &BTreeSet<i64>) -> bool {
21
+    node_ids.range(range).next().is_some()
35
 }
22
 }
36
 
23
 
37
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
24
 #[derive(Clone, Copy, Debug, Eq, PartialEq)]
156
             self.create_index()?;
143
             self.create_index()?;
157
         }
144
         }
158
 
145
 
159
-        let mut node_ids: Vec<i64> = vec![];
146
+        let mut node_ids: BTreeSet<i64> = BTreeSet::new();
160
 
147
 
161
         // First pass:
148
         // First pass:
162
         //   * Filter ways and store their dependencies as node IDs
149
         //   * Filter ways and store their dependencies as node IDs
180
                         if filter(&way) {
167
                         if filter(&way) {
181
                             let refs = way.refs();
168
                             let refs = way.refs();
182
 
169
 
183
-                            node_ids.reserve(refs.size_hint().0);
184
-                            for node_id in refs {
185
-                                node_ids.push(node_id);
186
-                            }
170
+                            node_ids.extend(refs);
187
 
171
 
188
                             // Return way
172
                             // Return way
189
                             element_callback(&Element::Way(way));
173
                             element_callback(&Element::Way(way));
214
             }
198
             }
215
         }
199
         }
216
 
200
 
217
-        // Sort, to enable binary search
218
-        node_ids.sort_unstable();
219
-
220
-        // Remove duplicate node IDs
221
-        node_ids.dedup();
222
-
223
         // Second pass:
201
         // Second pass:
224
         //   * Iterate only over blobs that may include the node IDs we're searching for
202
         //   * Iterate only over blobs that may include the node IDs we're searching for
225
         for info in &mut self.index {
203
         for info in &mut self.index {
226
             if info.blob_type == SimpleBlobType::Primitive {
204
             if info.blob_type == SimpleBlobType::Primitive {
227
                 if let Some(node_id_range) = info.id_ranges.as_ref().and_then(|r| r.node_ids.as_ref()) {
205
                 if let Some(node_id_range) = info.id_ranges.as_ref().and_then(|r| r.node_ids.as_ref()) {
228
-                    if let Some(slice_range) = range_included(node_id_range, &node_ids) {
229
-                        let ids_subslice = &node_ids.as_slice()[slice_range];
230
-
206
+                    if range_included(node_id_range.clone(), &node_ids) {
231
                         self.reader.seek(info.offset)?;
207
                         self.reader.seek(info.offset)?;
232
                         let blob = self.reader.next().ok_or_else(|| {
208
                         let blob = self.reader.next().ok_or_else(|| {
233
                             ::std::io::Error::new(
209
                             ::std::io::Error::new(
238
                         let block = blob.to_primitiveblock()?;
214
                         let block = blob.to_primitiveblock()?;
239
                         for group in block.groups() {
215
                         for group in block.groups() {
240
                             for node in group.nodes() {
216
                             for node in group.nodes() {
241
-                                let id = node.id();
242
-                                if ids_subslice.binary_search(&id).is_ok() {
217
+                                if node_ids.contains(&node.id()) {
243
                                     // ID found, return node
218
                                     // ID found, return node
244
                                     element_callback(&Element::Node(node));
219
                                     element_callback(&Element::Node(node));
245
                                 }
220
                                 }
246
                             }
221
                             }
247
                             for node in group.dense_nodes() {
222
                             for node in group.dense_nodes() {
248
-                                let id = node.id;
249
-                                if ids_subslice.binary_search(&id).is_ok() {
223
+                                if node_ids.contains(&node.id) {
250
                                     // ID found, return dense node
224
                                     // ID found, return dense node
251
                                     element_callback(&Element::DenseNode(node));
225
                                     element_callback(&Element::DenseNode(node));
252
                                 }
226
                                 }
287
     use super::*;
261
     use super::*;
288
 
262
 
289
     #[test]
263
     #[test]
290
-    fn test_range_included() {
291
-        assert_eq!(range_included(&RangeInclusive::new(0, 0), &[1,2,3]), None);
292
-        assert_eq!(range_included(&RangeInclusive::new(1, 1), &[1,2,3]), Some(RangeInclusive::new(0, 0)));
293
-        assert_eq!(range_included(&RangeInclusive::new(2, 2), &[1,2,3]), Some(RangeInclusive::new(1, 1)));
294
-        assert_eq!(range_included(&RangeInclusive::new(3, 3), &[1,2,3]), Some(RangeInclusive::new(2, 2)));
295
-        assert_eq!(range_included(&RangeInclusive::new(4, 4), &[1,2,3]), None);
296
-        assert_eq!(range_included(&RangeInclusive::new(0, 1), &[1,2,3]), Some(RangeInclusive::new(0, 0)));
297
-        assert_eq!(range_included(&RangeInclusive::new(3, 4), &[1,2,3]), Some(RangeInclusive::new(2, 2)));
298
-        assert_eq!(range_included(&RangeInclusive::new(4, 4), &[1,2,6]), None);
299
-        assert_eq!(range_included(&RangeInclusive::new(2, 3), &[1,2,6]), Some(RangeInclusive::new(1, 1)));
300
-        assert_eq!(range_included(&RangeInclusive::new(5, 6), &[1,2,6]), Some(RangeInclusive::new(2, 2)));
301
-        assert_eq!(range_included(&RangeInclusive::new(5, 8), &[1,2,6]), Some(RangeInclusive::new(2, 2)));
302
-        assert_eq!(range_included(&RangeInclusive::new(0, 8), &[1,2,6]), Some(RangeInclusive::new(0, 2)));
303
-        assert_eq!(range_included(&RangeInclusive::new(0, 4), &[1,2,6]), Some(RangeInclusive::new(0, 1)));
264
+    fn test_range_included_set() {
265
+        let mut set = BTreeSet::<i64>::new();
266
+        set.extend(&[1,2,6]);
267
+
268
+        assert_eq!(range_included(RangeInclusive::new(0, 0), &set), false);
269
+        assert_eq!(range_included(RangeInclusive::new(1, 1), &set), true);
270
+        assert_eq!(range_included(RangeInclusive::new(2, 2), &set), true);
271
+        assert_eq!(range_included(RangeInclusive::new(3, 3), &set), false);
272
+        assert_eq!(range_included(RangeInclusive::new(3, 5), &set), false);
273
+        assert_eq!(range_included(RangeInclusive::new(3, 6), &set), true);
274
+        assert_eq!(range_included(RangeInclusive::new(6, 6), &set), true);
275
+        assert_eq!(range_included(RangeInclusive::new(7, 7), &set), false);
276
+        assert_eq!(range_included(RangeInclusive::new(0, 1), &set), true);
277
+        assert_eq!(range_included(RangeInclusive::new(6, 7), &set), true);
278
+        assert_eq!(range_included(RangeInclusive::new(2, 3), &set), true);
279
+        assert_eq!(range_included(RangeInclusive::new(5, 6), &set), true);
280
+        assert_eq!(range_included(RangeInclusive::new(5, 8), &set), true);
281
+        assert_eq!(range_included(RangeInclusive::new(0, 8), &set), true);
282
+        assert_eq!(range_included(RangeInclusive::new(0, 4), &set), true);
304
     }
283
     }
305
 }
284
 }