|
|
@@ -0,0 +1,305 @@
|
|
|
1
|
+//! Speed up searches by using an index
|
|
|
2
|
+
|
|
|
3
|
+use error::Result;
|
|
|
4
|
+use std::fs::File;
|
|
|
5
|
+use std::io::{Read, Seek};
|
|
|
6
|
+use std::ops::RangeInclusive;
|
|
|
7
|
+use std::path::Path;
|
|
|
8
|
+use {BlobReader, BlobType, ByteOffset, Element, Way};
|
|
|
9
|
+
|
|
|
10
|
+/// Stores the minimum and maximum id of every element type.
|
|
|
11
|
+#[derive(Debug)]
|
|
|
12
|
+pub struct IdRanges {
|
|
|
13
|
+ node_ids: Option<RangeInclusive<i64>>,
|
|
|
14
|
+ way_ids: Option<RangeInclusive<i64>>,
|
|
|
15
|
+ relation_ids: Option<RangeInclusive<i64>>,
|
|
|
16
|
+}
|
|
|
17
|
+
|
|
|
18
|
+/// Checks if `sorted_slice` contains some values from the given `range`.
|
|
|
19
|
+/// Assumes that `sorted_slice` is sorted.
|
|
|
20
|
+/// Returns the range of indices into `sorted_slice` that needs to be checked.
|
|
|
21
|
+/// Returns `None` if it is guaranteed that no values from `sorted_slice` are inside `range`.
|
|
|
22
|
+fn range_included(range: &RangeInclusive<i64>, sorted_slice: &[i64]) -> Option<RangeInclusive<usize>> {
|
|
|
23
|
+ match (sorted_slice.binary_search(&range.start()), sorted_slice.binary_search(&range.end())) {
|
|
|
24
|
+ (Ok(start), Ok(end)) => Some(RangeInclusive::new(start, end)),
|
|
|
25
|
+ (Ok(start), Err(end)) => Some(RangeInclusive::new(start, end.saturating_sub(1))),
|
|
|
26
|
+ (Err(start), Ok(end)) => Some(RangeInclusive::new(start, end)),
|
|
|
27
|
+ (Err(start), Err(end)) => {
|
|
|
28
|
+ if start == end {
|
|
|
29
|
+ None
|
|
|
30
|
+ } else {
|
|
|
31
|
+ Some(RangeInclusive::new(start, end.saturating_sub(1)))
|
|
|
32
|
+ }
|
|
|
33
|
+ },
|
|
|
34
|
+ }
|
|
|
35
|
+}
|
|
|
36
|
+
|
|
|
37
|
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
|
|
|
38
|
+enum SimpleBlobType {
|
|
|
39
|
+ Header,
|
|
|
40
|
+ Primitive,
|
|
|
41
|
+ Unknown,
|
|
|
42
|
+}
|
|
|
43
|
+
|
|
|
44
|
+#[derive(Debug)]
|
|
|
45
|
+struct BlobInfo {
|
|
|
46
|
+ offset: ByteOffset,
|
|
|
47
|
+ blob_type: SimpleBlobType,
|
|
|
48
|
+ id_ranges: Option<IdRanges>,
|
|
|
49
|
+}
|
|
|
50
|
+
|
|
|
51
|
+/// Allows filtering elements and iterating over their dependencies.
|
|
|
52
|
+/// It chooses an efficient method for navigating the PBF structure to achieve this in reasonable
|
|
|
53
|
+/// time and with reasonable memory.
|
|
|
54
|
+pub struct IndexedReader<R: Read + Seek> {
|
|
|
55
|
+ reader: BlobReader<R>,
|
|
|
56
|
+ index: Vec<BlobInfo>,
|
|
|
57
|
+}
|
|
|
58
|
+
|
|
|
59
|
+impl<R: Read + Seek> IndexedReader<R> {
|
|
|
60
|
+ /// Creates a new `IndexedReader`.
|
|
|
61
|
+ ///
|
|
|
62
|
+ /// # Example
|
|
|
63
|
+ /// ```
|
|
|
64
|
+ /// use osmpbf::*;
|
|
|
65
|
+ ///
|
|
|
66
|
+ /// # fn foo() -> Result<()> {
|
|
|
67
|
+ /// let f = std::fs::File::open("tests/test.osm.pbf")?;
|
|
|
68
|
+ /// let buf_reader = std::io::BufReader::new(f);
|
|
|
69
|
+ ///
|
|
|
70
|
+ /// let reader = IndexedReader::new(buf_reader)?;
|
|
|
71
|
+ ///
|
|
|
72
|
+ /// # Ok(())
|
|
|
73
|
+ /// # }
|
|
|
74
|
+ /// # foo().unwrap();
|
|
|
75
|
+ /// ```
|
|
|
76
|
+ pub fn new(reader: R) -> Result<Self> {
|
|
|
77
|
+ let reader = BlobReader::new_seekable(reader)?;
|
|
|
78
|
+ Ok(Self {
|
|
|
79
|
+ reader,
|
|
|
80
|
+ index: vec![],
|
|
|
81
|
+ })
|
|
|
82
|
+ }
|
|
|
83
|
+
|
|
|
84
|
+ fn create_index(&mut self) -> Result<()> {
|
|
|
85
|
+ // remove old items
|
|
|
86
|
+ self.index.clear();
|
|
|
87
|
+
|
|
|
88
|
+ for blob in &mut self.reader {
|
|
|
89
|
+ let blob = blob?;
|
|
|
90
|
+ // Reader is seekable, so offset should return Some(ByteOffset)
|
|
|
91
|
+ let offset = blob.offset().unwrap();
|
|
|
92
|
+ let blob_type = match blob.get_type() {
|
|
|
93
|
+ BlobType::OsmHeader => SimpleBlobType::Header,
|
|
|
94
|
+ BlobType::OsmData => SimpleBlobType::Primitive,
|
|
|
95
|
+ BlobType::Unknown(_) => SimpleBlobType::Unknown,
|
|
|
96
|
+ };
|
|
|
97
|
+
|
|
|
98
|
+ self.index.push(BlobInfo {
|
|
|
99
|
+ offset,
|
|
|
100
|
+ blob_type,
|
|
|
101
|
+ id_ranges: None,
|
|
|
102
|
+ });
|
|
|
103
|
+ }
|
|
|
104
|
+
|
|
|
105
|
+ Ok(())
|
|
|
106
|
+ }
|
|
|
107
|
+
|
|
|
108
|
+ /// Filter ways using a closure and return matching ways and their dependent nodes (`Node`s and
|
|
|
109
|
+ /// `DenseNode`s) in another closure.
|
|
|
110
|
+ ///
|
|
|
111
|
+ /// # Example
|
|
|
112
|
+ /// ```
|
|
|
113
|
+ /// use osmpbf::*;
|
|
|
114
|
+ ///
|
|
|
115
|
+ /// # fn foo() -> Result<()> {
|
|
|
116
|
+ /// let mut reader = IndexedReader::from_path("tests/test.osm.pbf")?;
|
|
|
117
|
+ /// let mut ways = 0;
|
|
|
118
|
+ /// let mut nodes = 0;
|
|
|
119
|
+ ///
|
|
|
120
|
+ /// // Filter all ways that are buildings and count their nodes.
|
|
|
121
|
+ /// reader.read_ways_and_deps(
|
|
|
122
|
+ /// |way| {
|
|
|
123
|
+ /// // Filter ways. Return true if tags contain "building": "yes".
|
|
|
124
|
+ /// way.tags().any(|key_value| key_value == ("building", "yes"))
|
|
|
125
|
+ /// },
|
|
|
126
|
+ /// |element| {
|
|
|
127
|
+ /// // Increment counter
|
|
|
128
|
+ /// match element {
|
|
|
129
|
+ /// Element::Way(way) => ways += 1,
|
|
|
130
|
+ /// Element::Node(node) => nodes += 1,
|
|
|
131
|
+ /// Element::DenseNode(dense_node) => nodes += 1,
|
|
|
132
|
+ /// Element::Relation(_) => (), // should not occur
|
|
|
133
|
+ /// }
|
|
|
134
|
+ /// },
|
|
|
135
|
+ /// )?;
|
|
|
136
|
+ ///
|
|
|
137
|
+ /// println!("ways: {}\nnodes: {}", ways, nodes);
|
|
|
138
|
+ ///
|
|
|
139
|
+ /// # assert_eq!(ways, 1);
|
|
|
140
|
+ /// # assert_eq!(nodes, 3);
|
|
|
141
|
+ /// # Ok(())
|
|
|
142
|
+ /// # }
|
|
|
143
|
+ /// # foo().unwrap();
|
|
|
144
|
+ /// ```
|
|
|
145
|
+ pub fn read_ways_and_deps<F, E>(
|
|
|
146
|
+ &mut self,
|
|
|
147
|
+ mut filter: F,
|
|
|
148
|
+ mut element_callback: E,
|
|
|
149
|
+ ) -> Result<()>
|
|
|
150
|
+ where
|
|
|
151
|
+ F: for<'a> FnMut(&Way<'a>) -> bool,
|
|
|
152
|
+ E: for<'a> FnMut(&Element<'a>),
|
|
|
153
|
+ {
|
|
|
154
|
+ // Create index
|
|
|
155
|
+ if self.index.is_empty() {
|
|
|
156
|
+ self.create_index()?;
|
|
|
157
|
+ }
|
|
|
158
|
+
|
|
|
159
|
+ let mut node_ids: Vec<i64> = vec![];
|
|
|
160
|
+
|
|
|
161
|
+ // First pass:
|
|
|
162
|
+ // * Filter ways and store their dependencies as node IDs
|
|
|
163
|
+ // * Store range of node IDs (min and max value) of each block
|
|
|
164
|
+ for info in &mut self.index {
|
|
|
165
|
+ //TODO do something useful with header blocks
|
|
|
166
|
+ if info.blob_type == SimpleBlobType::Primitive {
|
|
|
167
|
+ self.reader.seek(info.offset)?;
|
|
|
168
|
+ let blob = self.reader.next().ok_or_else(|| {
|
|
|
169
|
+ ::std::io::Error::new(
|
|
|
170
|
+ ::std::io::ErrorKind::UnexpectedEof,
|
|
|
171
|
+ "could not read next blob",
|
|
|
172
|
+ )
|
|
|
173
|
+ })??;
|
|
|
174
|
+ let block = blob.to_primitiveblock()?;
|
|
|
175
|
+ let mut min_node_id: Option<i64> = None;
|
|
|
176
|
+ let mut max_node_id: Option<i64> = None;
|
|
|
177
|
+ for group in block.groups() {
|
|
|
178
|
+ // filter ways and record node IDs
|
|
|
179
|
+ for way in group.ways() {
|
|
|
180
|
+ if filter(&way) {
|
|
|
181
|
+ let refs = way.refs();
|
|
|
182
|
+
|
|
|
183
|
+ node_ids.reserve(refs.size_hint().0);
|
|
|
184
|
+ for node_id in refs {
|
|
|
185
|
+ node_ids.push(node_id);
|
|
|
186
|
+ }
|
|
|
187
|
+
|
|
|
188
|
+ // Return way
|
|
|
189
|
+ element_callback(&Element::Way(way));
|
|
|
190
|
+ }
|
|
|
191
|
+ }
|
|
|
192
|
+
|
|
|
193
|
+ // Check node IDs of this block, record min and max
|
|
|
194
|
+
|
|
|
195
|
+ let mut check_min_max = |id| {
|
|
|
196
|
+ min_node_id = Some(min_node_id.map_or(id, |x| x.min(id)));
|
|
|
197
|
+ max_node_id = Some(max_node_id.map_or(id, |x| x.max(id)));
|
|
|
198
|
+ };
|
|
|
199
|
+
|
|
|
200
|
+ for node in group.nodes() {
|
|
|
201
|
+ check_min_max(node.id())
|
|
|
202
|
+ }
|
|
|
203
|
+ for node in group.dense_nodes() {
|
|
|
204
|
+ check_min_max(node.id)
|
|
|
205
|
+ }
|
|
|
206
|
+ }
|
|
|
207
|
+ if let (Some(min), Some(max)) = (min_node_id, max_node_id) {
|
|
|
208
|
+ info.id_ranges = Some(IdRanges {
|
|
|
209
|
+ node_ids: Some(RangeInclusive::new(min, max)),
|
|
|
210
|
+ way_ids: None,
|
|
|
211
|
+ relation_ids: None,
|
|
|
212
|
+ });
|
|
|
213
|
+ }
|
|
|
214
|
+ }
|
|
|
215
|
+ }
|
|
|
216
|
+
|
|
|
217
|
+ // Sort, to enable binary search
|
|
|
218
|
+ node_ids.sort_unstable();
|
|
|
219
|
+
|
|
|
220
|
+ // Remove duplicate node IDs
|
|
|
221
|
+ node_ids.dedup();
|
|
|
222
|
+
|
|
|
223
|
+ // Second pass:
|
|
|
224
|
+ // * Iterate only over blobs that may include the node IDs we're searching for
|
|
|
225
|
+ for info in &mut self.index {
|
|
|
226
|
+ if info.blob_type == SimpleBlobType::Primitive {
|
|
|
227
|
+ if let Some(node_id_range) = info.id_ranges.as_ref().and_then(|r| r.node_ids.as_ref()) {
|
|
|
228
|
+ if let Some(slice_range) = range_included(node_id_range, &node_ids) {
|
|
|
229
|
+ let ids_subslice = &node_ids.as_slice()[slice_range];
|
|
|
230
|
+
|
|
|
231
|
+ self.reader.seek(info.offset)?;
|
|
|
232
|
+ let blob = self.reader.next().ok_or_else(|| {
|
|
|
233
|
+ ::std::io::Error::new(
|
|
|
234
|
+ ::std::io::ErrorKind::UnexpectedEof,
|
|
|
235
|
+ "could not read next blob",
|
|
|
236
|
+ )
|
|
|
237
|
+ })??;
|
|
|
238
|
+ let block = blob.to_primitiveblock()?;
|
|
|
239
|
+ for group in block.groups() {
|
|
|
240
|
+ for node in group.nodes() {
|
|
|
241
|
+ let id = node.id();
|
|
|
242
|
+ if ids_subslice.binary_search(&id).is_ok() {
|
|
|
243
|
+ // ID found, return node
|
|
|
244
|
+ element_callback(&Element::Node(node));
|
|
|
245
|
+ }
|
|
|
246
|
+ }
|
|
|
247
|
+ for node in group.dense_nodes() {
|
|
|
248
|
+ let id = node.id;
|
|
|
249
|
+ if ids_subslice.binary_search(&id).is_ok() {
|
|
|
250
|
+ // ID found, return dense node
|
|
|
251
|
+ element_callback(&Element::DenseNode(node));
|
|
|
252
|
+ }
|
|
|
253
|
+ }
|
|
|
254
|
+ }
|
|
|
255
|
+ }
|
|
|
256
|
+ }
|
|
|
257
|
+ }
|
|
|
258
|
+ }
|
|
|
259
|
+
|
|
|
260
|
+ Ok(())
|
|
|
261
|
+ }
|
|
|
262
|
+}
|
|
|
263
|
+
|
|
|
264
|
+impl IndexedReader<File> {
|
|
|
265
|
+ /// Creates a new `IndexedReader` from a given path.
|
|
|
266
|
+ ///
|
|
|
267
|
+ /// # Example
|
|
|
268
|
+ /// ```
|
|
|
269
|
+ /// use osmpbf::*;
|
|
|
270
|
+ ///
|
|
|
271
|
+ /// # fn foo() -> Result<()> {
|
|
|
272
|
+ /// let reader = IndexedReader::from_path("tests/test.osm.pbf")?;
|
|
|
273
|
+ ///
|
|
|
274
|
+ /// # Ok(())
|
|
|
275
|
+ /// # }
|
|
|
276
|
+ /// # foo().unwrap();
|
|
|
277
|
+ /// ```
|
|
|
278
|
+ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
|
|
|
279
|
+ //TODO take some more measurements to determine if `BufReader` should be used here
|
|
|
280
|
+ let f = File::open(path)?;
|
|
|
281
|
+ Self::new(f)
|
|
|
282
|
+ }
|
|
|
283
|
+}
|
|
|
284
|
+
|
|
|
285
|
+#[cfg(test)]
|
|
|
286
|
+mod tests {
|
|
|
287
|
+ use super::*;
|
|
|
288
|
+
|
|
|
289
|
+ #[test]
|
|
|
290
|
+ fn test_range_included() {
|
|
|
291
|
+ assert_eq!(range_included(&RangeInclusive::new(0, 0), &[1,2,3]), None);
|
|
|
292
|
+ assert_eq!(range_included(&RangeInclusive::new(1, 1), &[1,2,3]), Some(RangeInclusive::new(0, 0)));
|
|
|
293
|
+ assert_eq!(range_included(&RangeInclusive::new(2, 2), &[1,2,3]), Some(RangeInclusive::new(1, 1)));
|
|
|
294
|
+ assert_eq!(range_included(&RangeInclusive::new(3, 3), &[1,2,3]), Some(RangeInclusive::new(2, 2)));
|
|
|
295
|
+ assert_eq!(range_included(&RangeInclusive::new(4, 4), &[1,2,3]), None);
|
|
|
296
|
+ assert_eq!(range_included(&RangeInclusive::new(0, 1), &[1,2,3]), Some(RangeInclusive::new(0, 0)));
|
|
|
297
|
+ assert_eq!(range_included(&RangeInclusive::new(3, 4), &[1,2,3]), Some(RangeInclusive::new(2, 2)));
|
|
|
298
|
+ assert_eq!(range_included(&RangeInclusive::new(4, 4), &[1,2,6]), None);
|
|
|
299
|
+ assert_eq!(range_included(&RangeInclusive::new(2, 3), &[1,2,6]), Some(RangeInclusive::new(1, 1)));
|
|
|
300
|
+ assert_eq!(range_included(&RangeInclusive::new(5, 6), &[1,2,6]), Some(RangeInclusive::new(2, 2)));
|
|
|
301
|
+ assert_eq!(range_included(&RangeInclusive::new(5, 8), &[1,2,6]), Some(RangeInclusive::new(2, 2)));
|
|
|
302
|
+ assert_eq!(range_included(&RangeInclusive::new(0, 8), &[1,2,6]), Some(RangeInclusive::new(0, 2)));
|
|
|
303
|
+ assert_eq!(range_included(&RangeInclusive::new(0, 4), &[1,2,6]), Some(RangeInclusive::new(0, 1)));
|
|
|
304
|
+ }
|
|
|
305
|
+}
|