|
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+//! Speed up searches by using an index
|
|
|
2
|
+
|
|
|
3
|
+use error::Result;
|
|
|
4
|
+use std::collections::BTreeSet;
|
|
|
5
|
+use std::fs::File;
|
|
|
6
|
+use std::io::{Read, Seek};
|
|
|
7
|
+use std::ops::RangeInclusive;
|
|
|
8
|
+use std::path::Path;
|
|
|
9
|
+use {BlobReader, BlobType, ByteOffset, Element, Way};
|
|
|
10
|
+
|
|
|
11
|
/// Inclusive ID bounds (smallest and largest ID) for each element type.
///
/// A field is `None` when no range has been recorded for that element type.
#[derive(Debug)]
pub struct IdRanges {
    /// Smallest and largest node ID, if recorded.
    node_ids: Option<RangeInclusive<i64>>,
    /// Smallest and largest way ID, if recorded.
    way_ids: Option<RangeInclusive<i64>>,
    /// Smallest and largest relation ID, if recorded.
    relation_ids: Option<RangeInclusive<i64>>,
}
|
|
|
18
|
+
|
|
|
19
|
/// Returns true if the given set contains at least one value that is inside the given range.
///
/// Both ends of `range` are inclusive. An inverted range (start greater than end) contains no
/// values, so it yields `false`; it is handled up front because `BTreeSet::range` panics when
/// the start of the range is greater than its end.
fn range_included(range: RangeInclusive<i64>, node_ids: &BTreeSet<i64>) -> bool {
    if range.start() > range.end() {
        return false;
    }

    // The set is ordered, so a single bounded lookup answers the question.
    node_ids.range(range).next().is_some()
}
|
|
|
23
|
+
|
|
|
24
|
/// Payload-free classification of a blob, as stored in the index.
#[derive(Clone, Copy, Debug, Eq, PartialEq)]
enum SimpleBlobType {
    /// Recorded for blobs whose header reports `BlobType::OsmHeader`.
    Header,
    /// Recorded for blobs whose header reports `BlobType::OsmData`.
    Primitive,
    /// Recorded for blobs whose header reports any other type.
    Unknown,
}
|
|
|
30
|
+
|
|
|
31
|
+#[derive(Debug)]
|
|
|
32
|
+struct BlobInfo {
|
|
|
33
|
+ offset: ByteOffset,
|
|
|
34
|
+ blob_type: SimpleBlobType,
|
|
|
35
|
+ id_ranges: Option<IdRanges>,
|
|
|
36
|
+}
|
|
|
37
|
+
|
|
|
38
|
+/// Allows filtering elements and iterating over their dependencies.
|
|
|
39
|
+/// It chooses an efficient method for navigating the PBF structure to achieve this in reasonable
|
|
|
40
|
+/// time and with reasonable memory.
|
|
|
41
|
+pub struct IndexedReader<R: Read + Seek> {
|
|
|
42
|
+ reader: BlobReader<R>,
|
|
|
43
|
+ index: Vec<BlobInfo>,
|
|
|
44
|
+}
|
|
|
45
|
+
|
|
|
46
|
+impl<R: Read + Seek> IndexedReader<R> {
|
|
|
47
|
+ /// Creates a new `IndexedReader`.
|
|
|
48
|
+ ///
|
|
|
49
|
+ /// # Example
|
|
|
50
|
+ /// ```
|
|
|
51
|
+ /// use osmpbf::*;
|
|
|
52
|
+ ///
|
|
|
53
|
+ /// # fn foo() -> Result<()> {
|
|
|
54
|
+ /// let f = std::fs::File::open("tests/test.osm.pbf")?;
|
|
|
55
|
+ /// let buf_reader = std::io::BufReader::new(f);
|
|
|
56
|
+ ///
|
|
|
57
|
+ /// let reader = IndexedReader::new(buf_reader)?;
|
|
|
58
|
+ ///
|
|
|
59
|
+ /// # Ok(())
|
|
|
60
|
+ /// # }
|
|
|
61
|
+ /// # foo().unwrap();
|
|
|
62
|
+ /// ```
|
|
|
63
|
+ pub fn new(reader: R) -> Result<Self> {
|
|
|
64
|
+ let reader = BlobReader::new_seekable(reader)?;
|
|
|
65
|
+ Ok(Self {
|
|
|
66
|
+ reader,
|
|
|
67
|
+ index: vec![],
|
|
|
68
|
+ })
|
|
|
69
|
+ }
|
|
|
70
|
+
|
|
|
71
|
+ pub fn create_index(&mut self) -> Result<()> {
|
|
|
72
|
+ // remove old items
|
|
|
73
|
+ self.index.clear();
|
|
|
74
|
+
|
|
|
75
|
+ while let Some(result) = self.reader.next_header_skip_blob() {
|
|
|
76
|
+ let (header, offset) = result?;
|
|
|
77
|
+ // Reader is seekable, so offset should be Some(ByteOffset)
|
|
|
78
|
+ let offset = offset.unwrap();
|
|
|
79
|
+ let blob_type = match header.blob_type() {
|
|
|
80
|
+ BlobType::OsmHeader => SimpleBlobType::Header,
|
|
|
81
|
+ BlobType::OsmData => SimpleBlobType::Primitive,
|
|
|
82
|
+ BlobType::Unknown(_) => SimpleBlobType::Unknown,
|
|
|
83
|
+ };
|
|
|
84
|
+
|
|
|
85
|
+ self.index.push(BlobInfo {
|
|
|
86
|
+ offset,
|
|
|
87
|
+ blob_type,
|
|
|
88
|
+ id_ranges: None,
|
|
|
89
|
+ });
|
|
|
90
|
+ }
|
|
|
91
|
+
|
|
|
92
|
+ Ok(())
|
|
|
93
|
+ }
|
|
|
94
|
+
|
|
|
95
|
+ /// Filter ways using a closure and return matching ways and their dependent nodes (`Node`s and
|
|
|
96
|
+ /// `DenseNode`s) in another closure.
|
|
|
97
|
+ ///
|
|
|
98
|
+ /// # Example
|
|
|
99
|
+ /// ```
|
|
|
100
|
+ /// use osmpbf::*;
|
|
|
101
|
+ ///
|
|
|
102
|
+ /// # fn foo() -> Result<()> {
|
|
|
103
|
+ /// let mut reader = IndexedReader::from_path("tests/test.osm.pbf")?;
|
|
|
104
|
+ /// let mut ways = 0;
|
|
|
105
|
+ /// let mut nodes = 0;
|
|
|
106
|
+ ///
|
|
|
107
|
+ /// // Filter all ways that are buildings and count their nodes.
|
|
|
108
|
+ /// reader.read_ways_and_deps(
|
|
|
109
|
+ /// |way| {
|
|
|
110
|
+ /// // Filter ways. Return true if tags contain "building": "yes".
|
|
|
111
|
+ /// way.tags().any(|key_value| key_value == ("building", "yes"))
|
|
|
112
|
+ /// },
|
|
|
113
|
+ /// |element| {
|
|
|
114
|
+ /// // Increment counter
|
|
|
115
|
+ /// match element {
|
|
|
116
|
+ /// Element::Way(way) => ways += 1,
|
|
|
117
|
+ /// Element::Node(node) => nodes += 1,
|
|
|
118
|
+ /// Element::DenseNode(dense_node) => nodes += 1,
|
|
|
119
|
+ /// Element::Relation(_) => (), // should not occur
|
|
|
120
|
+ /// }
|
|
|
121
|
+ /// },
|
|
|
122
|
+ /// )?;
|
|
|
123
|
+ ///
|
|
|
124
|
+ /// println!("ways: {}\nnodes: {}", ways, nodes);
|
|
|
125
|
+ ///
|
|
|
126
|
+ /// # assert_eq!(ways, 1);
|
|
|
127
|
+ /// # assert_eq!(nodes, 3);
|
|
|
128
|
+ /// # Ok(())
|
|
|
129
|
+ /// # }
|
|
|
130
|
+ /// # foo().unwrap();
|
|
|
131
|
+ /// ```
|
|
|
132
|
+ pub fn read_ways_and_deps<F, E>(
|
|
|
133
|
+ &mut self,
|
|
|
134
|
+ mut filter: F,
|
|
|
135
|
+ mut element_callback: E,
|
|
|
136
|
+ ) -> Result<()>
|
|
|
137
|
+ where
|
|
|
138
|
+ F: for<'a> FnMut(&Way<'a>) -> bool,
|
|
|
139
|
+ E: for<'a> FnMut(&Element<'a>),
|
|
|
140
|
+ {
|
|
|
141
|
+ // Create index
|
|
|
142
|
+ if self.index.is_empty() {
|
|
|
143
|
+ self.create_index()?;
|
|
|
144
|
+ }
|
|
|
145
|
+
|
|
|
146
|
+ let mut node_ids: BTreeSet<i64> = BTreeSet::new();
|
|
|
147
|
+
|
|
|
148
|
+ // First pass:
|
|
|
149
|
+ // * Filter ways and store their dependencies as node IDs
|
|
|
150
|
+ // * Store range of node IDs (min and max value) of each block
|
|
|
151
|
+ for info in &mut self.index {
|
|
|
152
|
+ //TODO do something useful with header blocks
|
|
|
153
|
+ if info.blob_type == SimpleBlobType::Primitive {
|
|
|
154
|
+ self.reader.seek(info.offset)?;
|
|
|
155
|
+ let blob = self.reader.next().ok_or_else(|| {
|
|
|
156
|
+ ::std::io::Error::new(
|
|
|
157
|
+ ::std::io::ErrorKind::UnexpectedEof,
|
|
|
158
|
+ "could not read next blob",
|
|
|
159
|
+ )
|
|
|
160
|
+ })??;
|
|
|
161
|
+ let block = blob.to_primitiveblock()?;
|
|
|
162
|
+ let mut min_node_id: Option<i64> = None;
|
|
|
163
|
+ let mut max_node_id: Option<i64> = None;
|
|
|
164
|
+ for group in block.groups() {
|
|
|
165
|
+ // filter ways and record node IDs
|
|
|
166
|
+ for way in group.ways() {
|
|
|
167
|
+ if filter(&way) {
|
|
|
168
|
+ let refs = way.refs();
|
|
|
169
|
+
|
|
|
170
|
+ node_ids.extend(refs);
|
|
|
171
|
+
|
|
|
172
|
+ // Return way
|
|
|
173
|
+ element_callback(&Element::Way(way));
|
|
|
174
|
+ }
|
|
|
175
|
+ }
|
|
|
176
|
+
|
|
|
177
|
+ // Check node IDs of this block, record min and max
|
|
|
178
|
+
|
|
|
179
|
+ let mut check_min_max = |id| {
|
|
|
180
|
+ min_node_id = Some(min_node_id.map_or(id, |x| x.min(id)));
|
|
|
181
|
+ max_node_id = Some(max_node_id.map_or(id, |x| x.max(id)));
|
|
|
182
|
+ };
|
|
|
183
|
+
|
|
|
184
|
+ for node in group.nodes() {
|
|
|
185
|
+ check_min_max(node.id())
|
|
|
186
|
+ }
|
|
|
187
|
+ for node in group.dense_nodes() {
|
|
|
188
|
+ check_min_max(node.id)
|
|
|
189
|
+ }
|
|
|
190
|
+ }
|
|
|
191
|
+ if let (Some(min), Some(max)) = (min_node_id, max_node_id) {
|
|
|
192
|
+ info.id_ranges = Some(IdRanges {
|
|
|
193
|
+ node_ids: Some(RangeInclusive::new(min, max)),
|
|
|
194
|
+ way_ids: None,
|
|
|
195
|
+ relation_ids: None,
|
|
|
196
|
+ });
|
|
|
197
|
+ }
|
|
|
198
|
+ }
|
|
|
199
|
+ }
|
|
|
200
|
+
|
|
|
201
|
+ // Second pass:
|
|
|
202
|
+ // * Iterate only over blobs that may include the node IDs we're searching for
|
|
|
203
|
+ for info in &mut self.index {
|
|
|
204
|
+ if info.blob_type == SimpleBlobType::Primitive {
|
|
|
205
|
+ if let Some(node_id_range) = info.id_ranges.as_ref().and_then(|r| r.node_ids.as_ref()) {
|
|
|
206
|
+ if range_included(node_id_range.clone(), &node_ids) {
|
|
|
207
|
+ //TODO Only collect into Vec if range has a reasonable size
|
|
|
208
|
+ let node_ids: Vec<i64> = node_ids.range(node_id_range.clone()).map(|x| *x).collect();
|
|
|
209
|
+ self.reader.seek(info.offset)?;
|
|
|
210
|
+ let blob = self.reader.next().ok_or_else(|| {
|
|
|
211
|
+ ::std::io::Error::new(
|
|
|
212
|
+ ::std::io::ErrorKind::UnexpectedEof,
|
|
|
213
|
+ "could not read next blob",
|
|
|
214
|
+ )
|
|
|
215
|
+ })??;
|
|
|
216
|
+ let block = blob.to_primitiveblock()?;
|
|
|
217
|
+ for group in block.groups() {
|
|
|
218
|
+ for node in group.nodes() {
|
|
|
219
|
+ if node_ids.binary_search(&node.id()).is_ok() {
|
|
|
220
|
+ // ID found, return node
|
|
|
221
|
+ element_callback(&Element::Node(node));
|
|
|
222
|
+ }
|
|
|
223
|
+ }
|
|
|
224
|
+ for node in group.dense_nodes() {
|
|
|
225
|
+ if node_ids.binary_search(&node.id).is_ok() {
|
|
|
226
|
+ // ID found, return dense node
|
|
|
227
|
+ element_callback(&Element::DenseNode(node));
|
|
|
228
|
+ }
|
|
|
229
|
+ }
|
|
|
230
|
+ }
|
|
|
231
|
+ }
|
|
|
232
|
+ }
|
|
|
233
|
+ }
|
|
|
234
|
+ }
|
|
|
235
|
+
|
|
|
236
|
+ Ok(())
|
|
|
237
|
+ }
|
|
|
238
|
+}
|
|
|
239
|
+
|
|
|
240
|
+impl IndexedReader<File> {
|
|
|
241
|
+ /// Creates a new `IndexedReader` from a given path.
|
|
|
242
|
+ ///
|
|
|
243
|
+ /// # Example
|
|
|
244
|
+ /// ```
|
|
|
245
|
+ /// use osmpbf::*;
|
|
|
246
|
+ ///
|
|
|
247
|
+ /// # fn foo() -> Result<()> {
|
|
|
248
|
+ /// let reader = IndexedReader::from_path("tests/test.osm.pbf")?;
|
|
|
249
|
+ ///
|
|
|
250
|
+ /// # Ok(())
|
|
|
251
|
+ /// # }
|
|
|
252
|
+ /// # foo().unwrap();
|
|
|
253
|
+ /// ```
|
|
|
254
|
+ pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self> {
|
|
|
255
|
+ //TODO take some more measurements to determine if `BufReader` should be used here
|
|
|
256
|
+ let f = File::open(path)?;
|
|
|
257
|
+ Self::new(f)
|
|
|
258
|
+ }
|
|
|
259
|
+}
|
|
|
260
|
+
|
|
|
261
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks `range_included` against a fixed set for ranges that miss, touch and span values.
    #[test]
    fn test_range_included_set() {
        let set: BTreeSet<i64> = [1, 2, 6].iter().cloned().collect();

        // (range start, range end, expected result)
        let cases: &[(i64, i64, bool)] = &[
            (0, 0, false),
            (1, 1, true),
            (2, 2, true),
            (3, 3, false),
            (3, 5, false),
            (3, 6, true),
            (6, 6, true),
            (7, 7, false),
            (0, 1, true),
            (6, 7, true),
            (2, 3, true),
            (5, 6, true),
            (5, 8, true),
            (0, 8, true),
            (0, 4, true),
        ];

        for &(start, end, expected) in cases {
            assert_eq!(
                range_included(RangeInclusive::new(start, end), &set),
                expected,
                "range {}..={}",
                start,
                end
            );
        }
    }
}
|