Преглед на файлове

Track blob offset, add BlobReader::seek* methods

Johannes Hofmann преди 7 години
родител
ревизия
31e0488700
променени са 1 файла, в които са добавени 118 реда и са изтрити 8 реда
  1. 118
    8
      src/blob.rs

+ 118
- 8
src/blob.rs Целия файл

@@ -8,7 +8,7 @@ use byteorder::ReadBytesExt;
8 8
 use error::{BlobError, Result, new_blob_error, new_protobuf_error};
9 9
 use proto::fileformat;
10 10
 use std::fs::File;
11
-use std::io::{BufReader, Read};
11
+use std::io::{BufReader, Read, Seek, SeekFrom};
12 12
 use std::path::Path;
13 13
 use util::{parse_message_from_bytes, parse_message_from_reader};
14 14
 
@@ -51,6 +51,10 @@ pub enum BlobDecode<'a> {
51 51
     Unknown(&'a str),
52 52
 }
53 53
 
54
+/// The offset of a blob in bytes from stream start.
55
+#[derive(Clone, Copy, Debug, Eq, PartialEq)]
56
+pub struct ByteOffset(pub u64);
57
+
54 58
 /// A blob.
55 59
 ///
56 60
 /// A PBF file consists of a sequence of blobs. This type supports decoding the content of a blob
@@ -59,13 +63,19 @@ pub enum BlobDecode<'a> {
59 63
 pub struct Blob {
60 64
     header: fileformat::BlobHeader,
61 65
     blob: fileformat::Blob,
66
+    offset: Option<ByteOffset>,
62 67
 }
63 68
 
64 69
 impl Blob {
65
-    fn new(header: fileformat::BlobHeader, blob: fileformat::Blob) -> Blob {
70
+    fn new(
71
+        header: fileformat::BlobHeader,
72
+        blob: fileformat::Blob,
73
+        offset: Option<ByteOffset>,
74
+    ) -> Blob {
66 75
         Blob {
67 76
             header,
68
-            blob
77
+            blob,
78
+            offset,
69 79
         }
70 80
     }
71 81
 
@@ -95,6 +105,12 @@ impl Blob {
95 105
         }
96 106
     }
97 107
 
108
+    /// Returns the byte offset of the blob from the start of its source stream.
109
+    /// This is `None` if the reader's position is unknown, e.g. it was created with `BlobReader::new` or a seek failed.
110
+    pub fn offset(&self) -> Option<ByteOffset> {
111
+        self.offset
112
+    }
113
+
98 114
     /// Tries to decode the blob to a `HeaderBlock`. This operation might involve an expensive
99 115
     /// decompression step.
100 116
     pub fn to_headerblock(&self) -> Result<HeaderBlock> {
@@ -114,11 +130,13 @@ impl Blob {
114 130
 #[derive(Clone, Debug)]
115 131
 pub struct BlobReader<R: Read> {
116 132
     reader: R,
133
+    /// Current reader offset in bytes from the start of the stream.
134
+    offset: Option<ByteOffset>,
117 135
     last_blob_ok: bool,
118 136
 }
119 137
 
120 138
 impl<R: Read> BlobReader<R> {
121
-    /// Creates a new `ElementReader`.
139
+    /// Creates a new `BlobReader`.
122 140
     ///
123 141
     /// # Example
124 142
     /// ```
@@ -128,7 +146,7 @@ impl<R: Read> BlobReader<R> {
128 146
     /// let f = std::fs::File::open("tests/test.osm.pbf")?;
129 147
     /// let buf_reader = std::io::BufReader::new(f);
130 148
     ///
131
-    /// let reader = ElementReader::new(buf_reader);
149
+    /// let reader = BlobReader::new(buf_reader);
132 150
     ///
133 151
     /// # Ok(())
134 152
     /// # }
@@ -136,6 +154,7 @@ impl<R: Read> BlobReader<R> {
136 154
     pub fn new(reader: R) -> BlobReader<R> {
137 155
         BlobReader {
138 156
             reader,
157
+            offset: None,
139 158
             last_blob_ok: true,
140 159
         }
141 160
     }
@@ -161,7 +180,11 @@ impl BlobReader<BufReader<File>> {
161 180
         let f = File::open(path)?;
162 181
         let reader = BufReader::new(f);
163 182
 
164
-        Ok(BlobReader::new(reader))
183
+        Ok(BlobReader {
184
+            reader,
185
+            offset: Some(ByteOffset(0)),
186
+            last_blob_ok: true,
187
+        })
165 188
     }
166 189
 }
167 190
 
@@ -174,9 +197,15 @@ impl<R: Read> Iterator for BlobReader<R> {
174 197
             return None;
175 198
         }
176 199
 
200
+        let prev_offset = self.offset;
201
+
177 202
         let header_size: u64 = match self.reader.read_u32::<byteorder::BigEndian>() {
178
-            Ok(n) => u64::from(n),
203
+            Ok(n) => {
204
+                self.offset = self.offset.map(|x| ByteOffset(x.0 + 4));
205
+                u64::from(n)
206
+            },
179 207
             Err(e) => {
208
+                self.offset = None;
180 209
                 match e.kind() {
181 210
                     ::std::io::ErrorKind::UnexpectedEof => {
182 211
                         //TODO This also accepts corrupted files in the case of 1-3 available bytes
@@ -198,6 +227,7 @@ impl<R: Read> Iterator for BlobReader<R> {
198 227
         let header: fileformat::BlobHeader = match parse_message_from_reader(&mut self.reader.by_ref().take(header_size)) {
199 228
             Ok(header) => header,
200 229
             Err(e) => {
230
+                self.offset = None;
201 231
                 self.last_blob_ok = false;
202 232
                 return Some(Err(new_protobuf_error(e, "blob header")));
203 233
             },
@@ -206,12 +236,92 @@ impl<R: Read> Iterator for BlobReader<R> {
206 236
         let blob: fileformat::Blob = match parse_message_from_reader(&mut self.reader.by_ref().take(header.get_datasize() as u64)) {
207 237
             Ok(blob) => blob,
208 238
             Err(e) => {
239
+                self.offset = None;
209 240
                 self.last_blob_ok = false;
210 241
                 return Some(Err(new_protobuf_error(e, "blob content")));
211 242
             },
212 243
         };
213 244
 
214
-        Some(Ok(Blob::new(header, blob)))
245
+        self.offset = self.offset.map(|x| ByteOffset(
246
+            x.0 + header_size + header.get_datasize() as u64
247
+        ));
248
+
249
+        Some(Ok(Blob::new(header, blob, prev_offset)))
250
+    }
251
+}
252
+
253
+impl<R: Read + Seek> BlobReader<R> {
254
+    /// Creates a new `BlobReader` from the given reader that is seekable and will be initialized
255
+    /// with a valid offset.
256
+    ///
257
+    /// # Example
258
+    /// ```
259
+    /// use osmpbf::*;
260
+    ///
261
+    /// # fn foo() -> Result<()> {
262
+    /// let f = std::fs::File::open("tests/test.osm.pbf")?;
263
+    /// let buf_reader = std::io::BufReader::new(f);
264
+    ///
265
+    /// let mut reader = BlobReader::new_seekable(buf_reader)?;
266
+    /// let first_blob = reader.next().unwrap()?;
267
+    ///
268
+    /// assert_eq!(first_blob.offset(), Some(ByteOffset(0)));
269
+    /// # Ok(())
270
+    /// # }
271
+    /// ```
272
+    pub fn new_seekable(mut reader: R) -> Result<BlobReader<R>> {
273
+        let pos = reader.seek(SeekFrom::Current(0))?;
274
+
275
+        Ok(BlobReader {
276
+            reader,
277
+            offset: Some(ByteOffset(pos)),
278
+            last_blob_ok: true,
279
+        })
280
+    }
281
+
282
+    /// Seek to an offset in bytes from the start of the stream.
283
+    ///
284
+    /// # Example
285
+    /// ```
286
+    /// use osmpbf::*;
287
+    ///
288
+    /// # fn foo() -> Result<()> {
289
+    /// let mut reader = BlobReader::from_path("tests/test.osm.pbf")?;
290
+    /// let first_blob = reader.next().unwrap()?;
291
+    /// let second_blob = reader.next().unwrap()?;
292
+    ///
293
+    /// reader.seek(first_blob.offset().unwrap())?;
294
+    ///
295
+    /// let first_blob_again = reader.next().unwrap()?;
296
+    /// assert_eq!(first_blob.offset(), first_blob_again.offset());
297
+    /// # Ok(())
298
+    /// # }
299
+    /// ```
300
+    pub fn seek(&mut self, pos: ByteOffset) -> Result<()> {
301
+        match self.reader.seek(SeekFrom::Start(pos.0)) {
302
+            Ok(offset) => {
303
+                self.offset = Some(ByteOffset(offset));
304
+                Ok(())
305
+            },
306
+            Err(e) => {
307
+                self.offset = None;
308
+                Err(e.into())
309
+            },
310
+        }
311
+    }
312
+
313
+    /// Seek to an offset in bytes. (See `std::io::Seek`)
314
+    pub fn seek_raw(&mut self, pos: SeekFrom) -> Result<u64> {
315
+        match self.reader.seek(pos) {
316
+            Ok(offset) => {
317
+                self.offset = Some(ByteOffset(offset));
318
+                Ok(offset)
319
+            },
320
+            Err(e) => {
321
+                self.offset = None;
322
+                Err(e.into())
323
+            },
324
+        }
215 325
     }
216 326
 }
217 327