A Rust library for reading the OpenStreetMap PBF file format (*.osm.pbf).

blob.rs 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226
  1. //! Read and decode blobs
  2. extern crate protobuf;
  3. extern crate byteorder;
  4. use block::{HeaderBlock, PrimitiveBlock};
  5. use byteorder::ReadBytesExt;
  6. use errors::*;
  7. use proto::fileformat;
  8. use std::fs::File;
  9. use std::io::{BufReader, ErrorKind, Read};
  10. use std::path::Path;
  11. #[cfg(feature = "system-libz")]
  12. use flate2::read::ZlibDecoder;
  13. #[cfg(not(feature = "system-libz"))]
  14. use inflate::DeflateDecoder;
  /// The content type of a blob.
  ///
  /// The only payload is a borrowed string slice, so values are cheap to copy and can be passed
  /// around by value.
  #[derive(Clone, Copy, Debug, Eq, PartialEq)]
  pub enum BlobType<'a> {
      /// Blob contains a `HeaderBlock`.
      OsmHeader,
      /// Blob contains a `PrimitiveBlock`.
      OsmData,
      /// An unknown blob type with the given string identifier.
      /// Parsers should ignore unknown blobs they do not expect.
      Unknown(&'a str),
  }
  26. //TODO rename variants to fit proto files
  27. /// The decoded content of a blob (analogous to `BlobType`).
  28. pub enum BlobDecode<'a> {
  29. /// Blob contains a `HeaderBlock`.
  30. OsmHeader(HeaderBlock),
  31. /// Blob contains a `PrimitiveBlock`.
  32. OsmData(PrimitiveBlock),
  33. /// An unknown blob type with the given string identifier.
  34. /// Parsers should ignore unknown blobs they do not expect.
  35. Unknown(&'a str),
  36. }
  37. /// A blob.
  38. ///
  39. /// A PBF file consists of a sequence of blobs. This type supports decoding the content of a blob
  40. /// to different types of blocks that are usually more interesting to the user.
  41. pub struct Blob {
  42. header: fileformat::BlobHeader,
  43. blob: fileformat::Blob,
  44. }
  45. impl Blob {
  46. fn new(header: fileformat::BlobHeader, blob: fileformat::Blob) -> Blob {
  47. Blob {
  48. header: header,
  49. blob: blob
  50. }
  51. }
  52. /// Decodes the Blob and tries to obtain the inner content (usually a `HeaderBlock` or a
  53. /// `PrimitiveBlock`). This operation might involve an expensive decompression step.
  54. pub fn decode(&self) -> Result<BlobDecode> {
  55. match self.get_type() {
  56. BlobType::OsmHeader => {
  57. self.to_headerblock()
  58. .map(BlobDecode::OsmHeader)
  59. },
  60. BlobType::OsmData => {
  61. self.to_primitiveblock()
  62. .map(BlobDecode::OsmData)
  63. },
  64. BlobType::Unknown(x) => Ok(BlobDecode::Unknown(x)),
  65. }
  66. }
  67. /// Returns the type of a blob without decoding its content.
  68. pub fn get_type(&self) -> BlobType {
  69. match self.header.get_field_type() {
  70. "OSMHeader" => BlobType::OsmHeader,
  71. "OSMData" => BlobType::OsmData,
  72. x => BlobType::Unknown(x),
  73. }
  74. }
  75. /// Tries to decode the blob to a `HeaderBlock`. This operation might involve an expensive
  76. /// decompression step.
  77. pub fn to_headerblock(&self) -> Result<HeaderBlock> {
  78. decode_blob(&self.blob)
  79. .map(HeaderBlock::new)
  80. .chain_err(|| "failed to decode blob to header block")
  81. }
  82. /// Tries to decode the blob to a `PrimitiveBlock`. This operation might involve an expensive
  83. /// decompression step.
  84. pub fn to_primitiveblock(&self) -> Result<PrimitiveBlock> {
  85. decode_blob(&self.blob)
  86. .map(PrimitiveBlock::new)
  87. .chain_err(|| "failed to decode blob to primitive block")
  88. }
  89. }
  /// Reads a PBF stream and yields its `Blob`s one after another when iterated.
  pub struct BlobReader<R: Read> {
      // Source of the PBF bytes.
      reader: R,
      // Cleared once reading a blob has failed; iteration stops from then on.
      last_blob_ok: bool,
  }
  95. impl<R: Read> BlobReader<R> {
  96. /// Creates a new `ElementReader`.
  97. ///
  98. /// # Example
  99. /// ```
  100. /// use osmpbf::*;
  101. ///
  102. /// # fn foo() -> Result<()> {
  103. /// let f = std::fs::File::open("tests/test.osm.pbf")?;
  104. /// let buf_reader = std::io::BufReader::new(f);
  105. ///
  106. /// let reader = ElementReader::new(buf_reader);
  107. ///
  108. /// # Ok(())
  109. /// # }
  110. /// ```
  111. pub fn new(reader: R) -> BlobReader<R> {
  112. BlobReader {
  113. reader: reader,
  114. last_blob_ok: true,
  115. }
  116. }
  117. }
  118. impl BlobReader<BufReader<File>> {
  119. /// Tries to open the file at the given path and constructs a `BlobReader` from this.
  120. ///
  121. /// # Errors
  122. /// Returns the same errors that `std::fs::File::open` returns.
  123. ///
  124. /// # Example
  125. /// ```
  126. /// use osmpbf::*;
  127. ///
  128. /// # fn foo() -> Result<()> {
  129. /// let reader = BlobReader::from_path("tests/test.osm.pbf")?;
  130. /// # Ok(())
  131. /// # }
  132. /// ```
  133. pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self>
  134. {
  135. let f = File::open(path)?;
  136. let reader = BufReader::new(f);
  137. Ok(BlobReader::new(reader))
  138. }
  139. }
  140. impl<R: Read> Iterator for BlobReader<R> {
  141. type Item = Result<Blob>;
  142. fn next(&mut self) -> Option<Self::Item> {
  143. // Stop iteration if there was an error.
  144. if !self.last_blob_ok {
  145. return None;
  146. }
  147. let size: u64 = match self.reader.read_u32::<byteorder::BigEndian>() {
  148. Ok(n) => u64::from(n),
  149. Err(e) => {
  150. match e.kind() {
  151. ErrorKind::UnexpectedEof => {
  152. return None
  153. },
  154. _ => {
  155. self.last_blob_ok = false;
  156. return Some(Err(Error::with_chain(e, "Could not decode blob size")));
  157. },
  158. }
  159. },
  160. };
  161. let header: fileformat::BlobHeader = match protobuf::parse_from_reader(&mut self.reader.by_ref().take(size)) {
  162. Ok(header) => header,
  163. Err(e) => {
  164. self.last_blob_ok = false;
  165. return Some(Err(Error::with_chain(e, "Could not decode BlobHeader")));
  166. },
  167. };
  168. let blob: fileformat::Blob = match protobuf::parse_from_reader(&mut self.reader.by_ref().take(header.get_datasize() as u64)) {
  169. Ok(blob) => blob,
  170. Err(e) => {
  171. self.last_blob_ok = false;
  172. return Some(Err(Error::with_chain(e, "Could not decode Blob")));
  173. },
  174. };
  175. Some(Ok(Blob::new(header, blob)))
  176. }
  177. }
  178. #[cfg(feature = "system-libz")]
  179. pub(crate) fn decode_blob<T>(blob: &fileformat::Blob) -> Result<T>
  180. where T: protobuf::Message + protobuf::MessageStatic {
  181. if blob.has_raw() {
  182. protobuf::parse_from_bytes(blob.get_raw()).chain_err(|| "Could not parse raw data")
  183. } else if blob.has_zlib_data() {
  184. let mut decoder = ZlibDecoder::new(blob.get_zlib_data());
  185. protobuf::parse_from_reader(&mut decoder).chain_err(|| "Could not parse zlib data")
  186. } else {
  187. bail!("Blob is missing fields 'raw' and 'zlib_data")
  188. }
  189. }
  190. #[cfg(not(feature = "system-libz"))]
  191. pub(crate) fn decode_blob<T>(blob: &fileformat::Blob) -> Result<T>
  192. where T: protobuf::Message + protobuf::MessageStatic {
  193. if blob.has_raw() {
  194. protobuf::parse_from_bytes(blob.get_raw()).chain_err(|| "Could not parse raw data")
  195. } else if blob.has_zlib_data() {
  196. let mut decoder = DeflateDecoder::from_zlib(blob.get_zlib_data());
  197. protobuf::parse_from_reader(&mut decoder).chain_err(|| "Could not parse zlib data")
  198. } else {
  199. bail!("Blob is missing fields 'raw' and 'zlib_data")
  200. }
  201. }