A Rust library for reading the OpenStreetMap PBF file format (*.osm.pbf).

blob.rs 7.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252
  1. //! Read and decode blobs
  2. extern crate protobuf;
  3. extern crate byteorder;
  4. use block::{HeaderBlock, PrimitiveBlock};
  5. use byteorder::ReadBytesExt;
  6. use errors::*;
  7. use proto::fileformat;
  8. use std::fs::File;
  9. use std::io::{BufReader, Read};
  10. use std::path::Path;
  11. use util::{parse_message_from_bytes, parse_message_from_reader};
  12. #[cfg(feature = "system-libz")]
  13. use flate2::read::ZlibDecoder;
  14. #[cfg(not(feature = "system-libz"))]
  15. use inflate::DeflateDecoder;
  /// Size limit for an encoded `BlobHeader`, in bytes (64 KiB).
  pub static MAX_BLOB_HEADER_SIZE: u64 = 64 * 1024;

  /// Size limit for the uncompressed content of a `Blob`, in bytes (32 MiB).
  pub static MAX_BLOB_MESSAGE_SIZE: u64 = 32 * 1024 * 1024;
  /// Identifies what kind of block a blob carries, without decoding it.
  #[derive(Debug, Eq, PartialEq)]
  pub enum BlobType<'a> {
      /// The blob holds a `HeaderBlock`.
      OsmHeader,
      /// The blob holds a `PrimitiveBlock`.
      OsmData,
      /// The blob has a type string this library does not know; the string is
      /// passed along unchanged. Parsers should skip blobs they do not expect.
      Unknown(&'a str),
  }
  31. //TODO rename variants to fit proto files
  32. /// The decoded content of a blob (analogous to `BlobType`).
  33. pub enum BlobDecode<'a> {
  34. /// Blob contains a `HeaderBlock`.
  35. OsmHeader(Box<HeaderBlock>),
  36. /// Blob contains a `PrimitiveBlock`.
  37. OsmData(PrimitiveBlock),
  38. /// An unknown blob type with the given string identifier.
  39. /// Parsers should ignore unknown blobs they do not expect.
  40. Unknown(&'a str),
  41. }
  42. /// A blob.
  43. ///
  44. /// A PBF file consists of a sequence of blobs. This type supports decoding the content of a blob
  45. /// to different types of blocks that are usually more interesting to the user.
  46. pub struct Blob {
  47. header: fileformat::BlobHeader,
  48. blob: fileformat::Blob,
  49. }
  50. impl Blob {
  51. fn new(header: fileformat::BlobHeader, blob: fileformat::Blob) -> Blob {
  52. Blob {
  53. header: header,
  54. blob: blob
  55. }
  56. }
  57. /// Decodes the Blob and tries to obtain the inner content (usually a `HeaderBlock` or a
  58. /// `PrimitiveBlock`). This operation might involve an expensive decompression step.
  59. pub fn decode(&self) -> Result<BlobDecode> {
  60. match self.get_type() {
  61. BlobType::OsmHeader => {
  62. let block = Box::new(self.to_headerblock()?);
  63. Ok(BlobDecode::OsmHeader(block))
  64. },
  65. BlobType::OsmData => {
  66. let block = self.to_primitiveblock()?;
  67. Ok(BlobDecode::OsmData(block))
  68. },
  69. BlobType::Unknown(x) => Ok(BlobDecode::Unknown(x)),
  70. }
  71. }
  72. /// Returns the type of a blob without decoding its content.
  73. pub fn get_type(&self) -> BlobType {
  74. match self.header.get_field_type() {
  75. "OSMHeader" => BlobType::OsmHeader,
  76. "OSMData" => BlobType::OsmData,
  77. x => BlobType::Unknown(x),
  78. }
  79. }
  80. /// Tries to decode the blob to a `HeaderBlock`. This operation might involve an expensive
  81. /// decompression step.
  82. pub fn to_headerblock(&self) -> Result<HeaderBlock> {
  83. decode_blob(&self.blob)
  84. .map(HeaderBlock::new)
  85. .chain_err(|| "failed to decode blob to header block")
  86. }
  87. /// Tries to decode the blob to a `PrimitiveBlock`. This operation might involve an expensive
  88. /// decompression step.
  89. pub fn to_primitiveblock(&self) -> Result<PrimitiveBlock> {
  90. decode_blob(&self.blob)
  91. .map(PrimitiveBlock::new)
  92. .chain_err(|| "failed to decode blob to primitive block")
  93. }
  94. }
  /// A reader for PBF files that allows iterating over `Blob`s.
  pub struct BlobReader<R: Read> {
      /// Underlying byte source the blobs are read from.
      reader: R,
      /// `false` once reading a blob has failed; the iterator then stops yielding items.
      last_blob_ok: bool,
  }

  impl<R: Read> BlobReader<R> {
      /// Creates a new `BlobReader` that reads blobs from `reader`.
      ///
      /// # Example
      /// ```
      /// use osmpbf::*;
      ///
      /// # fn foo() -> Result<()> {
      /// let f = std::fs::File::open("tests/test.osm.pbf")?;
      /// let buf_reader = std::io::BufReader::new(f);
      ///
      /// let reader = BlobReader::new(buf_reader);
      ///
      /// # Ok(())
      /// # }
      /// ```
      pub fn new(reader: R) -> BlobReader<R> {
          BlobReader {
              reader: reader,
              last_blob_ok: true,
          }
      }
  }
  123. impl BlobReader<BufReader<File>> {
  124. /// Tries to open the file at the given path and constructs a `BlobReader` from this.
  125. ///
  126. /// # Errors
  127. /// Returns the same errors that `std::fs::File::open` returns.
  128. ///
  129. /// # Example
  130. /// ```
  131. /// use osmpbf::*;
  132. ///
  133. /// # fn foo() -> Result<()> {
  134. /// let reader = BlobReader::from_path("tests/test.osm.pbf")?;
  135. /// # Ok(())
  136. /// # }
  137. /// ```
  138. pub fn from_path<P: AsRef<Path>>(path: P) -> Result<Self>
  139. {
  140. let f = File::open(path)?;
  141. let reader = BufReader::new(f);
  142. Ok(BlobReader::new(reader))
  143. }
  144. }
  145. impl<R: Read> Iterator for BlobReader<R> {
  146. type Item = Result<Blob>;
  147. fn next(&mut self) -> Option<Self::Item> {
  148. // Stop iteration if there was an error.
  149. if !self.last_blob_ok {
  150. return None;
  151. }
  152. let header_size: u64 = match self.reader.read_u32::<byteorder::BigEndian>() {
  153. Ok(n) => u64::from(n),
  154. Err(e) => {
  155. match e.kind() {
  156. ::std::io::ErrorKind::UnexpectedEof => {
  157. return None
  158. },
  159. _ => {
  160. self.last_blob_ok = false;
  161. return Some(Err(Error::with_chain(e, "Could not decode blob header size")));
  162. },
  163. }
  164. },
  165. };
  166. if header_size >= MAX_BLOB_HEADER_SIZE {
  167. self.last_blob_ok = false;
  168. return Some(Err(ErrorKind::BlobHeaderTooBig(header_size).into()));
  169. }
  170. let header: fileformat::BlobHeader = match parse_message_from_reader(&mut self.reader.by_ref().take(header_size)) {
  171. Ok(header) => header,
  172. Err(e) => {
  173. self.last_blob_ok = false;
  174. return Some(Err(Error::with_chain(e, "Could not decode BlobHeader")));
  175. },
  176. };
  177. let blob: fileformat::Blob = match parse_message_from_reader(&mut self.reader.by_ref().take(header.get_datasize() as u64)) {
  178. Ok(blob) => blob,
  179. Err(e) => {
  180. self.last_blob_ok = false;
  181. return Some(Err(Error::with_chain(e, "Could not decode Blob")));
  182. },
  183. };
  184. Some(Ok(Blob::new(header, blob)))
  185. }
  186. }
  /// Parses the content of `blob` into the protobuf message type `T`
  /// (variant built with the `system-libz` feature, using `flate2`'s `ZlibDecoder`).
  ///
  /// * If the blob has `raw` data, it is parsed directly. Raw data of `MAX_BLOB_MESSAGE_SIZE`
  ///   bytes or more is rejected with `ErrorKind::BlobMessageTooBig`.
  /// * Otherwise, if the blob has `zlib_data`, it is decompressed while parsing. At most
  ///   `MAX_BLOB_MESSAGE_SIZE` decompressed bytes are read; longer output is cut off at that
  ///   limit rather than reported as `BlobMessageTooBig`.
  /// * A blob with neither field is an error.
  #[cfg(feature = "system-libz")]
  pub(crate) fn decode_blob<T>(blob: &fileformat::Blob) -> Result<T>
      where T: protobuf::Message + protobuf::MessageStatic
  {
      if blob.has_raw() {
          let raw = blob.get_raw();
          let size = raw.len() as u64;
          if size >= MAX_BLOB_MESSAGE_SIZE {
              return Err(ErrorKind::BlobMessageTooBig(size).into());
          }
          parse_message_from_bytes(raw).chain_err(|| "Could not parse raw data")
      } else if blob.has_zlib_data() {
          let mut decoder = ZlibDecoder::new(blob.get_zlib_data())
              .take(MAX_BLOB_MESSAGE_SIZE);
          parse_message_from_reader(&mut decoder).chain_err(|| "Could not parse zlib data")
      } else {
          bail!("Blob is missing fields 'raw' and 'zlib_data'")
      }
  }
  205. #[cfg(not(feature = "system-libz"))]
  206. pub(crate) fn decode_blob<T>(blob: &fileformat::Blob) -> Result<T>
  207. where T: protobuf::Message + protobuf::MessageStatic {
  208. if blob.has_raw() {
  209. let size = blob.get_raw().len() as u64;
  210. if size < MAX_BLOB_MESSAGE_SIZE {
  211. parse_message_from_bytes(blob.get_raw()).chain_err(|| "Could not parse raw data")
  212. } else {
  213. Err(ErrorKind::BlobMessageTooBig(size).into())
  214. }
  215. } else if blob.has_zlib_data() {
  216. let mut decoder = DeflateDecoder::from_zlib(blob.get_zlib_data())
  217. .take(MAX_BLOB_MESSAGE_SIZE);
  218. parse_message_from_reader(&mut decoder).chain_err(|| "Could not parse zlib data")
  219. } else {
  220. bail!("Blob is missing fields 'raw' and 'zlib_data")
  221. }
  222. }