A Rust library for reading the OpenStreetMap PBF file format (*.osm.pbf).

  1. //! Iterate over blobs from a memory map
  2. extern crate protobuf;
  3. extern crate byteorder;
  4. extern crate memmap;
  5. use blob::{BlobDecode, BlobType, decode_blob};
  6. use block::{HeaderBlock, PrimitiveBlock};
  7. use byteorder::ByteOrder;
  8. use error::{BlobError, Result, new_blob_error, new_protobuf_error};
  9. use proto::{fileformat, osmformat};
  10. use self::fileformat::BlobHeader;
  11. use std::fs::File;
  12. use std::path::Path;
  13. use util::parse_message_from_bytes;
  14. /// A read-only memory map.
  15. #[derive(Debug)]
  16. pub struct Mmap {
  17. mmap: memmap::Mmap,
  18. }
  19. impl Mmap {
  20. /// Creates a memory map from a given file.
  21. ///
  22. /// # Safety
  23. /// The underlying file should not be modified while holding the memory map.
  24. /// See [memmap-rs issue 25](https://github.com/danburkert/memmap-rs/issues/25) for more
  25. /// information on the safety of memory maps.
  26. ///
  27. /// # Example
  28. /// ```
  29. /// use osmpbf::*;
  30. ///
  31. /// # fn foo() -> Result<()> {
  32. /// let f = std::fs::File::open("tests/test.osm.pbf")?;
  33. /// let mmap = unsafe { Mmap::from_file(&f)? };
  34. /// # Ok(())
  35. /// # }
  36. /// ```
  37. pub unsafe fn from_file(file: &File) -> Result<Mmap> {
  38. memmap::Mmap::map(file)
  39. .map(|m| Mmap { mmap: m })
  40. .map_err(|e| e.into())
  41. }
  42. /// Creates a memory map from a given path.
  43. ///
  44. /// # Safety
  45. /// The underlying file should not be modified while holding the memory map.
  46. /// See [memmap-rs issue 25](https://github.com/danburkert/memmap-rs/issues/25) for more
  47. /// information on the safety of memory maps.
  48. ///
  49. /// # Example
  50. /// ```
  51. /// use osmpbf::*;
  52. ///
  53. /// # fn foo() -> Result<()> {
  54. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  55. /// # Ok(())
  56. /// # }
  57. /// ```
  58. pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> Result<Mmap> {
  59. let file = File::open(&path)?;
  60. memmap::Mmap::map(&file)
  61. .map(|m| Mmap { mmap: m })
  62. .map_err(|e| e.into())
  63. }
  64. /// Returns an iterator over the blobs in this memory map.
  65. pub fn blob_iter(&self) -> MmapBlobReader {
  66. MmapBlobReader::new(self)
  67. }
  68. fn as_slice(&self) -> &[u8] {
  69. &self.mmap
  70. }
  71. }
  72. /// A PBF blob from a memory map.
  73. #[derive(Clone, Debug)]
  74. pub struct MmapBlob<'a> {
  75. header: BlobHeader,
  76. data: &'a [u8],
  77. }
  78. impl<'a> MmapBlob<'a> {
  79. /// Decodes the Blob and tries to obtain the inner content (usually a `HeaderBlock` or a
  80. /// `PrimitiveBlock`). This operation might involve an expensive decompression step.
  81. pub fn decode(&'a self) -> Result<BlobDecode<'a>> {
  82. let blob: fileformat::Blob = parse_message_from_bytes(self.data)
  83. .map_err(|e| new_protobuf_error(e, "blob content"))?;
  84. match self.header.get_field_type() {
  85. "OSMHeader" => {
  86. let block = Box::new(HeaderBlock::new(decode_blob(&blob)?));
  87. Ok(BlobDecode::OsmHeader(block))
  88. }
  89. "OSMData" => {
  90. let block: osmformat::PrimitiveBlock = decode_blob(&blob)?;
  91. Ok(BlobDecode::OsmData(PrimitiveBlock::new(block)))
  92. }
  93. x => Ok(BlobDecode::Unknown(x)),
  94. }
  95. }
  96. /// Returns the type of a blob without decoding its content.
  97. pub fn get_type(&self) -> BlobType {
  98. match self.header.get_field_type() {
  99. "OSMHeader" => BlobType::OsmHeader,
  100. "OSMData" => BlobType::OsmData,
  101. x => BlobType::Unknown(x),
  102. }
  103. }
  104. }
  105. /// A reader for memory mapped PBF files that allows iterating over `MmapBlob`s.
  106. #[derive(Clone, Debug)]
  107. pub struct MmapBlobReader<'a> {
  108. mmap: &'a Mmap,
  109. offset: usize,
  110. last_blob_ok: bool,
  111. }
  112. impl<'a> MmapBlobReader<'a> {
  113. /// Creates a new `MmapBlobReader`.
  114. ///
  115. /// # Example
  116. /// ```
  117. /// use osmpbf::*;
  118. ///
  119. /// # fn foo() -> Result<()> {
  120. ///
  121. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  122. /// let reader = MmapBlobReader::new(&mmap);
  123. ///
  124. /// # Ok(())
  125. /// # }
  126. /// ```
  127. pub fn new(mmap: &Mmap) -> MmapBlobReader {
  128. MmapBlobReader {
  129. mmap,
  130. offset: 0,
  131. last_blob_ok: true,
  132. }
  133. }
  134. }
  135. impl<'a> Iterator for MmapBlobReader<'a> {
  136. type Item = Result<MmapBlob<'a>>;
  137. fn next(&mut self) -> Option<Self::Item> {
  138. let slice = &self.mmap.as_slice()[self.offset..];
  139. match slice.len() {
  140. 0 => return None,
  141. 1 ... 3 => {
  142. self.last_blob_ok = false;
  143. return Some(Err(new_blob_error(BlobError::InvalidHeaderSize)));
  144. },
  145. _ => {},
  146. }
  147. let header_size = byteorder::BigEndian::read_u32(slice) as usize;
  148. if header_size as u64 >= ::blob::MAX_BLOB_HEADER_SIZE {
  149. self.last_blob_ok = false;
  150. return Some(Err(new_blob_error(BlobError::HeaderTooBig{size: header_size as u64})));
  151. }
  152. if slice.len() < 4 + header_size {
  153. self.last_blob_ok = false;
  154. let io_error = ::std::io::Error::new(
  155. ::std::io::ErrorKind::UnexpectedEof, "content too short for header"
  156. );
  157. return Some(Err(io_error.into()));
  158. }
  159. let header: BlobHeader = match parse_message_from_bytes(&slice[4..(4 + header_size)]) {
  160. Ok(x) => x,
  161. Err(e) => {
  162. self.last_blob_ok = false;
  163. return Some(Err(new_protobuf_error(e, "blob header")));
  164. },
  165. };
  166. let data_size = header.get_datasize() as usize;
  167. let chunk_size = 4 + header_size + data_size;
  168. if slice.len() < chunk_size {
  169. self.last_blob_ok = false;
  170. let io_error = ::std::io::Error::new(
  171. ::std::io::ErrorKind::UnexpectedEof, "content too short for block data"
  172. );
  173. return Some(Err(io_error.into()));
  174. }
  175. self.offset += chunk_size;
  176. Some(Ok(MmapBlob {
  177. header,
  178. data: &slice[(4 + header_size)..chunk_size]
  179. }))
  180. }
  181. }