A Rust library for reading the OpenStreetMap PBF file format (*.osm.pbf).

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199
  1. //! Iterate over blobs from a memory map
  2. extern crate protobuf;
  3. extern crate byteorder;
  4. extern crate memmap;
  5. use blob::{BlobDecode, BlobType, decode_blob};
  6. use block::{HeaderBlock, PrimitiveBlock};
  7. use byteorder::ByteOrder;
  8. use errors::*;
  9. use proto::{fileformat, osmformat};
  10. use self::fileformat::BlobHeader;
  11. use std::fs::File;
  12. use std::path::Path;
  13. use util::parse_message_from_bytes;
  14. /// A read-only memory map.
  15. pub struct Mmap {
  16. mmap: memmap::Mmap,
  17. }
  18. impl Mmap {
  19. /// Creates a memory map from a given file.
  20. ///
  21. /// # Safety
  22. /// The underlying file should not be modified while holding the memory map.
  23. /// See https://github.com/danburkert/memmap-rs/issues/25
  24. ///
  25. /// # Example
  26. /// ```
  27. /// use osmpbf::*;
  28. ///
  29. /// # fn foo() -> Result<()> {
  30. /// let f = std::fs::File::open("tests/test.osm.pbf")?;
  31. /// let mmap = unsafe { Mmap::from_file(&f)? };
  32. /// # Ok(())
  33. /// # }
  34. /// ```
  35. pub unsafe fn from_file(file: &File) -> Result<Mmap> {
  36. memmap::Mmap::map(file)
  37. .map(|m| Mmap { mmap: m })
  38. .chain_err(|| "Could not create memory map from file")
  39. }
  40. /// Creates a memory map from a given path.
  41. ///
  42. /// # Safety
  43. /// The underlying file should not be modified while holding the memory map.
  44. /// See https://github.com/danburkert/memmap-rs/issues/25
  45. ///
  46. /// # Example
  47. /// ```
  48. /// use osmpbf::*;
  49. ///
  50. /// # fn foo() -> Result<()> {
  51. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  52. /// # Ok(())
  53. /// # }
  54. /// ```
  55. pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> Result<Mmap> {
  56. let file = File::open(&path)?;
  57. memmap::Mmap::map(&file)
  58. .map(|m| Mmap { mmap: m })
  59. .chain_err(|| format!("Could not create memory map from path {}", path.as_ref().display()))
  60. }
  61. /// Returns an iterator over the blobs in this memory map.
  62. pub fn blob_iter(&self) -> MmapBlobReader {
  63. MmapBlobReader::new(self)
  64. }
  65. fn as_slice(&self) -> &[u8] {
  66. &self.mmap
  67. }
  68. }
  69. /// A PBF blob from a memory map.
  70. pub struct MmapBlob<'a> {
  71. header: BlobHeader,
  72. data: &'a [u8],
  73. }
  74. impl<'a> MmapBlob<'a> {
  75. /// Decodes the Blob and tries to obtain the inner content (usually a `HeaderBlock` or a
  76. /// `PrimitiveBlock`). This operation might involve an expensive decompression step.
  77. pub fn decode(&'a self) -> Result<BlobDecode<'a>> {
  78. let blob: fileformat::Blob = parse_message_from_bytes(self.data)
  79. .chain_err(|| "failed to parse Blob")?;
  80. match self.header.get_field_type() {
  81. "OSMHeader" => {
  82. let block: osmformat::HeaderBlock = decode_blob(&blob).unwrap();
  83. Ok(BlobDecode::OsmHeader(HeaderBlock::new(block)))
  84. }
  85. "OSMData" => {
  86. let block: osmformat::PrimitiveBlock = decode_blob(&blob).unwrap();
  87. Ok(BlobDecode::OsmData(PrimitiveBlock::new(block)))
  88. }
  89. x => Ok(BlobDecode::Unknown(x)),
  90. }
  91. }
  92. /// Returns the type of a blob without decoding its content.
  93. pub fn get_type(&self) -> BlobType {
  94. match self.header.get_field_type() {
  95. "OSMHeader" => BlobType::OsmHeader,
  96. "OSMData" => BlobType::OsmData,
  97. x => BlobType::Unknown(x),
  98. }
  99. }
  100. }
  101. /// A reader for memory mapped PBF files that allows iterating over `MmapBlob`s.
  102. #[derive(Clone)]
  103. pub struct MmapBlobReader<'a> {
  104. mmap: &'a Mmap,
  105. offset: usize,
  106. last_blob_ok: bool,
  107. }
  108. impl<'a> MmapBlobReader<'a> {
  109. /// Creates a new `MmapBlobReader`.
  110. ///
  111. /// # Example
  112. /// ```
  113. /// use osmpbf::*;
  114. ///
  115. /// # fn foo() -> Result<()> {
  116. ///
  117. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  118. /// let reader = MmapBlobReader::new(&mmap);
  119. ///
  120. /// # Ok(())
  121. /// # }
  122. /// ```
  123. pub fn new(mmap: &Mmap) -> MmapBlobReader {
  124. MmapBlobReader {
  125. mmap: mmap,
  126. offset: 0,
  127. last_blob_ok: true,
  128. }
  129. }
  130. }
  131. impl<'a> Iterator for MmapBlobReader<'a> {
  132. type Item = Result<MmapBlob<'a>>;
  133. fn next(&mut self) -> Option<Self::Item> {
  134. let slice = &self.mmap.as_slice()[self.offset..];
  135. match slice.len() {
  136. 0 => return None,
  137. 1 ... 3 => {
  138. self.last_blob_ok = false;
  139. let io_error = ::std::io::Error::new(
  140. ::std::io::ErrorKind::UnexpectedEof, "failed to parse blob length"
  141. );
  142. return Some(Err(Error::from_kind(ErrorKind::Io(io_error))));
  143. },
  144. _ => {},
  145. }
  146. let header_size = byteorder::BigEndian::read_u32(slice) as usize;
  147. if slice.len() < 4 + header_size {
  148. self.last_blob_ok = false;
  149. let io_error = ::std::io::Error::new(
  150. ::std::io::ErrorKind::UnexpectedEof, "content too short for header"
  151. );
  152. return Some(Err(Error::from_kind(ErrorKind::Io(io_error))));
  153. }
  154. let header: BlobHeader = match parse_message_from_bytes(&slice[4..(4 + header_size)]) {
  155. Ok(x) => x,
  156. Err(e) => {
  157. self.last_blob_ok = false;
  158. return Some(Err(e.into()));
  159. },
  160. };
  161. let data_size = header.get_datasize() as usize;
  162. let chunk_size = 4 + header_size + data_size;
  163. if slice.len() < chunk_size {
  164. self.last_blob_ok = false;
  165. let io_error = ::std::io::Error::new(
  166. ::std::io::ErrorKind::UnexpectedEof, "content too short for block data"
  167. );
  168. return Some(Err(Error::from_kind(ErrorKind::Io(io_error))));
  169. }
  170. self.offset += chunk_size;
  171. Some(Ok(MmapBlob {
  172. header: header,
  173. data: &slice[(4 + header_size)..chunk_size]
  174. }))
  175. }
  176. }