A Rust library for reading the OpenStreetMap PBF file format (*.osm.pbf).

mmap_blob.rs 5.8KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198
  1. //! Iterate over blobs from a memory map
  2. extern crate protobuf;
  3. extern crate byteorder;
  4. extern crate memmap;
  5. use blob::{BlobDecode, BlobType, decode_blob};
  6. use byteorder::ByteOrder;
  7. use errors::*;
  8. use block::{HeaderBlock, PrimitiveBlock};
  9. use proto::{fileformat, osmformat};
  10. use self::fileformat::BlobHeader;
  11. use std::fs::File;
  12. use std::path::Path;
  13. /// A read-only memory map.
  14. pub struct Mmap {
  15. mmap: memmap::Mmap,
  16. }
  17. impl Mmap {
  18. /// Creates a memory map from a given file.
  19. ///
  20. /// # Safety
  21. /// The underlying file should not be modified while holding the memory map.
  22. /// See https://github.com/danburkert/memmap-rs/issues/25
  23. ///
  24. /// # Example
  25. /// ```
  26. /// use osmpbf::*;
  27. ///
  28. /// # fn foo() -> Result<()> {
  29. /// let f = std::fs::File::open("tests/test.osm.pbf")?;
  30. /// let mmap = unsafe { Mmap::from_file(&f)? };
  31. /// # Ok(())
  32. /// # }
  33. /// ```
  34. pub unsafe fn from_file(file: &File) -> Result<Mmap> {
  35. memmap::Mmap::map(file)
  36. .map(|m| Mmap { mmap: m })
  37. .chain_err(|| "Could not create memory map from file")
  38. }
  39. /// Creates a memory map from a given path.
  40. ///
  41. /// # Safety
  42. /// The underlying file should not be modified while holding the memory map.
  43. /// See https://github.com/danburkert/memmap-rs/issues/25
  44. ///
  45. /// # Example
  46. /// ```
  47. /// use osmpbf::*;
  48. ///
  49. /// # fn foo() -> Result<()> {
  50. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  51. /// # Ok(())
  52. /// # }
  53. /// ```
  54. pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> Result<Mmap> {
  55. let file = File::open(&path)?;
  56. memmap::Mmap::map(&file)
  57. .map(|m| Mmap { mmap: m })
  58. .chain_err(|| format!("Could not create memory map from path {}", path.as_ref().display()))
  59. }
  60. /// Returns an iterator over the blobs in this memory map.
  61. pub fn blob_iter(&self) -> MmapBlobReader {
  62. MmapBlobReader::new(self)
  63. }
  64. fn as_slice(&self) -> &[u8] {
  65. &self.mmap
  66. }
  67. }
  68. /// A PBF blob from a memory map.
  69. pub struct MmapBlob<'a> {
  70. header: BlobHeader,
  71. data: &'a [u8],
  72. }
  73. impl<'a> MmapBlob<'a> {
  74. /// Decodes the Blob and tries to obtain the inner content (usually a `HeaderBlock` or a
  75. /// `PrimitiveBlock`). This operation might involve an expensive decompression step.
  76. pub fn decode(&'a self) -> Result<BlobDecode<'a>> {
  77. let blob: fileformat::Blob = protobuf::parse_from_bytes(self.data)
  78. .chain_err(|| "failed to parse Blob")?;
  79. match self.header.get_field_type() {
  80. "OSMHeader" => {
  81. let block: osmformat::HeaderBlock = decode_blob(&blob).unwrap();
  82. Ok(BlobDecode::OsmHeader(HeaderBlock::new(block)))
  83. }
  84. "OSMData" => {
  85. let block: osmformat::PrimitiveBlock = decode_blob(&blob).unwrap();
  86. Ok(BlobDecode::OsmData(PrimitiveBlock::new(block)))
  87. }
  88. x => Ok(BlobDecode::Unknown(x)),
  89. }
  90. }
  91. /// Returns the type of a blob without decoding its content.
  92. pub fn get_type(&self) -> BlobType {
  93. match self.header.get_field_type() {
  94. "OSMHeader" => BlobType::OsmHeader,
  95. "OSMData" => BlobType::OsmData,
  96. x => BlobType::Unknown(x),
  97. }
  98. }
  99. }
  100. /// A reader for memory mapped PBF files that allows iterating over `MmapBlob`s.
  101. #[derive(Clone)]
  102. pub struct MmapBlobReader<'a> {
  103. mmap: &'a Mmap,
  104. offset: usize,
  105. last_blob_ok: bool,
  106. }
  107. impl<'a> MmapBlobReader<'a> {
  108. /// Creates a new `MmapBlobReader`.
  109. ///
  110. /// # Example
  111. /// ```
  112. /// use osmpbf::*;
  113. ///
  114. /// # fn foo() -> Result<()> {
  115. ///
  116. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  117. /// let reader = MmapBlobReader::new(&mmap);
  118. ///
  119. /// # Ok(())
  120. /// # }
  121. /// ```
  122. pub fn new(mmap: &Mmap) -> MmapBlobReader {
  123. MmapBlobReader {
  124. mmap: mmap,
  125. offset: 0,
  126. last_blob_ok: true,
  127. }
  128. }
  129. }
  130. impl<'a> Iterator for MmapBlobReader<'a> {
  131. type Item = Result<MmapBlob<'a>>;
  132. fn next(&mut self) -> Option<Self::Item> {
  133. let slice = &self.mmap.as_slice()[self.offset..];
  134. match slice.len() {
  135. 0 => return None,
  136. 1 ... 3 => {
  137. self.last_blob_ok = false;
  138. let io_error = ::std::io::Error::new(
  139. ::std::io::ErrorKind::UnexpectedEof, "failed to parse blob length"
  140. );
  141. return Some(Err(Error::from_kind(ErrorKind::Io(io_error))));
  142. },
  143. _ => {},
  144. }
  145. let header_size = byteorder::BigEndian::read_u32(slice) as usize;
  146. if slice.len() < 4 + header_size {
  147. self.last_blob_ok = false;
  148. let io_error = ::std::io::Error::new(
  149. ::std::io::ErrorKind::UnexpectedEof, "content too short for header"
  150. );
  151. return Some(Err(Error::from_kind(ErrorKind::Io(io_error))));
  152. }
  153. let header: BlobHeader = match protobuf::parse_from_bytes(&slice[4..(4 + header_size)]) {
  154. Ok(x) => x,
  155. Err(e) => {
  156. self.last_blob_ok = false;
  157. return Some(Err(e.into()));
  158. },
  159. };
  160. let data_size = header.get_datasize() as usize;
  161. let chunk_size = 4 + header_size + data_size;
  162. if slice.len() < chunk_size {
  163. self.last_blob_ok = false;
  164. let io_error = ::std::io::Error::new(
  165. ::std::io::ErrorKind::UnexpectedEof, "content too short for block data"
  166. );
  167. return Some(Err(Error::from_kind(ErrorKind::Io(io_error))));
  168. }
  169. self.offset += chunk_size;
  170. Some(Ok(MmapBlob {
  171. header: header,
  172. data: &slice[(4 + header_size)..chunk_size]
  173. }))
  174. }
  175. }