A Rust library for reading the OpenStreetMap PBF file format (*.osm.pbf).

mmap_blob.rs 7.1KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246
  1. //! Iterate over blobs from a memory map
  2. extern crate byteorder;
  3. extern crate memmap;
  4. extern crate protobuf;
  5. use self::fileformat::BlobHeader;
  6. use blob::{decode_blob, BlobDecode, BlobType, ByteOffset};
  7. use block::{HeaderBlock, PrimitiveBlock};
  8. use byteorder::ByteOrder;
  9. use error::{new_blob_error, new_protobuf_error, BlobError, Result};
  10. use proto::{fileformat, osmformat};
  11. use std::fs::File;
  12. use std::path::Path;
  13. use util::parse_message_from_bytes;
  14. /// A read-only memory map.
  15. #[derive(Debug)]
  16. pub struct Mmap {
  17. mmap: memmap::Mmap,
  18. }
  19. impl Mmap {
  20. /// Creates a memory map from a given file.
  21. ///
  22. /// # Safety
  23. /// The underlying file should not be modified while holding the memory map.
  24. /// See [memmap-rs issue 25](https://github.com/danburkert/memmap-rs/issues/25) for more
  25. /// information on the safety of memory maps.
  26. ///
  27. /// # Example
  28. /// ```
  29. /// use osmpbf::*;
  30. ///
  31. /// # fn foo() -> Result<()> {
  32. /// let f = std::fs::File::open("tests/test.osm.pbf")?;
  33. /// let mmap = unsafe { Mmap::from_file(&f)? };
  34. /// # Ok(())
  35. /// # }
  36. /// # foo().unwrap();
  37. /// ```
  38. pub unsafe fn from_file(file: &File) -> Result<Mmap> {
  39. memmap::Mmap::map(file)
  40. .map(|m| Mmap { mmap: m })
  41. .map_err(|e| e.into())
  42. }
  43. /// Creates a memory map from a given path.
  44. ///
  45. /// # Safety
  46. /// The underlying file should not be modified while holding the memory map.
  47. /// See [memmap-rs issue 25](https://github.com/danburkert/memmap-rs/issues/25) for more
  48. /// information on the safety of memory maps.
  49. ///
  50. /// # Example
  51. /// ```
  52. /// use osmpbf::*;
  53. ///
  54. /// # fn foo() -> Result<()> {
  55. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  56. /// # Ok(())
  57. /// # }
  58. /// # foo().unwrap();
  59. /// ```
  60. pub unsafe fn from_path<P: AsRef<Path>>(path: P) -> Result<Mmap> {
  61. let file = File::open(&path)?;
  62. memmap::Mmap::map(&file)
  63. .map(|m| Mmap { mmap: m })
  64. .map_err(|e| e.into())
  65. }
  66. /// Returns an iterator over the blobs in this memory map.
  67. pub fn blob_iter(&self) -> MmapBlobReader {
  68. MmapBlobReader::new(self)
  69. }
  70. fn as_slice(&self) -> &[u8] {
  71. &self.mmap
  72. }
  73. }
  74. /// A PBF blob from a memory map.
  75. #[derive(Clone, Debug)]
  76. pub struct MmapBlob<'a> {
  77. header: BlobHeader,
  78. data: &'a [u8],
  79. offset: ByteOffset,
  80. }
  81. impl<'a> MmapBlob<'a> {
  82. /// Decodes the Blob and tries to obtain the inner content (usually a `HeaderBlock` or a
  83. /// `PrimitiveBlock`). This operation might involve an expensive decompression step.
  84. pub fn decode(&'a self) -> Result<BlobDecode<'a>> {
  85. let blob: fileformat::Blob = parse_message_from_bytes(self.data)
  86. .map_err(|e| new_protobuf_error(e, "blob content"))?;
  87. match self.header.get_field_type() {
  88. "OSMHeader" => {
  89. let block = Box::new(HeaderBlock::new(decode_blob(&blob)?));
  90. Ok(BlobDecode::OsmHeader(block))
  91. }
  92. "OSMData" => {
  93. let block: osmformat::PrimitiveBlock = decode_blob(&blob)?;
  94. Ok(BlobDecode::OsmData(PrimitiveBlock::new(block)))
  95. }
  96. x => Ok(BlobDecode::Unknown(x)),
  97. }
  98. }
  99. /// Returns the type of a blob without decoding its content.
  100. pub fn get_type(&self) -> BlobType {
  101. match self.header.get_field_type() {
  102. "OSMHeader" => BlobType::OsmHeader,
  103. "OSMData" => BlobType::OsmData,
  104. x => BlobType::Unknown(x),
  105. }
  106. }
  107. /// Returns the byte offset of the blob from the start of its memory map.
  108. pub fn offset(&self) -> ByteOffset {
  109. self.offset
  110. }
  111. }
  112. /// A reader for memory mapped PBF files that allows iterating over `MmapBlob`s.
  113. #[derive(Clone, Debug)]
  114. pub struct MmapBlobReader<'a> {
  115. mmap: &'a Mmap,
  116. offset: usize,
  117. last_blob_ok: bool,
  118. }
  119. impl<'a> MmapBlobReader<'a> {
  120. /// Creates a new `MmapBlobReader`.
  121. ///
  122. /// # Example
  123. /// ```
  124. /// use osmpbf::*;
  125. ///
  126. /// # fn foo() -> Result<()> {
  127. ///
  128. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  129. /// let reader = MmapBlobReader::new(&mmap);
  130. ///
  131. /// # Ok(())
  132. /// # }
  133. /// # foo().unwrap();
  134. /// ```
  135. pub fn new(mmap: &Mmap) -> MmapBlobReader {
  136. MmapBlobReader {
  137. mmap,
  138. offset: 0,
  139. last_blob_ok: true,
  140. }
  141. }
  142. /// Move the cursor to the given byte offset.
  143. ///
  144. /// # Example
  145. /// ```
  146. /// use osmpbf::*;
  147. ///
  148. /// # fn foo() -> Result<()> {
  149. ///
  150. /// let mmap = unsafe { Mmap::from_path("tests/test.osm.pbf")? };
  151. /// let mut reader = MmapBlobReader::new(&mmap);
  152. ///
  153. /// let first_blob = reader.next().unwrap()?;
  154. /// let second_blob = reader.next().unwrap()?;
  155. ///
  156. /// reader.seek(first_blob.offset());
  157. /// let first_blob_again = reader.next().unwrap()?;
  158. ///
  159. /// assert_eq!(first_blob.offset(), first_blob_again.offset());
  160. ///
  161. /// # Ok(())
  162. /// # }
  163. /// # foo().unwrap();
  164. /// ```
  165. pub fn seek(&mut self, pos: ByteOffset) {
  166. self.offset = pos.0 as usize;
  167. }
  168. }
  169. impl<'a> Iterator for MmapBlobReader<'a> {
  170. type Item = Result<MmapBlob<'a>>;
  171. fn next(&mut self) -> Option<Self::Item> {
  172. let slice = &self.mmap.as_slice()[self.offset..];
  173. match slice.len() {
  174. 0 => return None,
  175. 1..=3 => {
  176. self.last_blob_ok = false;
  177. return Some(Err(new_blob_error(BlobError::InvalidHeaderSize)));
  178. }
  179. _ => {}
  180. }
  181. let header_size = byteorder::BigEndian::read_u32(slice) as usize;
  182. if header_size as u64 >= ::blob::MAX_BLOB_HEADER_SIZE {
  183. self.last_blob_ok = false;
  184. return Some(Err(new_blob_error(BlobError::HeaderTooBig {
  185. size: header_size as u64,
  186. })));
  187. }
  188. if slice.len() < 4 + header_size {
  189. self.last_blob_ok = false;
  190. let io_error = ::std::io::Error::new(
  191. ::std::io::ErrorKind::UnexpectedEof,
  192. "content too short for header",
  193. );
  194. return Some(Err(io_error.into()));
  195. }
  196. let header: BlobHeader = match parse_message_from_bytes(&slice[4..(4 + header_size)]) {
  197. Ok(x) => x,
  198. Err(e) => {
  199. self.last_blob_ok = false;
  200. return Some(Err(new_protobuf_error(e, "blob header")));
  201. }
  202. };
  203. let data_size = header.get_datasize() as usize;
  204. let chunk_size = 4 + header_size + data_size;
  205. if slice.len() < chunk_size {
  206. self.last_blob_ok = false;
  207. let io_error = ::std::io::Error::new(
  208. ::std::io::ErrorKind::UnexpectedEof,
  209. "content too short for block data",
  210. );
  211. return Some(Err(io_error.into()));
  212. }
  213. let prev_offset = self.offset;
  214. self.offset += chunk_size;
  215. Some(Ok(MmapBlob {
  216. header,
  217. data: &slice[(4 + header_size)..chunk_size],
  218. offset: ByteOffset(prev_offset as u64),
  219. }))
  220. }
  221. }