omf/file/
reader.rs

1use std::{
2    io::{BufReader, Read},
3    sync::Arc,
4};
5
6use flate2::read::GzDecoder;
7
8use crate::{
9    FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR, FORMAT_VERSION_PRERELEASE, Project, array,
10    error::{Error, Limit},
11    validate::{Problems, Validate, Validator},
12};
13
14use super::{
15    ReadAt, SubFile,
16    zip_container::{Archive, INDEX_NAME},
17};
18
/// Default cap on the number of validation errors that are kept.
pub const DEFAULT_VALIDATION_LIMIT: u32 = 100;
/// Default cap on the uncompressed size of the JSON index: 1 MiB.
pub const DEFAULT_JSON_LIMIT: u64 = 1 << 20;
/// Default memory cap on targets with 32-bit pointers: 1 GiB.
#[cfg(target_pointer_width = "32")]
pub const DEFAULT_MEMORY_LIMIT: u64 = 1 << 30;
/// Default memory cap on every other target: 16 GiB.
#[cfg(not(target_pointer_width = "32"))]
pub const DEFAULT_MEMORY_LIMIT: u64 = 16 << 30;
25
/// Memory limits for reading OMF files.
///
/// Every field is optional. [`Limits::default`] fills in the recommended values, while
/// [`Limits::no_limits`] sets every field to `None`.
#[derive(Debug, Clone, Copy)]
pub struct Limits {
    /// Maximum uncompressed size, in bytes, for the JSON index.
    ///
    /// Default is 1 MiB ([`DEFAULT_JSON_LIMIT`]). `None` means the index size is not limited.
    pub json_bytes: Option<u64>,
    /// Maximum uncompressed image size, in bytes.
    ///
    /// Default is [`DEFAULT_MEMORY_LIMIT`]: 1 GiB on targets with 32-bit pointers and 16 GiB
    /// on all other targets.
    pub image_bytes: Option<u64>,
    /// Maximum image width or height, default unlimited.
    pub image_dim: Option<u32>,
    /// Maximum number of validation errors.
    ///
    /// Errors beyond this limit will be discarded. Default is 100
    /// ([`DEFAULT_VALIDATION_LIMIT`]).
    pub validation: Option<u32>,
}
44
45impl Limits {
46    /// Creates an object with no limits set.
47    ///
48    /// Running without limits is not recommended.
49    pub fn no_limits() -> Self {
50        Self {
51            json_bytes: None,
52            image_bytes: None,
53            image_dim: None,
54            validation: None,
55        }
56    }
57}
58
59impl Default for Limits {
60    /// The default limits.
61    fn default() -> Self {
62        Self {
63            json_bytes: Some(DEFAULT_JSON_LIMIT),
64            image_bytes: Some(DEFAULT_MEMORY_LIMIT),
65            image_dim: None,
66            validation: Some(DEFAULT_VALIDATION_LIMIT),
67        }
68    }
69}
70
/// OMF reader object.
///
/// Typical usage pattern is:
///
/// 1. Create the reader object.
/// 1. Optional: retrieve the file version with `reader.version()`.
/// 1. Optional: adjust the limits with `reader.set_limits(...)`.
/// 1. Read the project from the file with `reader.project()`.
/// 1. Iterate through the project's contents to find the elements and attributes you want to load.
/// 1. For each of those items load the array or image data.
///
/// > **Warning:**
/// > When loading arrays and images from OMF files, beware of "zip bombs"
/// > where data is maliciously crafted to expand to an excessive size when decompressed,
/// > leading to a potential denial of service attack.
/// > Use the limits provided and check array sizes before allocating memory.
pub struct Reader<R> {
    /// Archive wrapper through which the JSON index and the array files are opened.
    archive: Archive<R>,
    /// `[major, minor]` format version reported by the archive.
    version: [u32; 2],
    /// Limits applied while reading; starts out as `Limits::default()`.
    limits: Limits,
}
92
93#[cfg(not(target_family = "wasm"))]
94impl Reader<std::fs::File> {
95    /// Creates a reader by opening the given path.
96    pub fn open(path: impl AsRef<std::path::Path>) -> Result<Self, Error> {
97        Self::new(std::fs::File::open(path)?)
98    }
99}
100
101impl<R: ReadAt> Reader<R> {
102    /// Creates the reader from a [`ReadAt`] object.
103    ///
104    /// This object can be a `std::fs::File`, a `Vec<u8>`, or anything you implement the trait for.
105    /// Currently this is restricted to `'static` objects, and they must be `Sync` and `Send`.
106    pub fn new(data: R) -> Result<Self, Error> {
107        let size = data.size()?;
108        let archive = Archive::new(SubFile::new(Arc::new(data), 0, size)?)?;
109        let (version, pre_release) = archive.version();
110        if let Some(pre) = pre_release {
111            if Some(pre) != FORMAT_VERSION_PRERELEASE {
112                return Err(Error::PreReleaseVersion(version[0], version[1], pre.into()));
113            }
114        }
115        if version > [FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR] {
116            return Err(Error::NewerVersion(version[0], version[1]));
117        }
118        Ok(Self {
119            archive,
120            version,
121            limits: Default::default(),
122        })
123    }
124
125    /// Returns the current limits.
126    pub fn limits(&self) -> Limits {
127        self.limits
128    }
129
130    /// Sets the memory limits.
131    ///
132    /// These limits prevent the reader from consuming excessive system resources, which might
133    /// allow denial of service attacks with maliciously crafted files. Running without limits
134    /// is not recommended.
135    pub fn set_limits(&mut self, limits: Limits) {
136        self.limits = limits;
137    }
138
139    /// Return the version number of the file, which can only be `[2, 0]` right now.
140    pub fn version(&self) -> [u32; 2] {
141        self.version
142    }
143
144    /// Reads, validates, and returns the root `Project` object from the file.
145    ///
146    /// Fails with an error if an IO error occurs, the `json_bytes` limit is exceeded, or validation
147    /// fails. Validation warnings are returned alongside the project if successful or included
148    /// with the errors if not.
149    pub fn project(&self) -> Result<(Project, Problems), Error> {
150        let mut project: Project = serde_json::from_reader(BufReader::new(LimitedRead::new(
151            GzDecoder::new(self.archive.open(INDEX_NAME)?),
152            self.limits().json_bytes.unwrap_or(u64::MAX),
153        )))
154        .map_err(Error::DeserializationFailed)?;
155        let mut val = Validator::new()
156            .with_filenames(self.archive.filenames())
157            .with_limit(self.limits().validation);
158        project.validate_inner(&mut val);
159        let warnings = val.finish().into_result()?;
160        Ok((project, warnings))
161    }
162
163    /// Returns the size in bytes of the compressed array.
164    pub fn array_compressed_size(
165        &self,
166        array: &array::Array<impl array::ArrayType>,
167    ) -> Result<u64, Error> {
168        Ok(self.archive.span(array.filename())?.size)
169    }
170
171    /// Returns a sub-file for reading raw bytes from the file.
172    ///
173    /// Fails with an error if the range is invalid. The contents are not checked or validated by
174    /// this method. The caller must ensure they are valid and safe to use. This function doesn't
175    /// check against any limit.
176    pub fn array_bytes_reader(
177        &self,
178        array: &array::Array<impl array::ArrayType>,
179    ) -> Result<SubFile<R>, Error> {
180        array.constraint(); // Check that validation has been done.
181        self.archive.open(array.filename())
182    }
183
184    /// Return the compressed bytes of an array.
185    ///
186    /// The will allocate memory to store the result. Call `array_compressed_size` to find out how
187    /// much will be allocated.
188    pub fn array_bytes(
189        &self,
190        array: &array::Array<impl array::ArrayType>,
191    ) -> Result<Vec<u8>, Error> {
192        let mut buf = Vec::new();
193        self.array_bytes_reader(array)?.read_to_end(&mut buf)?;
194        Ok(buf)
195    }
196}
197
198struct LimitedRead<R> {
199    inner: R,
200    limit: u64,
201}
202
203impl<R> LimitedRead<R> {
204    fn new(inner: R, limit: u64) -> Self {
205        Self { inner, limit }
206    }
207}
208
209impl<R: Read> Read for LimitedRead<R> {
210    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
211        let n = self.inner.read(buf)?;
212        self.limit = self.limit.saturating_sub(n as u64);
213        if self.limit == 0 {
214            Err(std::io::Error::new(
215                std::io::ErrorKind::Other,
216                Error::LimitExceeded(Limit::JsonBytes),
217            ))
218        } else {
219            Ok(n)
220        }
221    }
222}