1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
use std::{
    fs::File,
    io::{BufReader, Read},
    path::Path,
};

use flate2::read::GzDecoder;

use crate::{
    array,
    error::{Error, Limit},
    validate::{Problems, Validate, Validator},
    Project, FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR, FORMAT_VERSION_PRERELEASE,
};

use super::{
    zip_container::{Archive, INDEX_NAME},
    SubFile,
};

/// Default for [`Limits::validation`]: the number of validation problems kept.
pub const DEFAULT_VALIDATION_LIMIT: u32 = 100;
/// Default for [`Limits::json_bytes`]: 1 MiB of uncompressed JSON.
pub const DEFAULT_JSON_LIMIT: u64 = 1 << 20;
/// Default for [`Limits::image_bytes`]: 16 GiB when pointers are not 32 bits wide.
#[cfg(not(target_pointer_width = "32"))]
pub const DEFAULT_MEMORY_LIMIT: u64 = 16 << 30;
/// Default for [`Limits::image_bytes`]: 1 GiB on targets with 32-bit pointers.
#[cfg(target_pointer_width = "32")]
pub const DEFAULT_MEMORY_LIMIT: u64 = 1 << 30;

/// Resource limits applied while reading OMF files.
///
/// Every field is optional; `None` leaves that particular limit unset.
/// See [`Limits::default`] for the recommended values and
/// [`Limits::no_limits`] for a value with nothing set.
#[derive(Clone, Copy, Debug)]
pub struct Limits {
    /// Upper bound, in bytes, on the uncompressed JSON index.
    ///
    /// Defaults to 1 MB.
    pub json_bytes: Option<u64>,
    /// Upper bound, in bytes, on an uncompressed image.
    ///
    /// Defaults to 1 GB on 32-bit systems and 16 GB on 64-bit systems.
    pub image_bytes: Option<u64>,
    /// Upper bound on the width or height of an image.
    ///
    /// Unlimited by default.
    pub image_dim: Option<u32>,
    /// Upper bound on the number of validation errors.
    ///
    /// Errors past this count are discarded. Defaults to 100.
    pub validation: Option<u32>,
}

impl Limits {
    /// Creates an object with no limits set.
    ///
    /// Running without limits is not recommended.
    pub fn no_limits() -> Self {
        Self {
            json_bytes: None,
            image_bytes: None,
            image_dim: None,
            validation: None,
        }
    }
}

impl Default for Limits {
    /// The default limits.
    fn default() -> Self {
        Self {
            json_bytes: Some(DEFAULT_JSON_LIMIT),
            image_bytes: Some(DEFAULT_MEMORY_LIMIT),
            image_dim: None,
            validation: Some(DEFAULT_VALIDATION_LIMIT),
        }
    }
}

/// OMF reader object.
///
/// The typical usage pattern is:
///
/// 1. Create the reader object with `Reader::new(file)` or `Reader::open(path)`.
/// 1. Optional: retrieve the file version with `reader.version()`.
/// 1. Optional: adjust the limits with `reader.set_limits(...)`.
/// 1. Read the project from the file with `reader.project()`.
/// 1. Iterate through the project's contents to find the elements and attributes you want to load.
/// 1. For each of those items, load the array or image data.
///
/// > **Warning:**
/// > When loading arrays and images from OMF files, beware of "zip bombs",
/// > where data is maliciously crafted to expand to an excessive size when decompressed,
/// > leading to a potential denial of service attack.
/// > Use the provided limits and check array sizes before allocating memory.
pub struct Reader {
    /// The underlying archive that the index and array data are read from.
    archive: Archive,
    /// File format version as `[major, minor]`, as reported by the archive.
    version: [u32; 2],
    /// Limits currently in force; starts out as `Limits::default()`.
    limits: Limits,
}

impl Reader {
    /// Creates the reader from a `SeekRead` implementation.
    ///
    /// Makes only the minimum number of reads to check the file header and footer.
    /// Fails with an error if an IO error occurs or the file isn't in OMF 2 format.
    pub fn new(file: File) -> Result<Self, Error> {
        let archive = Archive::new(file)?;
        let (version, pre_release) = archive.version();
        if let Some(pre) = pre_release {
            if Some(pre) != FORMAT_VERSION_PRERELEASE {
                return Err(Error::PreReleaseVersion(version[0], version[1], pre.into()));
            }
        }
        if version > [FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR] {
            return Err(Error::NewerVersion(version[0], version[1]));
        }
        Ok(Self {
            archive,
            version,
            limits: Default::default(),
        })
    }

    /// Creates a reader by opening the given path.
    pub fn open(path: impl AsRef<Path>) -> Result<Self, Error> {
        Self::new(File::open(path)?)
    }

    /// Returns the current limits.
    pub fn limits(&self) -> Limits {
        self.limits
    }

    /// Sets the memory limits.
    ///
    /// These limits prevent the reader from consuming excessive system resources, which might
    /// allow denial of service attacks with maliciously crafted files. Running without limits
    /// is not recommended.
    pub fn set_limits(&mut self, limits: Limits) {
        self.limits = limits;
    }

    /// Return the version number of the file, which can only be `[2, 0]` right now.
    pub fn version(&self) -> [u32; 2] {
        self.version
    }

    /// Reads, validates, and returns the root `Project` object from the file.
    ///
    /// Fails with an error if an IO error occurs, the `json_bytes` limit is exceeded, or validation
    /// fails. Validation warnings are returned alongside the project if successful or included
    /// with the errors if not.
    pub fn project(&self) -> Result<(Project, Problems), Error> {
        let mut project: Project = serde_json::from_reader(BufReader::new(LimitedRead::new(
            GzDecoder::new(self.archive.open(INDEX_NAME)?),
            self.limits().json_bytes.unwrap_or(u64::MAX),
        )))
        .map_err(Error::DeserializationFailed)?;
        let mut val = Validator::new()
            .with_filenames(self.archive.filenames())
            .with_limit(self.limits().validation);
        project.validate_inner(&mut val);
        let warnings = val.finish().into_result()?;
        Ok((project, warnings))
    }

    /// Returns the size in bytes of the compressed array.
    pub fn array_compressed_size(
        &self,
        array: &array::Array<impl array::ArrayType>,
    ) -> Result<u64, Error> {
        Ok(self.archive.span(array.filename())?.size)
    }

    /// Returns a sub-file for reading raw bytes from the file.
    ///
    /// Fails with an error if the range is invalid. The contents are not checked or validated by
    /// this method. The caller must ensure they are valid and safe to use. This function doesn't
    /// check against any limit.
    pub fn array_bytes_reader(
        &self,
        array: &array::Array<impl array::ArrayType>,
    ) -> Result<SubFile, Error> {
        array.constraint(); // Check that validation has been done.
        self.archive.open(array.filename())
    }

    /// Return the compressed bytes of an array.
    ///
    /// The will allocate memory to store the result. Call `array_compressed_size` to find out how
    /// much will be allocated.
    pub fn array_bytes(
        &self,
        array: &array::Array<impl array::ArrayType>,
    ) -> Result<Vec<u8>, Error> {
        let mut buf = Vec::new();
        self.array_bytes_reader(array)?.read_to_end(&mut buf)?;
        Ok(buf)
    }
}

/// Wraps a reader and keeps a byte budget for it.
///
/// Used when reading the JSON index so that decompressed data is bounded by the
/// `json_bytes` limit.
struct LimitedRead<R> {
    /// The wrapped reader that all reads are forwarded to.
    inner: R,
    /// Remaining byte budget; reduced by the number of bytes each read returns.
    limit: u64,
}

impl<R> LimitedRead<R> {
    fn new(inner: R, limit: u64) -> Self {
        Self { inner, limit }
    }
}

impl<R: Read> Read for LimitedRead<R> {
    /// Reads from the wrapped reader, charging the bytes read against the budget.
    ///
    /// Returns the number of bytes read while the running total stays within the
    /// limit. A stream of exactly `limit` bytes is accepted; only data beyond the
    /// limit is rejected.
    ///
    /// # Errors
    ///
    /// Propagates any error from the wrapped reader. Returns an `ErrorKind::Other`
    /// error wrapping `Error::LimitExceeded(Limit::JsonBytes)` once more bytes have
    /// been read than the limit allows.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let n = self.inner.read(buf)?;
        // `checked_sub` fails only when this read goes past the budget, so reaching
        // exactly zero remaining bytes is still a success.
        match self.limit.checked_sub(n as u64) {
            Some(remaining) => {
                self.limit = remaining;
                Ok(n)
            }
            None => {
                // Pin the budget at zero so any further data keeps failing.
                self.limit = 0;
                Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    Error::LimitExceeded(Limit::JsonBytes),
                ))
            }
        }
    }
}