1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
use std::{
fs::File,
io::{BufReader, Read},
path::Path,
};
use flate2::read::GzDecoder;
use crate::{
array,
error::{Error, Limit},
validate::{Problems, Validate, Validator},
Project, FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR, FORMAT_VERSION_PRERELEASE,
};
use super::{
zip_container::{Archive, INDEX_NAME},
SubFile,
};
/// Default cap on the number of validation errors that are kept.
pub const DEFAULT_VALIDATION_LIMIT: u32 = 100;
/// Default cap on the uncompressed JSON index size, in bytes (1 MiB).
pub const DEFAULT_JSON_LIMIT: u64 = 1 << 20;
/// Default memory cap in bytes on targets with 32-bit pointers (1 GiB).
#[cfg(target_pointer_width = "32")]
pub const DEFAULT_MEMORY_LIMIT: u64 = 1 << 30;
/// Default memory cap in bytes on every other target (16 GiB).
#[cfg(not(target_pointer_width = "32"))]
pub const DEFAULT_MEMORY_LIMIT: u64 = 1 << 34;
/// Resource limits applied while reading OMF files.
///
/// A field set to `None` leaves that particular limit switched off.
#[derive(Debug, Clone, Copy)]
pub struct Limits {
    /// Upper bound on the uncompressed size of the JSON index, in bytes.
    ///
    /// Default is 1 MB.
    pub json_bytes: Option<u64>,
    /// Upper bound on the uncompressed size of an image, in bytes.
    ///
    /// Default is 1 GB on 32-bit systems or 16 GB on 64-bit systems.
    pub image_bytes: Option<u64>,
    /// Upper bound on image width or height. Unlimited by default.
    pub image_dim: Option<u32>,
    /// Upper bound on the number of validation errors.
    ///
    /// Errors beyond this limit will be discarded. Default is 100.
    pub validation: Option<u32>,
}

impl Limits {
    /// Returns a `Limits` value in which every limit is switched off.
    ///
    /// Running without limits is not recommended.
    pub fn no_limits() -> Self {
        let (json_bytes, image_bytes) = (None, None);
        let (image_dim, validation) = (None, None);
        Self {
            json_bytes,
            image_bytes,
            image_dim,
            validation,
        }
    }
}

impl Default for Limits {
    /// Returns the default limits: the `DEFAULT_*` constants for the JSON index, image bytes and
    /// validation errors, with the image dimension left unlimited.
    fn default() -> Self {
        Self {
            json_bytes: Some(DEFAULT_JSON_LIMIT),
            image_bytes: Some(DEFAULT_MEMORY_LIMIT),
            validation: Some(DEFAULT_VALIDATION_LIMIT),
            // Everything not listed above stays unlimited.
            ..Self::no_limits()
        }
    }
}
/// OMF reader object.
///
/// Typical usage pattern is:
///
/// 1. Create the reader object.
/// 1. Optional: retrieve the file version with `reader.version()`.
/// 1. Optional: adjust the limits with `reader.set_limits(...)`.
/// 1. Read the project from the file with `reader.project()`.
/// 1. Iterate through the project's contents to find the elements and attributes you want to load.
/// 1. For each of those items load the array or image data.
///
/// > **Warning:**
/// > When loading arrays and images from OMF files, beware of "zip bombs"
/// > where data is maliciously crafted to expand to an excessive size when decompressed,
/// > leading to a potential denial of service attack.
/// > Use the limits provided check arrays sizes before allocating memory.
pub struct Reader {
archive: Archive,
version: [u32; 2],
limits: Limits,
}
impl Reader {
/// Creates the reader from a `SeekRead` implementation.
///
/// Makes only the minimum number of reads to check the file header and footer.
/// Fails with an error if an IO error occurs or the file isn't in OMF 2 format.
pub fn new(file: File) -> Result<Self, Error> {
let archive = Archive::new(file)?;
let (version, pre_release) = archive.version();
if let Some(pre) = pre_release {
if Some(pre) != FORMAT_VERSION_PRERELEASE {
return Err(Error::PreReleaseVersion(version[0], version[1], pre.into()));
}
}
if version > [FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR] {
return Err(Error::NewerVersion(version[0], version[1]));
}
Ok(Self {
archive,
version,
limits: Default::default(),
})
}
/// Creates a reader by opening the given path.
pub fn open(path: impl AsRef<Path>) -> Result<Self, Error> {
Self::new(File::open(path)?)
}
/// Returns the current limits.
pub fn limits(&self) -> Limits {
self.limits
}
/// Sets the memory limits.
///
/// These limits prevent the reader from consuming excessive system resources, which might
/// allow denial of service attacks with maliciously crafted files. Running without limits
/// is not recommended.
pub fn set_limits(&mut self, limits: Limits) {
self.limits = limits;
}
/// Return the version number of the file, which can only be `[2, 0]` right now.
pub fn version(&self) -> [u32; 2] {
self.version
}
/// Reads, validates, and returns the root `Project` object from the file.
///
/// Fails with an error if an IO error occurs, the `json_bytes` limit is exceeded, or validation
/// fails. Validation warnings are returned alongside the project if successful or included
/// with the errors if not.
pub fn project(&self) -> Result<(Project, Problems), Error> {
let mut project: Project = serde_json::from_reader(BufReader::new(LimitedRead::new(
GzDecoder::new(self.archive.open(INDEX_NAME)?),
self.limits().json_bytes.unwrap_or(u64::MAX),
)))
.map_err(Error::DeserializationFailed)?;
let mut val = Validator::new()
.with_filenames(self.archive.filenames())
.with_limit(self.limits().validation);
project.validate_inner(&mut val);
let warnings = val.finish().into_result()?;
Ok((project, warnings))
}
/// Returns the size in bytes of the compressed array.
pub fn array_compressed_size(
&self,
array: &array::Array<impl array::ArrayType>,
) -> Result<u64, Error> {
Ok(self.archive.span(array.filename())?.size)
}
/// Returns a sub-file for reading raw bytes from the file.
///
/// Fails with an error if the range is invalid. The contents are not checked or validated by
/// this method. The caller must ensure they are valid and safe to use. This function doesn't
/// check against any limit.
pub fn array_bytes_reader(
&self,
array: &array::Array<impl array::ArrayType>,
) -> Result<SubFile, Error> {
array.constraint(); // Check that validation has been done.
self.archive.open(array.filename())
}
/// Return the compressed bytes of an array.
///
/// The will allocate memory to store the result. Call `array_compressed_size` to find out how
/// much will be allocated.
pub fn array_bytes(
&self,
array: &array::Array<impl array::ArrayType>,
) -> Result<Vec<u8>, Error> {
let mut buf = Vec::new();
self.array_bytes_reader(array)?.read_to_end(&mut buf)?;
Ok(buf)
}
}
/// Reader adapter that fails once more than a fixed number of bytes has been read through it.
struct LimitedRead<R> {
    /// The wrapped reader.
    inner: R,
    /// Number of bytes that may still be read before the limit is exceeded.
    limit: u64,
}

impl<R> LimitedRead<R> {
    /// Wraps `inner` so that at most `limit` bytes in total can be read through it.
    fn new(inner: R, limit: u64) -> Self {
        Self { inner, limit }
    }
}

impl<R: Read> Read for LimitedRead<R> {
    /// Reads from the wrapped reader and charges the bytes read against the remaining limit.
    ///
    /// Reading exactly `limit` bytes in total succeeds. The first read that would push the total
    /// past the limit fails with an `ErrorKind::Other` IO error wrapping
    /// `Error::LimitExceeded(Limit::JsonBytes)`. Errors from the wrapped reader are passed on
    /// unchanged.
    fn read(&mut self, buf: &mut [u8]) -> std::io::Result<usize> {
        let n = self.inner.read(buf)?;
        // `usize` is at most 64 bits wide on current Rust targets, so the cast is lossless.
        match self.limit.checked_sub(n as u64) {
            // Still within budget; this includes landing exactly on the limit.
            Some(remaining) => {
                self.limit = remaining;
                Ok(n)
            }
            None => {
                // Stay in the failed state so any later non-empty read errors again.
                self.limit = 0;
                Err(std::io::Error::new(
                    std::io::ErrorKind::Other,
                    Error::LimitExceeded(Limit::JsonBytes),
                ))
            }
        }
    }
}