omf/file/
writer.rs

1use std::{
2    fmt::Debug,
3    fs::{File, OpenOptions},
4    io::{Read, Seek, Write},
5    path::Path,
6};
7
8use flate2::write::GzEncoder;
9
10use crate::{
11    Array, ArrayType, FORMAT_VERSION_MAJOR, FORMAT_VERSION_MINOR, FORMAT_VERSION_PRERELEASE,
12    Project,
13    array::DataType,
14    array_type,
15    error::Error,
16    file::zip_container::FileType,
17    validate::{Problems, Validate, Validator},
18};
19
20use super::zip_container::Builder;
21
/// Compression level to use. Applies to Parquet and JSON data in the OMF file.
///
/// Levels run from `0` (no compression) to `9` (smallest output). Values compare, order and
/// hash by their numeric level, so `Compression::fast() < Compression::best()`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Compression(u32);

impl Compression {
    /// Lowest accepted level, meaning "store without compressing".
    const MINIMUM: u32 = 0;
    /// Highest accepted level.
    const MAXIMUM: u32 = 9;

    /// Create a compression level, clamped to the range `0..=9`.
    ///
    /// Out-of-range input is not an error: anything above `9` becomes `9`.
    pub fn new(level: u32) -> Self {
        Self(level.clamp(Self::MINIMUM, Self::MAXIMUM))
    }

    /// No compression.
    pub const fn none() -> Self {
        Self(Self::MINIMUM)
    }

    /// Compress as fast as possible at the cost of file size.
    pub const fn fast() -> Self {
        Self(1)
    }

    /// Take as long as necessary to compress as small as possible.
    pub const fn best() -> Self {
        Self(Self::MAXIMUM)
    }

    /// Returns the compression level, always within `0..=9`.
    pub const fn level(&self) -> u32 {
        self.0
    }
}
55
56impl Default for Compression {
57    /// The default compression level, a balance between speed and file size.
58    fn default() -> Self {
59        Self(6)
60    }
61}
62
63impl From<Compression> for flate2::Compression {
64    fn from(value: Compression) -> Self {
65        Self::new(value.level())
66    }
67}
68
69/// OMF writer object.
70///
71/// To use the writer:
72///
73/// 1. Create the writer object.
74/// 1. Create an empty [`Project`] and fill in the details.
75/// 1. For each element you want to store:
76///     1. Write the arrays and image with the writer.
77///     1. Fill in the required struct with the array pointers and other details then add it to the project.
78///     1. Repeat for the attributes, adding them to the newly created element.
79/// 1. Call `writer.finish(project)` to validate everything inside the the project and write it.
80pub struct Writer<W: Write + Seek> {
81    pub(crate) builder: Builder<W>,
82    compression: Compression,
83}
84
85impl Writer<File> {
86    /// Creates a writer by opening a file.
87    ///
88    /// The file will be created if it doesn't exist, and truncated and replaced if it does.
89    pub fn open(path: impl AsRef<Path>) -> Result<Self, Error> {
90        Self::new(
91            OpenOptions::new()
92                .write(true)
93                .truncate(true)
94                .create(true)
95                .open(path)?,
96        )
97    }
98}
99
100impl<W: Write + Seek> Writer<W> {
101    /// Creates a writer that writes into a file-like object.
102    pub fn new(write: W) -> Result<Self, Error> {
103        Ok(Self {
104            builder: Builder::new(write)?,
105            compression: Default::default(),
106        })
107    }
108
109    /// Return the current compression.
110    pub fn compression(&self) -> Compression {
111        self.compression
112    }
113
114    /// Set the compression to use.
115    ///
116    /// This affects Parquet data and the JSON index, but not images.
117    /// The default is `Compression::default()`.
118    pub fn set_compression(&mut self, compression: Compression) {
119        self.compression = compression;
120    }
121
122    /// Write an array from already-encoded bytes.
123    ///
124    /// Returns the new [`Array`](crate::Array) on success or an error if file IO fails.
125    pub fn array_bytes<A: ArrayType>(
126        &mut self,
127        length: u64,
128        bytes: &[u8],
129    ) -> Result<Array<A>, Error> {
130        let file_type = check_header::<A>(bytes)?;
131        let mut f = self.builder.open(file_type)?;
132        let name = f.name().to_owned();
133        f.write_all(bytes)?;
134        Ok(Array::new(name, length))
135    }
136
137    /// Consumes everything from `read` and writes it as a new array.
138    ///
139    /// The bytes must already be encoded in Parquet, PNG, or JPEG depending on the array type.
140    /// Returns the new [`Array`](crate::Array) on success or an error if file IO fails on either
141    /// side.
142    pub fn array_bytes_from<A: ArrayType>(
143        &mut self,
144        length: u64,
145        mut read: impl Read,
146    ) -> Result<Array<A>, Error> {
147        let mut header = [0_u8; 8];
148        read.read_exact(&mut header)?;
149        let file_type = check_header::<A>(&header)?;
150        let mut f = self.builder.open(file_type)?;
151        let name = f.name().to_owned();
152        f.write_all(&header)?;
153        let mut buffer = vec![0_u8; 4096];
154        loop {
155            let n = read.read(&mut buffer)?;
156            if n == 0 {
157                break;
158            }
159            f.write_all(&buffer)?;
160        }
161        Ok(Array::new(name, length))
162    }
163
164    /// Write an existing PNG or JPEG image from a slice without re-encoding it.
165    pub fn image_bytes(&mut self, bytes: &[u8]) -> Result<Array<array_type::Image>, Error> {
166        self.array_bytes(0, bytes)
167    }
168
169    /// Write an existing PNG or JPEG image from a file without re-encoding it.
170    pub fn image_bytes_from(&mut self, read: impl Read) -> Result<Array<array_type::Image>, Error> {
171        self.array_bytes_from(0, read)
172    }
173
174    /// Validate and write the project and close the file.
175    ///
176    /// Returns validation warnings on success or an [`Error`] on failure, which can be a
177    /// validation failure or a file IO error.
178    pub fn finish(mut self, mut project: Project) -> Result<(W, Problems), Error> {
179        let mut val = Validator::new().with_filenames(self.builder.filenames());
180        project.validate_inner(&mut val);
181        let warnings = val.finish().into_result()?;
182        let gz = GzEncoder::new(self.builder.open(FileType::Index)?, self.compression.into());
183        serde_json::to_writer(gz, &project).map_err(Error::SerializationFailed)?;
184        // In the future we could base the format version on the data, writing backward
185        // compatible files if new features weren't used.
186        let write = self.builder.finish(
187            FORMAT_VERSION_MAJOR,
188            FORMAT_VERSION_MINOR,
189            FORMAT_VERSION_PRERELEASE,
190        )?;
191        Ok((write, warnings))
192    }
193}
194
195fn check_header<A: ArrayType>(bytes: &[u8]) -> Result<FileType, Error> {
196    const PNG_MAGIC: &[u8] = &[0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A, 0x1A, 0x0A];
197    const JPEG_MAGIC: &[u8] = &[0xFF, 0xD8, 0xFF];
198    const PARQUET_MAGIC: &[u8] = b"PAR1";
199    match A::DATA_TYPE {
200        DataType::Image => {
201            if bytes.starts_with(PNG_MAGIC) {
202                Ok(FileType::Png)
203            } else if bytes.starts_with(JPEG_MAGIC) {
204                Ok(FileType::Jpeg)
205            } else {
206                Err(Error::NotImageData)
207            }
208        }
209        _ => {
210            if !bytes.starts_with(PARQUET_MAGIC) || !bytes.ends_with(PARQUET_MAGIC) {
211                Err(Error::NotParquetData)
212            } else {
213                Ok(FileType::Parquet)
214            }
215        }
216    }
217}