1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
use std::fmt::Write;

use schemars::{
    gen::SchemaSettings,
    schema::{
        InstanceType, Metadata, RootSchema, Schema, SchemaObject, SingleOrVec, SubschemaValidation,
    },
    visit::visit_schema_object,
    JsonSchema,
};
use serde_json::Value;

use crate::{format_full_name, Project};

use schemars::visit::Visitor;

/// Extracts `(variant name, description)` from a schema that represents a
/// single documented string enum value.
///
/// Returns `None` unless `outer_schema` is a `Schema::Object` whose
/// `enum_values` holds exactly one string and whose metadata carries a
/// description.
fn simple_enum_variant(outer_schema: &Schema) -> Option<(String, String)> {
    let Schema::Object(object) = outer_schema else {
        return None;
    };
    // Exactly one value, and it must be a JSON string.
    let name = match object.enum_values.as_deref()? {
        [Value::String(only)] => only,
        _ => return None,
    };
    // The variant is only "simple" when it is documented.
    let text = object.metadata.as_deref()?.description.as_ref()?;
    Some((name.clone(), text.clone()))
}

/// Schema visitor that adjusts each generated schema object; see its
/// `Visitor` implementation for the individual tweaks.
#[derive(Debug, Clone, Default)]
struct TweakSchema {
    /// When `true`, the visitor clears every schema `description` and drops
    /// any metadata that is left equal to `Metadata::default()`.
    remove_descr: bool,
}

impl Visitor for TweakSchema {
    fn visit_schema_object(&mut self, schema: &mut SchemaObject) {
        // Add a maximum for uint8 values.
        if schema.format.as_deref() == Some("uint8") {
            schema.number().maximum = Some(255.0);
        }
        // Move descriptions of simple enum values into the parent.
        if let Some(SubschemaValidation {
            one_of: Some(variants),
            ..
        }) = schema.subschemas.as_deref()
        {
            if let Some(v) = variants
                .iter()
                .map(simple_enum_variant)
                .collect::<Option<Vec<_>>>()
            {
                schema.subschemas = None;
                schema.enum_values = Some(v.iter().map(|(name, _)| (&name[..]).into()).collect());
                schema.instance_type = Some(SingleOrVec::Single(Box::new(InstanceType::String)));
                let mut descr = schema.metadata().description.clone().unwrap_or_default();
                descr += "\n\n### Values\n\n";
                for (n, d) in v {
                    let body = d.replace("\n\n", "\n\n    ");
                    write!(&mut descr, "`{n}`\n:   {body}\n\n").unwrap();
                }
                schema.metadata().description = Some(descr);
            }
        }
        // Optionally remove descriptions. These get transformed into the documentation,
        // they're a bit too complex to be readable in the schema itself.
        if self.remove_descr {
            let mut empty = false;
            if let Some(m) = schema.metadata.as_deref_mut() {
                m.description = None;
                empty = m == &Metadata::default();
            }
            if empty {
                schema.metadata = None;
            }
        }
        // Change references to the generics Array_for_* to just Array.
        if let Some(r) = schema.reference.as_mut() {
            if r.starts_with("#/definitions/Array_for_") {
                "#/definitions/Array".clone_into(r);
            }
        }
        // Then delegate to default implementation to visit any subschemas.
        visit_schema_object(self, schema);
    }
}

/// Generates the draft 2019-09 root schema for `T`, post-processed by
/// `TweakSchema`.
///
/// `remove_descr` is forwarded to the visitor and controls whether
/// descriptions are removed from the result.
pub(crate) fn schema_for<T>(remove_descr: bool) -> RootSchema
where
    T: JsonSchema,
{
    let tweak = TweakSchema { remove_descr };
    let settings = SchemaSettings::draft2019_09().with_visitor(tweak);
    let generator = settings.into_generator();
    generator.into_root_schema_for::<T>()
}

/// Builds the root schema for `Project`, with title and id set and the
/// per-type `Array_for*` definitions folded into a single `Array` definition.
///
/// # Panics
///
/// Panics if the generated schema has no `Array_for_Boolean` definition.
pub(crate) fn project_schema(remove_descr: bool) -> RootSchema {
    let mut root = schema_for::<Project>(remove_descr);

    // Root-level metadata.
    let title = format_full_name();
    let meta = root.schema.metadata();
    meta.title = Some(title);
    meta.id = Some(String::from(
        "https://github.com/gmggroup/omf-rust/blob/main/omf.schema.json",
    ));

    // Keep one copy of the array definition under the name `Array` and drop
    // every `Array_for*` entry.
    let shared_array = root.definitions.get("Array_for_Boolean").cloned().unwrap();
    root.definitions
        .retain(|key, _| !key.starts_with("Array_for"));
    root.definitions.insert(String::from("Array"), shared_array);

    root
}

/// Returns the project JSON schema with descriptions removed.
///
/// Equivalent to `project_schema(true)`.
pub fn json_schema() -> RootSchema {
    project_schema(true)
}

#[cfg(test)]
pub(crate) mod tests {
    use schemars::schema::RootSchema;

    use crate::schema::json_schema;

    /// Path of the checked-in schema file, relative to the test's working directory.
    const SCHEMA: &str = "omf.schema.json";

    /// Regenerates the schema file and then the schema docs. Ignored by
    /// default; run explicitly when the schema changes.
    #[ignore = "used to get schema"]
    #[test]
    fn update_schema() {
        let pretty = serde_json::to_string_pretty(&json_schema()).unwrap();
        std::fs::write(SCHEMA, pretty.as_bytes()).unwrap();
        crate::schema_doc::update_schema_docs();
    }

    /// Regenerates the schema docs and, with the `parquet` feature, dumps the
    /// Parquet schemas. Ignored by default.
    #[ignore = "used to get schema docs"]
    #[test]
    fn update_schema_docs() {
        crate::schema_doc::update_schema_docs();
        #[cfg(feature = "parquet")]
        crate::file::parquet::schemas::dump_parquet_schemas();
    }

    /// Checks that the generated schema still equals the one stored in `SCHEMA`.
    #[test]
    fn schema() {
        let generated = json_schema();
        let file = std::fs::File::open(SCHEMA).unwrap();
        let on_disk: RootSchema = serde_json::from_reader(file).unwrap();
        assert!(generated == on_disk, "schema has changed");
    }
}