1#!/usr/bin/env python3
2
3##
4# Licensed to the Apache Software Foundation (ASF) under one
5# or more contributor license agreements.  See the NOTICE file
6# distributed with this work for additional information
7# regarding copyright ownership.  The ASF licenses this file
8# to you under the Apache License, Version 2.0 (the
9# "License"); you may not use this file except in compliance
10# with the License.  You may obtain a copy of the License at
11#
12#     https://www.apache.org/licenses/LICENSE-2.0
13#
14# Unless required by applicable law or agreed to in writing, software
15# distributed under the License is distributed on an "AS IS" BASIS,
16# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
17#
18# See the License for the specific language governing permissions and
19# limitations under the License.
20
21import argparse
22import base64
23import io
24import json
25import os
26from pathlib import Path
27from typing import IO, TextIO
28
29import avro.codecs
30import avro.datafile
31import avro.io
32import avro.schema
33
# Codec name whose output `generate` writes straight to the requested output stream.
NULL_CODEC = avro.datafile.NULL_CODEC
# Keys of avro.codecs.KNOWN_CODECS; one data file is produced per entry.
CODECS_TO_VALIDATE = avro.codecs.KNOWN_CODECS.keys()

# The single sample record appended to every generated data file.
# NOTE(review): field names presumably mirror the interop schema passed on the
# command line -- confirm against that schema file.
DATUM = {
    "intField": 12,
    "longField": 15234324,
    "stringField": "hey",
    "boolField": True,
    "floatField": 1234.0,
    "doubleField": -1234.0,
    "bytesField": b"12312adf",
    "nullField": None,
    "arrayField": [5.0, 0.0, 12.0],
    "mapField": {"a": {"label": "a"}, "bee": {"label": "cee"}},
    "unionField": 12.0,
    "enumField": "C",
    "fixedField": b"1019181716151413",
    "recordField": {"label": "blah", "children": [{"label": "inner", "children": []}]},
}
53
54
def gen_data(codec: str, datum_writer: avro.io.DatumWriter, interop_schema: avro.schema.Schema) -> bytes:
    """Return the bytes of an in-memory data file holding DATUM, written with ``codec``.

    Args:
        codec: Codec name handed to ``avro.datafile.DataFileWriter``.
        datum_writer: Writer used by the data file writer to encode the record.
        interop_schema: Parsed schema the data file is written against.

    Returns:
        The buffer contents captured after appending DATUM and flushing.
    """
    with io.BytesIO() as buffer:
        with avro.datafile.DataFileWriter(buffer, datum_writer, interop_schema, codec=codec) as writer:
            writer.append(DATUM)
            writer.flush()
            # Snapshot the buffer while it is still open; leaving the
            # ``with`` blocks closes both the writer and the buffer.
            encoded = buffer.getvalue()
            return encoded
60
61
def generate(schema_file: TextIO, output_path: IO) -> None:
    """Generate DATUM encoded with every codec in CODECS_TO_VALIDATE.

    When ``output_path`` is a terminal, a single JSON object mapping each codec
    name to its base64-encoded data file is written to it. Note that this mode
    is selected by ``output_path.isatty()``, not by the path being ``-``.

    Otherwise the NULL_CODEC variant is written to ``output_path`` itself and
    every other codec is written to a sibling file named
    ``<base>_<codec><ext>``, derived from ``output_path.name``.

    Args:
        schema_file: Readable text stream containing the schema JSON.
        output_path: Open output stream (normally binary, mode ``"wb"``).
    """
    interop_schema = avro.schema.parse(schema_file.read())
    datum_writer = avro.io.DatumWriter()
    # Lazy: each codec's data is only generated when the consumer below asks for it.
    output = ((codec, gen_data(codec, datum_writer, interop_schema)) for codec in CODECS_TO_VALIDATE)
    if output_path.isatty():
        payload = json.dumps({codec: base64.b64encode(data).decode() for codec, data in output})
        # json produces ``str``, but the stream is usually opened in binary
        # mode; writing ``str`` to it raises TypeError. Encode unless the
        # stream is genuinely a text stream. The payload is ASCII-only
        # (json escapes non-ASCII by default), so the default encoding is safe.
        if isinstance(output_path, io.TextIOBase):
            output_path.write(payload)
        else:
            output_path.write(payload.encode())
        return
    for codec, data in output:
        if codec == NULL_CODEC:
            # The uncompressed variant goes to the exact path the caller asked for.
            output_path.write(data)
            continue
        # Other codecs get the codec name spliced in before the extension.
        base, ext = os.path.splitext(output_path.name)
        Path(f"{base}_{codec}{ext}").write_bytes(data)
75
76
def _parse_args() -> argparse.Namespace:
    """Parse the command-line arguments.

    Returns:
        A namespace with ``schema_path`` (file opened for text reading) and
        ``output_path`` (file opened for binary writing).
    """
    output_help = (
        "Write the different codec variants to these files. "
        "Will append codec extensions to multiple files. "
        "If '-', will output base64 encoded binary"
    )
    cli = argparse.ArgumentParser()
    cli.add_argument("schema_path", type=argparse.FileType(mode="r"))
    cli.add_argument("output_path", type=argparse.FileType(mode="wb"), help=output_help)
    return cli.parse_args()
91
92
def main() -> int:
    """Run the generator from the command line.

    Returns:
        ``0`` once generation has completed.
    """
    namespace = _parse_args()
    generate(schema_file=namespace.schema_path, output_path=namespace.output_path)
    return 0
97
98
# Run only when executed as a script; main()'s return value becomes the exit status.
if __name__ == "__main__":
    raise SystemExit(main())
101