xref: /qemu/scripts/xml-preprocess.py (revision 0848ca87)
19b286e76SMarc-André Lureau#!/usr/bin/env python3
29b286e76SMarc-André Lureau#
39b286e76SMarc-André Lureau# Copyright (c) 2017-2019 Tony Su
49b286e76SMarc-André Lureau# Copyright (c) 2023 Red Hat, Inc.
59b286e76SMarc-André Lureau#
69b286e76SMarc-André Lureau# SPDX-License-Identifier: MIT
79b286e76SMarc-André Lureau#
89b286e76SMarc-André Lureau# Adapted from https://github.com/peitaosu/XML-Preprocessor
99b286e76SMarc-André Lureau#
"""This is an XML preprocessor which can be used to process your XML file before
you use it: it handles conditional statements, variables, iteration
statements, errors/warnings, command execution, etc.
139b286e76SMarc-André Lureau
149b286e76SMarc-André Lureau## XML Schema
159b286e76SMarc-André Lureau
169b286e76SMarc-André Lureau### Include Files
179b286e76SMarc-André Lureau```
189b286e76SMarc-André Lureau<?include path/to/file ?>
199b286e76SMarc-André Lureau```
209b286e76SMarc-André Lureau
219b286e76SMarc-André Lureau### Variables
229b286e76SMarc-André Lureau```
239b286e76SMarc-André Lureau$(env.EnvironmentVariable)
249b286e76SMarc-André Lureau
259b286e76SMarc-André Lureau$(sys.SystemVariable)
269b286e76SMarc-André Lureau
279b286e76SMarc-André Lureau$(var.CustomVariable)
289b286e76SMarc-André Lureau```
299b286e76SMarc-André Lureau
309b286e76SMarc-André Lureau### Conditional Statements
319b286e76SMarc-André Lureau```
329b286e76SMarc-André Lureau<?if ?>
339b286e76SMarc-André Lureau
349b286e76SMarc-André Lureau<?ifdef ?>
359b286e76SMarc-André Lureau
369b286e76SMarc-André Lureau<?ifndef ?>
379b286e76SMarc-André Lureau
389b286e76SMarc-André Lureau<?else?>
399b286e76SMarc-André Lureau
409b286e76SMarc-André Lureau<?elseif ?>
419b286e76SMarc-André Lureau
429b286e76SMarc-André Lureau<?endif?>
439b286e76SMarc-André Lureau```
449b286e76SMarc-André Lureau
459b286e76SMarc-André Lureau### Iteration Statements
469b286e76SMarc-André Lureau```
479b286e76SMarc-André Lureau<?foreach VARNAME in 1;2;3?>
489b286e76SMarc-André Lureau    $(var.VARNAME)
499b286e76SMarc-André Lureau<?endforeach?>
509b286e76SMarc-André Lureau```
519b286e76SMarc-André Lureau
529b286e76SMarc-André Lureau### Errors and Warnings
539b286e76SMarc-André Lureau```
549b286e76SMarc-André Lureau<?error "This is error message!" ?>
559b286e76SMarc-André Lureau
569b286e76SMarc-André Lureau<?warning "This is warning message!" ?>
579b286e76SMarc-André Lureau```
589b286e76SMarc-André Lureau
599b286e76SMarc-André Lureau### Commands
609b286e76SMarc-André Lureau```
<?cmd "echo hello world" ?>
629b286e76SMarc-André Lureau```
639b286e76SMarc-André Lureau"""
649b286e76SMarc-André Lureau
659b286e76SMarc-André Lureauimport os
669b286e76SMarc-André Lureauimport platform
679b286e76SMarc-André Lureauimport re
689b286e76SMarc-André Lureauimport subprocess
699b286e76SMarc-André Lureauimport sys
709b286e76SMarc-André Lureaufrom typing import Optional
719b286e76SMarc-André Lureaufrom xml.dom import minidom
729b286e76SMarc-André Lureau
739b286e76SMarc-André Lureau
class Preprocessor:
    """Hold the state of one XML preprocessing run.

    ``sys_vars`` backs ``$(sys.NAME)`` lookups and ``cus_vars`` backs
    ``$(var.NAME)`` lookups.  ``cus_vars`` is filled while a document is
    processed, by ``<?define ?>`` and ``<?foreach ?>`` directives.
    """

    def __init__(self):
        self.sys_vars = {
            "ARCH": platform.architecture()[0],
            "SOURCE": os.path.abspath(__file__),
            "CURRENT": os.getcwd(),
        }
        self.cus_vars = {}

    def _pp_include(self, xml_str: str) -> str:
        """Replace each ``<?include path ?>`` with the content of *path*.

        The path is opened exactly as written, so any error raised by
        ``open`` (e.g. a missing file) propagates to the caller.
        """
        include_regex = r"(<\?include([\w\s\\/.:_-]+)\s*\?>)"
        matches = re.findall(include_regex, xml_str)
        for group_inc, group_xml in matches:
            inc_file_path = group_xml.strip()
            with open(inc_file_path, "r", encoding="utf-8") as inc_file:
                inc_file_content = inc_file.read()
            xml_str = xml_str.replace(group_inc, inc_file_content)
        return xml_str

    def _pp_env_var(self, xml_str: str) -> str:
        """Substitute ``$(env.NAME)`` with the environment variable NAME.

        Raises ``KeyError`` when NAME is not set in the environment.
        """
        envvar_regex = r"(\$\(env\.(\w+)\))"
        matches = re.findall(envvar_regex, xml_str)
        for group_env, group_var in matches:
            xml_str = xml_str.replace(group_env, os.environ[group_var])
        return xml_str

    def _pp_sys_var(self, xml_str: str) -> str:
        """Substitute ``$(sys.NAME)`` with the matching ``sys_vars`` entry.

        Raises ``KeyError`` when NAME is not a key of ``sys_vars``.
        """
        sysvar_regex = r"(\$\(sys\.(\w+)\))"
        matches = re.findall(sysvar_regex, xml_str)
        for group_sys, group_var in matches:
            xml_str = xml_str.replace(group_sys, self.sys_vars[group_var])
        return xml_str

    def _pp_cus_var(self, xml_str: str) -> str:
        """Record ``<?define NAME = value ?>`` and expand ``$(var.NAME)``.

        Every define directive is stored in ``cus_vars`` (surrounding
        double quotes stripped from the value) and removed from the text.
        References to names missing from ``cus_vars`` expand to an empty
        string.
        """
        define_regex = r"(<\?define\s*(\w+)\s*=\s*([\w\s\"]+)\s*\?>)"
        matches = re.findall(define_regex, xml_str)
        for group_def, group_name, group_var in matches:
            group_name = group_name.strip()
            group_var = group_var.strip().strip("\"")
            self.cus_vars[group_name] = group_var
            xml_str = xml_str.replace(group_def, "")
        cusvar_regex = r"(\$\(var\.(\w+)\))"
        matches = re.findall(cusvar_regex, xml_str)
        for group_cus, group_var in matches:
            xml_str = xml_str.replace(
                group_cus,
                self.cus_vars.get(group_var, "")
            )
        return xml_str

    def _pp_foreach(self, xml_str: str) -> str:
        """Expand ``<?foreach NAME in a;b;c?>body<?endforeach?>`` loops.

        The body is emitted once per ``;``-separated value, with NAME set
        to that value in ``cus_vars`` while the body's variables are
        expanded.  NAME keeps the last value after the loop.
        """
        # The body is matched lazily so that two sibling loops are expanded
        # independently instead of being swallowed as one loop running from
        # the first <?foreach?> to the last <?endforeach?>.  DOTALL lets a
        # body span several lines, like the <?if?> handling does.
        foreach_regex = (
            r"(<\?foreach\s+(\w+)\s+in\s+([\w;]+)\s*\?>(.*?)<\?endforeach\?>)"
        )
        matches = re.findall(foreach_regex, xml_str, re.DOTALL)
        for group_for, group_name, group_vars, group_text in matches:
            expanded = []
            for var in group_vars.split(";"):
                self.cus_vars[group_name] = var
                expanded.append(self._pp_cus_var(group_text))
            xml_str = xml_str.replace(group_for, "".join(expanded))
        return xml_str

    def _pp_error_warning(self, xml_str: str) -> str:
        """Handle ``<?error "msg" ?>`` and ``<?warning "msg" ?>``.

        The first error directive found raises ``RuntimeError``.  Warning
        directives are reported and removed from the text.
        """
        error_regex = r"<\?error\s*\"([^\"]+)\"\s*\?>"
        error = re.search(error_regex, xml_str)
        if error:
            raise RuntimeError("[Error]: " + error.group(1))
        warning_regex = r"(<\?warning\s*\"([^\"]+)\"\s*\?>)"
        matches = re.findall(warning_regex, xml_str)
        for group_wrn, group_var in matches:
            # Diagnostics go to stderr: stdout may be carrying the
            # generated XML document itself.
            print("[Warning]: " + group_var, file=sys.stderr)
            xml_str = xml_str.replace(group_wrn, "")
        return xml_str

    def _pp_if_eval(self, xml_str: str) -> str:
        """Evaluate ``<?if L op R?>`` / ``<?elseif L op R?>`` conditions.

        Each matching directive is rewritten to ``<?if True?>`` or
        ``<?if False?>`` (resp. ``elseif``).  Operators containing ``<``
        or ``>`` compare both sides as Python expressions; any other
        operator compares them as strings.
        """
        ifelif_regex = (
            r"(<\?(if|elseif)\s*([^\"\s=<>!]+)\s*([!=<>]+)\s*\"*([^\"=<>!]+)\"*\s*\?>)"
        )
        matches = re.findall(ifelif_regex, xml_str)
        for ifelif, tag, left, operator, right in matches:
            # NOTE(security): eval() runs text taken from the document as
            # Python code.  Only feed this preprocessor trusted input.
            if "<" in operator or ">" in operator:
                result = eval(f"{left} {operator} {right}")
            else:
                result = eval(f'"{left}" {operator} "{right}"')
            xml_str = xml_str.replace(ifelif, f"<?{tag} {result}?>")
        return xml_str

    def _pp_ifdef_ifndef(self, xml_str: str) -> str:
        """Rewrite ``<?ifdef NAME?>`` / ``<?ifndef NAME?>`` as ``<?if bool?>``.

        NAME counts as defined when it is a key of ``cus_vars``.
        """
        ifndef_regex = r"(<\?(ifdef|ifndef)\s*([\w]+)\s*\?>)"
        matches = re.findall(ifndef_regex, xml_str)
        for group_ifndef, group_tag, group_var in matches:
            if group_tag == "ifdef":
                result = group_var in self.cus_vars
            else:
                result = group_var not in self.cus_vars
            xml_str = xml_str.replace(group_ifndef, f"<?if {result}?>")
        return xml_str

    def _pp_if_elseif(self, xml_str: str) -> str:
        """Resolve evaluated ``<?if?>`` blocks to the branch that applies.

        Works on blocks whose conditions were already reduced to the
        literals ``True`` / ``False``.  A block may have any number of
        ``<?elseif?>`` branches and an optional ``<?else?>``; the text of
        the first true branch (or of the else branch) replaces the block,
        and a block with no applicable branch is removed.
        """
        # Only match a block whose body holds no further "<?if ": that is
        # an innermost block, so its <?elseif?>/<?else?>/<?endif?> markers
        # are guaranteed to belong to it.  Outer blocks become innermost
        # once their children are resolved, hence the loop below.
        if_regex = re.compile(
            r"<\?if\s(True|False)\?>"
            r"((?:(?!<\?if\s).)*?)"
            r"<\?endif\?>",
            re.DOTALL,
        )
        branch_regex = re.compile(r"<\?(?:elseif\s(True|False)|else)\?>")

        def select_branch(match):
            # split() with one capture group yields
            # [text, cond, text, cond, text, ...]; cond is None for <?else?>.
            pieces = branch_regex.split(match.group(2))
            conditions = [match.group(1)] + pieces[1::2]
            bodies = pieces[0::2]
            for condition, body in zip(conditions, bodies):
                if condition is None or condition == "True":
                    return body
            return ""

        while True:
            xml_str, replaced = if_regex.subn(select_branch, xml_str)
            if not replaced:
                return xml_str

    def _pp_command(self, xml_str: str) -> str:
        """Replace each ``<?cmd "command" ?>`` with the command's output.

        The output is the combined stdout/stderr text.  A command exiting
        with a non-zero status raises ``subprocess.CalledProcessError``.
        """
        cmd_regex = r"(<\?cmd\s*\"([^\"]+)\"\s*\?>)"
        matches = re.findall(cmd_regex, xml_str)
        for group_cmd, group_exec in matches:
            # NOTE(security): the command string comes from the document
            # and is run through the shell.  Only process trusted input.
            output = subprocess.check_output(
                group_exec, shell=True,
                text=True, stderr=subprocess.STDOUT
            )
            xml_str = xml_str.replace(group_cmd, output)
        return xml_str

    def _pp_blanks(self, xml_str: str) -> str:
        """Strip whitespace that directly follows ``>`` or precedes ``<``."""
        # \s already covers \n, \t and \r.
        xml_str = re.sub(r">\s*", ">", xml_str)
        xml_str = re.sub(r"\s*<", "<", xml_str)
        return xml_str

    def preprocess(self, xml_str: str) -> str:
        """Run every preprocessing step until the text stops changing.

        The steps are applied in a fixed order, and the whole sequence is
        repeated as long as at least one step modified the text, because
        the output of one step can contain directives for another.
        """
        fns = [
            self._pp_blanks,
            self._pp_include,
            self._pp_foreach,
            self._pp_env_var,
            self._pp_sys_var,
            self._pp_cus_var,
            self._pp_if_eval,
            self._pp_ifdef_ifndef,
            self._pp_if_elseif,
            self._pp_command,
            self._pp_error_warning,
        ]

        while True:
            changed = False
            for func in fns:
                out_xml = func(xml_str)
                changed = changed or out_xml != xml_str
                xml_str = out_xml
            if not changed:
                break

        return xml_str
2629b286e76SMarc-André Lureau
2639b286e76SMarc-André Lureau
def preprocess_xml(path: str) -> str:
    """Read the UTF-8 XML file at *path* and return its preprocessed text."""
    with open(path, "r", encoding="utf-8") as source:
        raw_xml = source.read()
    return Preprocessor().preprocess(raw_xml)
2709b286e76SMarc-André Lureau
2719b286e76SMarc-André Lureau
def save_xml(xml_str: str, path: Optional[str]):
    """Pretty-print *xml_str* to the file at *path*, or to stdout.

    *xml_str* is parsed with ``minidom`` first, so malformed XML raises
    before anything is written.  When *path* is empty or ``None`` the
    result is written to ``sys.stdout``.
    """
    pretty_xml = minidom.parseString(xml_str).toprettyxml()
    if path:
        with open(path, "w", encoding="utf-8") as output_file:
            output_file.write(pretty_xml)
    else:
        # sys.stdout is deliberately not used as a context manager:
        # leaving the ``with`` block would close it for the whole process.
        sys.stdout.write(pretty_xml)
2769b286e76SMarc-André Lureau
2779b286e76SMarc-André Lureau
def main():
    """Command-line entry point: ``input.xml [output.xml]``.

    Preprocesses the input file and writes the pretty-printed result to
    the output file, or to stdout when no output file is given.  Exits
    with status 1 and a usage message on stderr when the number of
    arguments is wrong.
    """
    # Reject extra arguments instead of silently ignoring the output path.
    if not 2 <= len(sys.argv) <= 3:
        print("Usage: xml-preprocessor input.xml [output.xml]",
              file=sys.stderr)
        sys.exit(1)

    input_file = sys.argv[1]
    output_file = sys.argv[2] if len(sys.argv) == 3 else None

    output_xml = preprocess_xml(input_file)
    save_xml(output_xml, output_file)
2909b286e76SMarc-André Lureau
2919b286e76SMarc-André Lureau
# Run the command-line interface only when executed as a script,
# not when the module is imported.
if __name__ == "__main__":
    main()
294