#!/usr/bin/env python3
#
# Copyright (c) 2017-2019 Tony Su
# Copyright (c) 2023 Red Hat, Inc.
#
# SPDX-License-Identifier: MIT
#
# Adapted from https://github.com/peitaosu/XML-Preprocessor
#
"""This is an XML Preprocessor which can be used to process your XML file
before you use it, to process conditional statements, variables, iteration
statements, error/warning, execute command, etc.

## XML Schema

### Include Files
```
<?include path/to/file ?>
```

### Variables
```
$(env.EnvironmentVariable)

$(sys.SystemVariable)

$(var.CustomVariable)
```

Custom variables are declared with:
```
<?define CustomVariable = "value" ?>
```

### Conditional Statements
```
<?if ?>

<?ifdef ?>

<?ifndef ?>

<?else?>

<?elseif ?>

<?endif?>
```

### Iteration Statements
```
<?foreach VARNAME in 1;2;3?>
    $(var.VARNAME)
<?endforeach?>
```

### Errors and Warnings
```
<?error "This is error message!" ?>

<?warning "This is warning message!" ?>
```

### Commands
```
<?cmd "echo hello world" ?>
```
"""

import os
import platform
import re
import subprocess
import sys
from typing import Optional
from xml.dom import minidom


class Preprocessor():
    """This class holds the XML preprocessing state.

    Attributes:
        sys_vars: values substituted for ``$(sys.NAME)`` references
            (``ARCH``, ``SOURCE`` and ``CURRENT``).
        cus_vars: values substituted for ``$(var.NAME)`` references; filled
            by ``<?define ?>`` directives and by ``<?foreach ?>`` loops.
    """

    def __init__(self):
        self.sys_vars = {
            "ARCH": platform.architecture()[0],
            "SOURCE": os.path.abspath(__file__),
            "CURRENT": os.getcwd(),
        }
        self.cus_vars = {}

    def _pp_include(self, xml_str: str) -> str:
        """Replace each ``<?include path ?>`` with the content of *path*.

        The file is read as UTF-8; errors from ``open()`` (for example a
        missing file) propagate to the caller.
        """
        include_regex = r"(<\?include([\w\s\\/.:_-]+)\s*\?>)"
        matches = re.findall(include_regex, xml_str)
        for group_inc, group_xml in matches:
            inc_file_path = group_xml.strip()
            with open(inc_file_path, "r", encoding="utf-8") as inc_file:
                inc_file_content = inc_file.read()
                xml_str = xml_str.replace(group_inc, inc_file_content)
        return xml_str

    def _pp_env_var(self, xml_str: str) -> str:
        """Expand ``$(env.NAME)`` from the process environment.

        Raises:
            KeyError: if NAME is not set in ``os.environ``.
        """
        envvar_regex = r"(\$\(env\.(\w+)\))"
        matches = re.findall(envvar_regex, xml_str)
        for group_env, group_var in matches:
            xml_str = xml_str.replace(group_env, os.environ[group_var])
        return xml_str

    def _pp_sys_var(self, xml_str: str) -> str:
        """Expand ``$(sys.NAME)`` from ``self.sys_vars``.

        Raises:
            KeyError: if NAME is not a key of ``self.sys_vars``.
        """
        sysvar_regex = r"(\$\(sys\.(\w+)\))"
        matches = re.findall(sysvar_regex, xml_str)
        for group_sys, group_var in matches:
            xml_str = xml_str.replace(group_sys, self.sys_vars[group_var])
        return xml_str

    def _pp_cus_var(self, xml_str: str) -> str:
        """Record ``<?define ?>`` directives, then expand ``$(var.NAME)``.

        Each ``<?define NAME = "value" ?>`` is stored in ``self.cus_vars``
        and removed from the text. References to names that were never
        defined expand to the empty string.
        """
        define_regex = r"(<\?define\s*(\w+)\s*=\s*([\w\s\"]+)\s*\?>)"
        matches = re.findall(define_regex, xml_str)
        for group_def, group_name, group_var in matches:
            group_name = group_name.strip()
            group_var = group_var.strip().strip("\"")
            self.cus_vars[group_name] = group_var
            xml_str = xml_str.replace(group_def, "")
        cusvar_regex = r"(\$\(var\.(\w+)\))"
        matches = re.findall(cusvar_regex, xml_str)
        for group_cus, group_var in matches:
            xml_str = xml_str.replace(
                group_cus,
                self.cus_vars.get(group_var, "")
            )
        return xml_str

    def _pp_foreach(self, xml_str: str) -> str:
        """Expand ``<?foreach NAME in a;b;c?>body<?endforeach?>`` blocks.

        The body is emitted once per ``;``-separated value, with NAME bound
        to that value in ``self.cus_vars`` while the body's variables are
        expanded. NAME keeps the last value after the loop.

        NOTE: nested foreach blocks are not supported; the opening tag is
        paired with the first ``<?endforeach?>`` that follows it.
        """
        # The body is matched lazily so that sibling loops are expanded
        # independently instead of being swallowed into one match, and with
        # DOTALL so that a body containing newlines is still recognised.
        foreach_regex = (
            r"(<\?foreach\s+(\w+)\s+in\s+([\w;]+)\s*\?>"
            r"(.*?)"
            r"<\?endforeach\?>)"
        )
        matches = re.findall(foreach_regex, xml_str, re.DOTALL)
        for group_for, group_name, group_vars, group_text in matches:
            expanded = []
            for var in group_vars.split(";"):
                self.cus_vars[group_name] = var
                expanded.append(self._pp_cus_var(group_text))
            xml_str = xml_str.replace(group_for, "".join(expanded))
        return xml_str

    def _pp_error_warning(self, xml_str: str) -> str:
        """Handle ``<?error "msg" ?>`` and ``<?warning "msg" ?>``.

        Warnings are reported and removed from the text.

        Raises:
            RuntimeError: for the first ``<?error ?>`` directive found.
        """
        error_regex = r"<\?error\s*\"([^\"]+)\"\s*\?>"
        matches = re.findall(error_regex, xml_str)
        if matches:
            raise RuntimeError("[Error]: " + matches[0])
        warning_regex = r"(<\?warning\s*\"([^\"]+)\"\s*\?>)"
        matches = re.findall(warning_regex, xml_str)
        for group_wrn, group_var in matches:
            # Diagnostics go to stderr: stdout may carry the generated XML
            # (see save_xml) and must not be polluted.
            print("[Warning]: " + group_var, file=sys.stderr)
            xml_str = xml_str.replace(group_wrn, "")
        return xml_str

    def _pp_if_eval(self, xml_str: str) -> str:
        """Evaluate ``<?if L op R ?>`` / ``<?elseif L op R ?>`` conditions.

        Each directive is rewritten to ``<?if True?>`` / ``<?if False?>``
        (or the ``elseif`` equivalent) for ``_pp_if_elseif`` to resolve.
        Operators containing ``<`` or ``>`` compare the operands as Python
        expressions; all other operators compare them as strings.
        """
        ifelif_regex = (
            r"(<\?(if|elseif)\s*([^\"\s=<>!]+)\s*([!=<>]+)\s*\"*([^\"=<>!]+)\"*\s*\?>)"
        )
        matches = re.findall(ifelif_regex, xml_str)
        for ifelif, tag, left, operator, right in matches:
            # SECURITY(review): eval() runs text taken from the document.
            # Only preprocess trusted input.
            if "<" in operator or ">" in operator:
                result = eval(f"{left} {operator} {right}")
            else:
                result = eval(f'"{left}" {operator} "{right}"')
            xml_str = xml_str.replace(ifelif, f"<?{tag} {result}?>")
        return xml_str

    def _pp_ifdef_ifndef(self, xml_str: str) -> str:
        """Rewrite ``<?ifdef NAME ?>`` / ``<?ifndef NAME ?>`` to ``<?if ...?>``.

        The result is ``<?if True?>`` or ``<?if False?>`` depending on
        whether NAME is a key of ``self.cus_vars``.
        """
        ifndef_regex = r"(<\?(ifdef|ifndef)\s*([\w]+)\s*\?>)"
        matches = re.findall(ifndef_regex, xml_str)
        for group_ifndef, group_tag, group_var in matches:
            if group_tag == "ifdef":
                result = group_var in self.cus_vars
            else:
                result = group_var not in self.cus_vars
            xml_str = xml_str.replace(group_ifndef, f"<?if {result}?>")
        return xml_str

    def _pp_if_elseif(self, xml_str: str) -> str:
        """Resolve already-evaluated conditionals to the selected branch.

        Handles, in this order, ``if/elseif/else/endif``, ``if/else/endif``
        and ``if/endif`` whose conditions are the literal ``True`` or
        ``False``.
        """
        if_elif_else_regex = (
            r"(<\?if\s(True|False)\?>"
            r"(.*?)"
            r"<\?elseif\s(True|False)\?>"
            r"(.*?)"
            r"<\?else\?>"
            r"(.*?)"
            r"<\?endif\?>)"
        )
        if_else_regex = (
            r"(<\?if\s(True|False)\?>"
            r"(.*?)"
            r"<\?else\?>"
            r"(.*?)"
            r"<\?endif\?>)"
        )
        if_regex = r"(<\?if\s(True|False)\?>(.*?)<\?endif\?>)"
        matches = re.findall(if_elif_else_regex, xml_str, re.DOTALL)
        for (group_full, group_if, group_if_elif, group_elif,
             group_elif_else, group_else) in matches:
            if group_if == "True":
                result = group_if_elif
            elif group_elif == "True":
                result = group_elif_else
            else:
                result = group_else
            xml_str = xml_str.replace(group_full, result)
        matches = re.findall(if_else_regex, xml_str, re.DOTALL)
        for group_full, group_if, group_if_else, group_else in matches:
            result = group_if_else if group_if == "True" else group_else
            xml_str = xml_str.replace(group_full, result)
        matches = re.findall(if_regex, xml_str, re.DOTALL)
        for group_full, group_if, group_text in matches:
            result = group_text if group_if == "True" else ""
            xml_str = xml_str.replace(group_full, result)
        return xml_str

    def _pp_command(self, xml_str: str) -> str:
        """Replace each ``<?cmd "command" ?>`` with the command's output.

        stderr is merged into the captured output.

        Raises:
            subprocess.CalledProcessError: if the command exits non-zero.
        """
        cmd_regex = r"(<\?cmd\s*\"([^\"]+)\"\s*\?>)"
        matches = re.findall(cmd_regex, xml_str)
        for group_cmd, group_exec in matches:
            # SECURITY(review): the command string comes from the document
            # and is run through the shell. Only preprocess trusted input.
            output = subprocess.check_output(
                group_exec, shell=True,
                text=True, stderr=subprocess.STDOUT
            )
            xml_str = xml_str.replace(group_cmd, output)
        return xml_str

    def _pp_blanks(self, xml_str: str) -> str:
        """Strip whitespace that follows ``>`` or precedes ``<``."""
        right_blank_regex = r">[\n\s\t\r]*"
        left_blank_regex = r"[\n\s\t\r]*<"
        xml_str = re.sub(right_blank_regex, ">", xml_str)
        xml_str = re.sub(left_blank_regex, "<", xml_str)
        return xml_str

    def preprocess(self, xml_str: str) -> str:
        """Run every preprocessing pass until the text stops changing.

        Args:
            xml_str: the XML text to preprocess.

        Returns:
            The text after all directives have been expanded.
        """
        fns = [
            self._pp_blanks,
            self._pp_include,
            self._pp_foreach,
            self._pp_env_var,
            self._pp_sys_var,
            self._pp_cus_var,
            self._pp_if_eval,
            self._pp_ifdef_ifndef,
            self._pp_if_elseif,
            self._pp_command,
            self._pp_error_warning,
        ]

        # A pass can expose new directives (e.g. an include that contains a
        # foreach), so repeat the whole sequence until a fixed point.
        while True:
            changed = False
            for func in fns:
                out_xml = func(xml_str)
                if not changed and out_xml != xml_str:
                    changed = True
                xml_str = out_xml
            if not changed:
                break

        return xml_str


def preprocess_xml(path: str) -> str:
    """Read the UTF-8 file at *path* and return its preprocessed content."""
    with open(path, "r", encoding="utf-8") as original_file:
        input_xml = original_file.read()

    proc = Preprocessor()
    return proc.preprocess(input_xml)


def save_xml(xml_str: str, path: Optional[str]):
    """Pretty-print *xml_str* to *path*, or to stdout when *path* is empty.

    The text is parsed with minidom first, so parse errors for malformed
    XML propagate to the caller.
    """
    pretty_xml = minidom.parseString(xml_str).toprettyxml()
    if path:
        with open(path, "w", encoding="utf-8") as output_file:
            output_file.write(pretty_xml)
    else:
        # Do not wrap sys.stdout in a "with" block: leaving the block would
        # close the interpreter's standard output.
        sys.stdout.write(pretty_xml)


def main():
    """Command line entry point: ``xml-preprocessor input.xml [output.xml]``."""
    if len(sys.argv) < 2:
        print("Usage: xml-preprocessor input.xml [output.xml]")
        sys.exit(1)

    output_file = None
    if len(sys.argv) == 3:
        output_file = sys.argv[2]

    input_file = sys.argv[1]
    output_xml = preprocess_xml(input_file)
    save_xml(output_xml, output_file)


if __name__ == "__main__":
    main()