#! /usr/bin/env python3
#                          __  __            _
#                       ___\ \/ /_ __   __ _| |_
#                      / _ \\  /| '_ \ / _` | __|
#                     |  __//  \| |_) | (_| | |_
#                      \___/_/\_\ .__/ \__,_|\__|
#                               |_| XML parser
#
# Copyright (c) 2019-2021 Sebastian Pipping <sebastian@pipping.org>
# Copyright (c) 2021      Tim Bray <tbray@textuality.com>
# Licensed under the MIT license:
#
# Permission is  hereby granted,  free of charge,  to any  person obtaining
# a  copy  of  this  software   and  associated  documentation  files  (the
# "Software"),  to  deal in  the  Software  without restriction,  including
# without  limitation the  rights  to use,  copy,  modify, merge,  publish,
# distribute, sublicense, and/or sell copies of the Software, and to permit
# persons  to whom  the Software  is  furnished to  do so,  subject to  the
# following conditions:
#
# The above copyright  notice and this permission notice  shall be included
# in all copies or substantial portions of the Software.
#
# THE  SOFTWARE  IS  PROVIDED  "AS  IS",  WITHOUT  WARRANTY  OF  ANY  KIND,
# EXPRESS  OR IMPLIED,  INCLUDING  BUT  NOT LIMITED  TO  THE WARRANTIES  OF
# MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN
# NO EVENT SHALL THE AUTHORS OR  COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM,
# DAMAGES OR  OTHER LIABILITY, WHETHER  IN AN  ACTION OF CONTRACT,  TORT OR
# OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
# USE OR OTHER DEALINGS IN THE SOFTWARE.

"""Describe the xmlwf command line with argparse and print its help text.

The module builds ``parser``, an ``argparse.ArgumentParser`` whose groups,
options and epilog mirror the xmlwf command-line interface.  None of the
options has any behavior attached; when the file is run as a script it only
prints the formatted help message.
"""

import argparse

# Free-form text appended after the option listing.  The parser uses
# RawTextHelpFormatter, so the layout below is emitted verbatim.
epilog = """
exit status:
  0             the input files are well-formed and the output (if requested) was written successfully
  1             could not allocate data structures, signals a serious problem with execution environment
  2             one or more input files were not well-formed
  3             could not create an output file
  4             command-line argument error

xmlwf of libexpat is software libre, licensed under the MIT license.
Please report bugs at https://github.com/libexpat/libexpat/issues.  Thank you!
"""

# Hand-written usage synopsis; argparse substitutes %(prog)s with the
# parser's ``prog`` value.
usage = """
  %(prog)s [OPTIONS] [FILE ...]
  %(prog)s -h
  %(prog)s -v
"""

# add_help=False: the stock "-h/--help" option is suppressed so that "-h" can
# be declared by hand in the "info arguments" group further down.
parser = argparse.ArgumentParser(
    prog='xmlwf',
    add_help=False,
    usage=usage,
    description='xmlwf - Determines if an XML document is well-formed',
    formatter_class=argparse.RawTextHelpFormatter,
    epilog=epilog,
)

# --- Options that influence how input is read and parsed ---------------------
input_related = parser.add_argument_group('input control arguments')
input_related.add_argument(
    '-s', action='store_true',
    help='print an error if the document is not [s]tandalone')
input_related.add_argument(
    '-n', action='store_true',
    help='enable [n]amespace processing')
input_related.add_argument(
    '-p', action='store_true',
    help='enable processing external DTDs and [p]arameter entities')
input_related.add_argument(
    '-x', action='store_true',
    help='enable processing of e[x]ternal entities')
input_related.add_argument(
    '-e', action='store', metavar='ENCODING',
    help='override any in-document [e]ncoding declaration')
input_related.add_argument(
    '-w', action='store_true',
    help='enable support for [W]indows code pages')
input_related.add_argument(
    '-r', action='store_true',
    help='disable memory-mapping and use normal file [r]ead IO calls instead')
input_related.add_argument(
    '-k', action='store_true',
    help='when processing multiple files, [k]eep processing after first file with error')

# --- Options that influence what is written ----------------------------------
output_related = parser.add_argument_group('output control arguments')
output_related.add_argument(
    '-d', action='store', metavar='DIRECTORY',
    help='output [d]estination directory')
# -c, -m and -t select alternative output modes, so at most one may be given.
output_mode = output_related.add_mutually_exclusive_group()
output_mode.add_argument(
    '-c', action='store_true',
    help='write a [c]opy of input XML, not canonical XML')
output_mode.add_argument(
    '-m', action='store_true',
    help='write [m]eta XML, not canonical XML')
output_mode.add_argument(
    '-t', action='store_true',
    help='write no XML output for [t]iming of plain parsing')
output_related.add_argument(
    '-N', action='store_true',
    help='enable adding doctype and [n]otation declarations')

# --- Amplification limits -----------------------------------------------------
billion_laughs = parser.add_argument_group(
    'billion laughs attack protection',
    description='NOTE: '
                'If you ever need to increase these values '
                'for non-attack payload, please file a bug report.')
billion_laughs.add_argument(
    '-a', metavar='FACTOR',
    help='set maximum tolerated [a]mplification factor (default: 100.0)')
billion_laughs.add_argument(
    '-b', metavar='BYTES',
    help='set number of output [b]ytes needed to activate (default: 8 MiB)')

# Zero or more positional input files.
parser.add_argument('files', metavar='FILE', nargs='*',
                    help='file to process (default: STDIN)')

# --- Informational options ----------------------------------------------------
# ``info`` is deliberately rebound: the titled group only supplies the heading,
# while the mutually exclusive group nested in it holds -h and -v.
info = parser.add_argument_group('info arguments')
info = info.add_mutually_exclusive_group()
info.add_argument(
    '-h', action='store_true',
    help='show this [h]elp message and exit')
info.add_argument(
    '-v', action='store_true',
    help="show program's [v]ersion number and exit")


if __name__ == '__main__':
    parser.print_help()
94