1"""Interface to Mediawiki's api.php."""
2#
3# (C) Pywikibot team, 2007-2021
4#
5# Distributed under the terms of the MIT license.
6#
7import datetime
8import hashlib
9import inspect
10import os
11import pickle
12import pprint
13import re
14import traceback
15from collections.abc import Container, MutableMapping, Sized
16from contextlib import suppress
17from email.generator import BytesGenerator
18from email.mime.multipart import MIMEMultipart as MIMEMultipartOrig
19from email.mime.nonmultipart import MIMENonMultipart
20from inspect import getfullargspec
21from io import BytesIO
22from typing import Optional, Union
23from urllib.parse import unquote, urlencode
24from warnings import warn
25
26import pywikibot
27from pywikibot import config, login
28from pywikibot.backports import Dict, Tuple, removeprefix
29from pywikibot.comms import http
30from pywikibot.exceptions import (
31    Error,
32    FatalServerError,
33    InvalidTitleError,
34    MaxlagTimeoutError,
35    NoUsernameError,
36    Server414Error,
37    Server504Error,
38    SiteDefinitionError,
39    TimeoutError,
40    UnsupportedPageError,
41)
42from pywikibot.family import SubdomainFamily
43from pywikibot.login import LoginStatus
44from pywikibot.textlib import removeHTMLParts
45from pywikibot.tools import PYTHON_VERSION, ModuleDeprecationWrapper, itergroup
46from pywikibot.tools.formatter import color_format
47
48
# Logger name passed to the pywikibot logging helpers used in this module.
_logger = 'data.api'

# Matches a message of the form "Waiting for <source>: <n> second(s) lagged"
# and captures the (optionally fractional) number of seconds in the named
# group ``lag``.
lagpattern = re.compile(
    r'Waiting for [\w.: ]+: (?P<lag>\d+(?:\.\d+)?) seconds? lagged')
53
54
def _invalidate_superior_cookies(family):
    """
    Clear cookies for site's second level domain.

    get_login_token() will generate new cookies needed.
    This is a workaround for requests bug, see T224712
    and https://github.com/psf/requests/issues/5411
    for more details.

    :param family: the family whose domain cookies are removed; nothing
        is done unless it is a SubdomainFamily
    """
    if not isinstance(family, SubdomainFamily):
        return

    # Iterate over a snapshot: clearing entries mutates the jar, and a
    # container must not be modified while it is being walked.
    for cookie in list(http.cookie_jar):
        if cookie.domain == family.domain:
            http.cookie_jar.clear(cookie.domain, cookie.path, cookie.name)
68
69
70# Bug: T113120, T228841
71# Subclassing necessary to fix bug of the email package in Python 3:
72# see https://bugs.python.org/issue19003
73# see https://bugs.python.org/issue18886
74# The following solution might be removed if the bug is fixed for
75# Python versions which are supported by PWB, probably with Python 3.5
76
class CTEBinaryBytesGenerator(BytesGenerator):

    """Bytes generator which emits 'binary' encoded payloads unchanged.

    Workaround for bug in python 3 email handling of CTE binary.
    """

    def __init__(self, *args, **kwargs):
        """Initializer; all arguments are handed to BytesGenerator."""
        super().__init__(*args, **kwargs)
        # Replace the fallback body writer of the base generator with the
        # binary aware variant below.
        self._writeBody = self._write_body

    def _write_body(self, msg):
        """Write the body of msg to the output buffer.

        A message whose Content-Transfer-Encoding is 'binary' has its
        decoded payload written as is; any other message is delegated to
        the text handler of the base class.
        """
        if msg['content-transfer-encoding'] != 'binary':
            super()._handle_text(msg)
            return
        self._fp.write(msg.get_payload(decode=True))
91
92
class CTEBinaryMIMEMultipart(MIMEMultipartOrig):

    """Multipart message serialised through CTEBinaryBytesGenerator.

    Workaround for bug in python 3 email handling of CTE binary.
    """

    def as_bytes(self, unixfrom=False, policy=None):
        """Return unmodified binary payload.

        :param unixfrom: passed on to the generator's flatten() call
        :param policy: policy used for flattening; the policy of the
            message itself is used when None is given
        :return: the flattened message as bytes
        """
        if policy is None:
            policy = self.policy
        buffer = BytesIO()
        generator = CTEBinaryBytesGenerator(buffer, mangle_from_=False,
                                            policy=policy)
        generator.flatten(self, unixfrom=unixfrom)
        return buffer.getvalue()


# Name under which the patched multipart class is referred to in this module.
MIMEMultipart = CTEBinaryMIMEMultipart
107
108
109class ParamInfo(Sized, Container):
110
111    """
112    API parameter information data object.
113
114    Provides cache aware fetching of parameter information.
115
116    It does not support the format modules.
117    """
118
119    paraminfo_keys = frozenset(['modules', 'querymodules', 'formatmodules',
120                                'mainmodule', 'pagesetmodule'])
121
122    root_modules = frozenset(['main', 'pageset'])
123
124    init_modules = frozenset(['main', 'paraminfo'])
125
126    def __init__(self, site, preloaded_modules=None, modules_only_mode=None):
127        """
128        Initializer.
129
130        :param preloaded_modules: API modules to preload
131        :type preloaded_modules: set of string
132        :param modules_only_mode: use the 'modules' only syntax for API request
133        :type modules_only_mode: bool or None to only use default, which True
134            if the site is 1.25wmf4+
135        """
136        self.site = site
137
138        # Keys are module names, values are the raw responses from the server.
139        self._paraminfo = {}
140
141        # Cached data.
142        self._prefixes = {}
143        self._prefix_map = {}
144        self._with_limits = None
145
146        self._action_modules = frozenset()  # top level modules
147        self._modules = {}  # filled in _init() (and enlarged in fetch)
148        self._limit = None
149
150        self.preloaded_modules = self.init_modules
151        if preloaded_modules:
152            self.preloaded_modules |= set(preloaded_modules)
153
154        self.modules_only_mode = modules_only_mode
155        if self.modules_only_mode:
156            self.paraminfo_keys = frozenset(['modules'])
157
158    def _add_submodules(self, name, modules):
159        """Add the modules to the internal cache or check if equal."""
160        # The current implementation here doesn't support submodules inside of
161        # submodules, because that would require to fetch all modules when only
162        # the names of them were requested
163        assert '+' not in name
164        modules = frozenset(modules)
165        if name == 'main':
166            # The main module behaves differently as it has no prefix
167            if self._action_modules:
168                assert modules == self._action_modules
169            else:
170                self._action_modules = modules
171        elif name in self._modules:
172            assert modules == self._modules[name]
173        else:
174            self._modules[name] = modules
175
176    def _init(self):
177        assert ('query' in self._modules) is ('main' in self._paraminfo)
178        if 'query' in self._modules:
179            return
180        mw_ver = self.site.mw_version
181
182        # The paraminfo api deprecated the old request syntax of
183        # querymodules='info'; to avoid warnings sites with 1.25wmf4+
184        # must only use 'modules' parameter.
185        if self.modules_only_mode is None:
186            self.modules_only_mode = mw_ver >= '1.25wmf4'
187            if self.modules_only_mode:
188                self.paraminfo_keys = frozenset(['modules'])
189
190        # Assume that by v1.26, it will be desirable to prefetch 'query'
191        if mw_ver > '1.26':
192            self.preloaded_modules |= {'query'}
193
194        self._fetch(self.preloaded_modules)
195
196        main_modules_param = self.parameter('main', 'action')
197        assert main_modules_param
198        assert 'type' in main_modules_param
199        assert isinstance(main_modules_param['type'], list)
200        assert self._action_modules == set(main_modules_param['type'])
201
202        # While deprecated with warning in 1.25, paraminfo param 'querymodules'
203        # provides a list of all query modules. This will likely be removed
204        # from the API in the future, in which case the fallback is the use
205        # the same data available in the paraminfo for query.
206        query_modules_param = self.parameter('paraminfo', 'querymodules')
207
208        if 'limit' not in query_modules_param:
209            raise RuntimeError('"limit" not found in query modules')
210        self._limit = query_modules_param['limit']
211
212        if query_modules_param and 'type' in query_modules_param:
213            # 'type' is the list of modules
214            self._add_submodules('query', query_modules_param['type'])
215
216        if 'query' not in self._modules:
217            assert 'query' not in self._paraminfo
218            self._fetch({'query'})
219        assert 'query' in self._modules
220
221    def _emulate_pageset(self):
222        """Emulate the pageset module, which existed until MW 1.24."""
223        # pageset isn't a module in the new system, so it is emulated, with
224        # the paraminfo from the query module.
225        assert('query' in self._paraminfo)
226
227        self._paraminfo['pageset'] = {
228            'name': 'pageset',
229            'path': 'pageset',
230            'classname': 'ApiPageSet',
231            'prefix': '',
232            'readrights': '',
233            'helpurls': [],
234            'parameters': self._paraminfo['query']['parameters']
235        }
236
237    @staticmethod
238    def _modules_to_set(modules) -> set:
239        """Return modules as a set.
240
241        :type modules: iterable or str
242        """
243        if isinstance(modules, str):
244            return set(modules.split('|'))
245        return set(modules)
246
247    def fetch(self, modules) -> None:
248        """
249        Fetch paraminfo for multiple modules.
250
251        No exception is raised when paraminfo for a module does not exist.
252        Use __getitem__ to cause an exception if a module does not exist.
253
254        :param modules: API modules to load
255        :type modules: iterable or str
256        """
257        if 'main' not in self._paraminfo:
258            # The first request should be 'paraminfo', so that
259            # query modules can be prefixed with 'query+'
260            self._init()
261
262        modules = self._modules_to_set(modules)
263
264        if self._action_modules:
265            # The query module may be added before the action modules have been
266            if 'query' in self._modules:
267                # It does fetch() while initializing, and this method can't be
268                # called before it's initialized.
269                modules = self._normalize_modules(modules)
270            else:
271                # We do know the valid action modules and require a subset
272                assert not modules - self._action_modules - self.root_modules
273
274        self._fetch(modules)
275
276    def _fetch(self, modules: Union[set, frozenset]) -> None:
277        """
278        Fetch paraminfo for multiple modules without initializing beforehand.
279
280        :param modules: API modules to load and which haven't been loaded yet.
281        """
282        def module_generator():
283            """A generator yielding batches of modules."""
284            i = itergroup(sorted(modules), self._limit)
285            for batch in i:
286                for failed_module in failed_modules:
287                    yield [failed_module]
288                del failed_modules[:]
289                yield batch
290
291        modules = modules - set(self._paraminfo.keys())
292        if not modules:
293            return
294
295        assert 'query' in self._modules or 'paraminfo' not in self._paraminfo
296
297        # If something went wrong in a batch it can add each module to the
298        # batch and the generator will on the next iteration yield each module
299        # separately
300        failed_modules = []
301
302        # This can be further optimised, by grouping them in more stable
303        # subsets, which are unlikely to change. i.e. first request core
304        # modules which have been a stable part of the API for a long time.
305        # Also detecting extension based modules may help.
306        # Also, when self.modules_only_mode is disabled, both modules and
307        # querymodules may each be filled with self._limit items, doubling the
308        # number of modules that may be processed in a single batch.
309        for module_batch in module_generator():
310            if self.modules_only_mode and 'pageset' in module_batch:
311                pywikibot.debug('paraminfo fetch: removed pageset', _logger)
312                module_batch.remove('pageset')
313                # If this occurred during initialisation,
314                # also record it in the preloaded_modules.
315                # (at least so tests know an extra load was intentional)
316                if 'query' not in self._paraminfo:
317                    pywikibot.debug('paraminfo batch: added query', _logger)
318                    module_batch.append('query')
319                    self.preloaded_modules |= {'query'}
320
321            params = {
322                'action': 'paraminfo',
323            }
324
325            if self.modules_only_mode:
326                params['modules'] = module_batch
327            else:
328                params['modules'] = [mod for mod in module_batch
329                                     if not mod.startswith('query+')
330                                     and mod not in self.root_modules]
331                params['querymodules'] = [mod[6:] for mod in module_batch
332                                          if mod.startswith('query+')]
333
334                for mod in set(module_batch) & self.root_modules:
335                    params[mod + 'module'] = 1
336
337            # Request need ParamInfo to determine use_get
338            request = self.site._request(expiry=config.API_config_expiry,
339                                         use_get=True,
340                                         parameters=params)
341            result = request.submit()
342
343            normalized_result = self.normalize_paraminfo(result)
344            for path in list(normalized_result):
345                if normalized_result[path] is False:
346                    del normalized_result[path]
347
348            # Sometimes the name/path of the module is not actually the name
349            # which was requested, so we need to manually determine which
350            # (wrongly named) module uses which actual name. See also T105478
351            missing_modules = [m for m in module_batch
352                               if m not in normalized_result]
353            if len(missing_modules) == 1 and len(normalized_result) == 1:
354                # Okay it's possible to recover
355                normalized_result = next(iter(normalized_result.values()))
356                pywikibot.warning('The module "{0[name]}" ("{0[path]}") '
357                                  'was returned as path even though "{1}" '
358                                  'was requested'.format(normalized_result,
359                                                         missing_modules[0]))
360                normalized_result['path'] = missing_modules[0]
361                normalized_result['name'] = missing_modules[0].rsplit('+')[0]
362                normalized_result = {missing_modules[0]: normalized_result}
363            elif len(module_batch) > 1 and missing_modules:
364                # Rerequest the missing ones separately
365                pywikibot.log('Inconsistency in batch "{}"; rerequest '
366                              'separately'.format(missing_modules))
367                failed_modules.extend(missing_modules)
368
369            # Remove all modules which weren't requested, we can't be sure that
370            # they are valid
371            for path in list(normalized_result):
372                if path not in module_batch:
373                    del normalized_result[path]
374
375            self._paraminfo.update(normalized_result)
376            self._generate_submodules(mod['path']
377                                      for mod in normalized_result.values())
378
379        if 'pageset' in modules and 'pageset' not in self._paraminfo:
380            self._emulate_pageset()
381
382    def _generate_submodules(self, modules):
383        """Check and generate submodules for the given modules."""
384        for module in modules:
385            parameters = self._paraminfo[module].get('parameters', [])
386            submodules = set()
387            # Advanced submodule into added to MW API in df80f1ea
388            if self.site.mw_version >= '1.26wmf9':
389                # This is supplying submodules even if they aren't submodules
390                # of the given module so skip those
391                for param in parameters:
392                    if ((module == 'main' and param['name'] == 'format')
393                            or 'submodules' not in param):
394                        continue
395                    for submodule in param['submodules'].values():
396                        if '+' in submodule:
397                            parent, child = submodule.rsplit('+', 1)
398                        else:
399                            parent = 'main'
400                            child = submodule
401                        if parent == module:
402                            submodules.add(child)
403            else:
404                # Boolean submodule info added to MW API in afa153ae
405                if self.site.mw_version < '1.24wmf18':
406                    if module == 'main':
407                        params = {'action'}
408                    elif module == 'query':
409                        params = {'prop', 'list', 'meta'}
410                    else:
411                        params = set()
412                    for param in parameters:
413                        if param['name'] in params:
414                            param['submodules'] = ''
415
416                for param in parameters:
417                    # Do not add format modules
418                    if ('submodules' in param
419                        and (module != 'main'
420                             or param['name'] != 'format')):
421                        submodules |= set(param['type'])
422
423            if submodules:
424                self._add_submodules(module, submodules)
425            if module == 'query':
426                # Previously also modules from generator were used as query
427                # modules, but verify that those are just a subset of the
428                # prop/list/meta modules. There is no sanity check as this
429                # needs to be revisited if query has no generator parameter
430                for param in parameters:
431                    if param['name'] == 'generator':
432                        break
433                else:
434                    param = {}
435                assert param['name'] == 'generator' \
436                    and submodules >= set(param['type'])
437
438    def _normalize_modules(self, modules) -> set:
439        """Add query+ to any query module name not also in action modules."""
440        # Users will supply the wrong type, and expect it to work.
441        modules = self._modules_to_set(modules)
442
443        assert self._action_modules
444
445        return {'query+' + mod
446                if '+' not in mod and mod in self.query_modules
447                and mod not in self._action_modules
448                else mod
449                for mod in modules}
450
451    def normalize_modules(self, modules) -> set:
452        """
453        Convert the modules into module paths.
454
455        Add query+ to any query module name not also in action modules.
456
457        :return: The modules converted into a module paths
458        """
459        self._init()
460        return self._normalize_modules(modules)
461
462    @classmethod
463    def normalize_paraminfo(cls, data):
464        """
465        Convert both old and new API JSON into a new-ish data structure.
466
467        For duplicate paths, the value will be False.
468        """
469        result_data = {}
470        for paraminfo_key, modules_data in data['paraminfo'].items():
471            if not modules_data:
472                continue
473
474            if paraminfo_key[:-len('module')] in cls.root_modules:
475                modules_data = [modules_data]
476            elif not paraminfo_key.endswith('modules'):
477                continue
478
479            for mod_data in modules_data:
480                if 'missing' in mod_data:
481                    continue
482
483                name = mod_data.get('name')
484                php_class = mod_data.get('classname')
485
486                if not name and php_class:
487                    if php_class == 'ApiMain':
488                        name = 'main'
489                    elif php_class == 'ApiPageSet':
490                        name = 'pageset'
491                    else:
492                        pywikibot.warning('Unknown paraminfo module "{}"'
493                                          .format(php_class))
494                        name = '<unknown>:' + php_class
495
496                    mod_data['name'] = name
497
498                if 'path' not in mod_data:
499                    # query modules often contain 'ApiQuery' and have a suffix.
500                    # 'ApiQuery' alone is the action 'query'
501                    if ('querytype' in mod_data
502                        or php_class and len(php_class) > 8
503                            and 'ApiQuery' in php_class):
504                        mod_data['path'] = 'query+' + name
505                    else:
506                        mod_data['path'] = name
507
508                path = mod_data['path']
509
510                if path in result_data:
511                    # Only warn first time
512                    if result_data[path] is not False:
513                        pywikibot.warning('Path "{}" is ambiguous.'
514                                          .format(path))
515                    else:
516                        pywikibot.log('Found another path "{}"'.format(path))
517                    result_data[path] = False
518                else:
519                    result_data[path] = mod_data
520
521        return result_data
522
523    def __getitem__(self, key):
524        """
525        Return a paraminfo module for the module path, caching it.
526
527        Use the module path, such as 'query+x', to obtain the paraminfo for
528        submodule 'x' in the query module.
529
530        If the key does not include a '+' and is not present in the top level
531        of the API, it will fallback to looking for the key 'query+x'.
532        """
533        self.fetch({key})
534        if key in self._paraminfo:
535            return self._paraminfo[key]
536        if '+' not in key:
537            return self._paraminfo['query+' + key]
538        raise KeyError(key)
539
540    def __contains__(self, key) -> bool:
541        """Return whether the key is valid."""
542        try:
543            self[key]
544            return True
545        except KeyError:
546            return False
547
548    def __len__(self) -> int:
549        """Return number of cached modules."""
550        return len(self._paraminfo)
551
552    def parameter(self, module: str, param_name: str) -> Optional[dict]:
553        """
554        Get details about one modules parameter.
555
556        Returns None if the parameter does not exist.
557
558        :param module: API module name
559        :param param_name: parameter name in the module
560        :return: metadata that describes how the parameter may be used
561        """
562        # TODO: the 'description' field of each parameter is not in the default
563        # output of v1.25, and can't removed from previous API versions.
564        # There should be an option to remove this verbose data from the cached
565        # version, for earlier versions of the API, and/or extract any useful
566        # data and discard the entire received paraminfo structure. There are
567        # also params which are common to many modules, such as those provided
568        # by the ApiPageSet php class: titles, pageids, redirects, etc.
569        try:
570            module = self[module]
571        except KeyError:
572            raise ValueError("paraminfo for '{}' not loaded".format(module))
573
574        try:
575            params = module['parameters']
576        except KeyError:
577            pywikibot.warning("module '{}' has no parameters".format(module))
578            return None
579
580        param_data = [param for param in params
581                      if param['name'] == param_name]
582
583        if not param_data:
584            return None
585
586        if len(param_data) != 1:
587            raise RuntimeError(
588                'parameter data length is eiter empty or not unique.\n{}'
589                .format(param_data))
590        return param_data[0]
591
592    @property
593    def module_paths(self):
594        """Set of all modules using their paths."""
595        return self._module_set(True)
596
597    # As soon as modules() is removed, module_paths and _module_set can be
598    # combined, so don't add any code between these two methods.
599    def _module_set(self, path):
600        # Load the submodules of all action modules available
601        self.fetch(self.action_modules)
602        modules = set(self.action_modules)
603        for parent_module in self._modules:
604            submodules = self.submodules(parent_module, path)
605            assert not submodules & modules or not path
606            modules |= submodules
607        return modules
608
609    @property
610    def action_modules(self):
611        """Set of all action modules."""
612        self._init()
613        return self._action_modules
614
615    @property
616    def query_modules(self):
617        """Set of all query module names without query+ path prefix."""
618        return self.submodules('query')
619
620    def submodules(self, name: str, path: bool = False) -> set:
621        """
622        Set of all submodules.
623
624        :param name: The name of the parent module.
625        :param path: Whether the path and not the name is returned.
626        :return: The names or paths of the submodules.
627        """
628        if name not in self._modules:
629            self.fetch([name])
630        submodules = self._modules[name]
631        if path:
632            submodules = self._prefix_submodules(submodules, name)
633        return submodules
634
635    @staticmethod
636    def _prefix_submodules(modules, prefix):
637        """Prefix submodules with path."""
638        return {'{}+{}'.format(prefix, mod) for mod in modules}
639
640    @property
641    def prefix_map(self):
642        """
643        Mapping of module to its prefix for all modules with a prefix.
644
645        This loads paraminfo for all modules.
646        """
647        if not self._prefix_map:
648            self._prefix_map = {module: prefix
649                                for module, prefix
650                                in self.attributes('prefix').items()
651                                if prefix}
652        return self._prefix_map.copy()
653
654    def attributes(self, attribute: str, modules: Optional[set] = None):
655        """
656        Mapping of modules with an attribute to the attribute value.
657
658        It will include all modules which have that attribute set, also if that
659        attribute is empty or set to False.
660
661        :param attribute: attribute name
662        :param modules: modules to include. If None (default), it'll load all
663            modules including all submodules using the paths.
664        :rtype: dict using modules as keys
665        """
666        if modules is None:
667            modules = self.module_paths
668        self.fetch(modules)
669
670        return {mod: self[mod][attribute]
671                for mod in modules if attribute in self[mod]}
672
673
674class OptionSet(MutableMapping):
675
676    """
677    A class to store a set of options which can be either enabled or not.
678
    If it is instantiated with the associated site, module and parameter it
    will only allow valid names as options. If instantiated 'lazy loaded' it
    won't check whether the names are valid until the site has been set
    (which isn't required, but recommended). The site can only be set once:
    after it has been set to a value other than None, any further attempt
    to set a site (even None) will fail.
684    """
685
686    def __init__(self, site=None,
687                 module: Optional[str] = None,
688                 param: Optional[str] = None,
689                 dict: Optional[dict] = None):
690        """
691        Initializer.
692
693        If a site is given, the module and param must be given too.
694
695        :param site: The associated site
696        :type site: pywikibot.site.APISite or None
697        :param module: The module name which is used by paraminfo. (Ignored
698            when site is None)
699        :param param: The parameter name inside the module. That parameter must
700            have a 'type' entry. (Ignored when site is None)
701        :param dict: The initializing dict which is used for
702            :py:obj:`from_dict`
703        """
704        self._site_set = False
705        self._enabled = set()
706        self._disabled = set()
707        self._set_site(site, module, param)
708        if dict:
709            self.from_dict(dict)
710
711    def _set_site(self, site, module: str, param: str,
712                  clear_invalid: bool = False):
713        """Set the site and valid names.
714
715        As soon as the site has been not None, any subsequent calls will fail,
716        unless there had been invalid names and a KeyError was thrown.
717
718        :param site: The associated site
719        :type site: pywikibot.site.APISite
720        :param module: The module name which is used by paraminfo.
721        :param param: The parameter name inside the module. That parameter must
722            have a 'type' entry.
723        :param clear_invalid: Instead of throwing a KeyError, invalid names are
724            silently removed from the options (disabled by default).
725        """
726        if self._site_set:
727            raise TypeError('The site cannot be set multiple times.')
728        # If the entries written to this are valid, it will never be
729        # overwritten
730        self._valid_enable = set()
731        self._valid_disable = set()
732        if site is None:
733            return
734        for type_value in site._paraminfo.parameter(module, param)['type']:
735            if type_value[0] == '!':
736                self._valid_disable.add(type_value[1:])
737            else:
738                self._valid_enable.add(type_value)
739        if clear_invalid:
740            self._enabled &= self._valid_enable
741            self._disabled &= self._valid_disable
742        else:
743            invalid_names = ((self._enabled - self._valid_enable)
744                             | (self._disabled - self._valid_disable))
745            if invalid_names:
746                raise KeyError('OptionSet already contains invalid name(s) '
747                               '"{}"'.format('", "'.join(invalid_names)))
748        self._site_set = True
749
750    def from_dict(self, dictionary):
751        """
752        Load options from the dict.
753
754        The options are not cleared before. If changes have been made
755        previously, but only the dict values should be applied it needs to be
756        cleared first.
757
758        :param dictionary:
759            a dictionary containing for each entry either the value
760            False, True or None. The names must be valid depending on whether
761            they enable or disable the option. All names with the value None
762            can be in either of the list.
763        :type dictionary: dict (keys are strings, values are bool/None)
764        """
765        enabled = set()
766        disabled = set()
767        removed = set()
768        for name, value in dictionary.items():
769            if value is True:
770                enabled.add(name)
771            elif value is False:
772                disabled.add(name)
773            elif value is None:
774                removed.add(name)
775            else:
776                raise ValueError('Dict contains invalid value "{}"'.format(
777                    value))
778        invalid_names = (
779            (enabled - self._valid_enable) | (disabled - self._valid_disable)
780            | (removed - self._valid_enable - self._valid_disable)
781        )
782        if invalid_names and self._site_set:
783            raise ValueError('Dict contains invalid name(s) "{}"'.format(
784                '", "'.join(invalid_names)))
785        self._enabled = enabled | (self._enabled - disabled - removed)
786        self._disabled = disabled | (self._disabled - enabled - removed)
787
788    def clear(self):
789        """Clear all enabled and disabled options."""
790        self._enabled.clear()
791        self._disabled.clear()
792
793    def __setitem__(self, name, value):
794        """Set option to enabled, disabled or neither."""
795        if value is True:
796            if self._site_set and name not in self._valid_enable:
797                raise KeyError('Invalid name "{}"'.format(name))
798            self._enabled.add(name)
799            self._disabled.discard(name)
800        elif value is False:
801            if self._site_set and name not in self._valid_disable:
802                raise KeyError('Invalid name "{}"'.format(name))
803            self._disabled.add(name)
804            self._enabled.discard(name)
805        elif value is None:
806            if self._site_set and (name not in self._valid_enable
807                                   or name not in self._valid_disable):
808                raise KeyError('Invalid name "{}"'.format(name))
809            self._enabled.discard(name)
810            self._disabled.discard(name)
811        else:
812            raise ValueError('Invalid value "{}"'.format(value))
813
814    def __getitem__(self, name) -> Optional[bool]:
815        """
816        Return whether the option is enabled.
817
818        :return: If the name has been set it returns whether it is enabled.
819            Otherwise it returns None. If the site has been set it raises a
820            KeyError if the name is invalid. Otherwise it might return a value
821            even though the name might be invalid.
822        """
823        if name in self._enabled:
824            return True
825        if name in self._disabled:
826            return False
827        if (self._site_set or name in self._valid_enable
828                or name in self._valid_disable):
829            return None
830        raise KeyError('Invalid name "{}"'.format(name))
831
832    def __delitem__(self, name):
833        """Remove the item by setting it to None."""
834        self[name] = None
835
836    def __contains__(self, name):
837        """Return True if option has been set."""
838        return name in self._enabled or name in self._disabled
839
840    def __iter__(self):
841        """Iterate over each enabled and disabled option."""
842        yield from self._enabled
843        yield from self._disabled
844
845    def api_iter(self):
846        """Iterate over each option as they appear in the URL."""
847        yield from self._enabled
848        for disabled in self._disabled:
849            yield '!{}'.format(disabled)
850
851    def __len__(self):
852        """Return the number of enabled and disabled options."""
853        return len(self._enabled) + len(self._disabled)
854
855
856class Request(MutableMapping):
857
858    """A request to a Site's api.php interface.
859
860    Attributes of this object (except for the special parameters listed
861    below) get passed as commands to api.php, and can be get or set
862    using the dict interface. All attributes must be strings. Use an
863    empty string for parameters that don't require a value. For example,
864    Request(action="query", titles="Foo bar", prop="info", redirects="")
865    corresponds to the API request
866    "api.php?action=query&titles=Foo%20bar&prop=info&redirects"
867
868    This is the lowest-level interface to the API, and can be used for any
869    request that a particular site's API supports. See the API documentation
870    (https://www.mediawiki.org/wiki/API) and site-specific settings for
871    details on what parameters are accepted for each request type.
872
873    Uploading files is a special case: to upload, the parameter "mime" must
874    contain a dict, and the parameter "file" must be set equal to a valid
875    filename on the local computer, _not_ to the content of the file.
876
877    Returns a dict containing the JSON data returned by the wiki. Normally,
878    one of the dict keys will be equal to the value of the 'action'
879    parameter. Errors are caught and raise an APIError exception.
880
881    Example:
882
883    >>> r = Request(parameters={'action': 'query', 'meta': 'userinfo'})
884    >>> # This is equivalent to
885    >>> # https://{path}/api.php?action=query&meta=userinfo&format=json
886    >>> # change a parameter
887    >>> r['meta'] = "userinfo|siteinfo"
888    >>> # add a new parameter
889    >>> r['siprop'] = "namespaces"
890    >>> # note that "uiprop" param gets added automatically
891    >>> str(r.action)
892    'query'
893    >>> sorted(str(key) for key in r._params.keys())
894    ['action', 'meta', 'siprop']
895    >>> [str(key) for key in r._params['action']]
896    ['query']
897    >>> [str(key) for key in r._params['meta']]
898    ['userinfo', 'siteinfo']
899    >>> [str(key) for key in r._params['siprop']]
900    ['namespaces']
901    >>> data = r.submit()
902    >>> isinstance(data, dict)
903    True
904    >>> set(['query', 'batchcomplete', 'warnings']).issuperset(data.keys())
905    True
906    >>> 'query' in data
907    True
908    >>> sorted(str(key) for key in data['query'].keys())
909    ['namespaces', 'userinfo']
910
911    """
912
913    # To make sure the default value of 'parameters' can be identified.
914    _PARAM_DEFAULT = object()
915
916    def __init__(self, site=None,
917                 mime: Optional[dict] = None,
918                 throttle: bool = True,
919                 max_retries: Optional[int] = None,
920                 retry_wait: Optional[int] = None,
921                 use_get: Optional[bool] = None,
922                 parameters=_PARAM_DEFAULT, **kwargs):
923        """
924        Create a new Request instance with the given parameters.
925
926        The parameters for the request can be defined via either the
927        'parameters' parameter or the keyword arguments. The keyword arguments
928        were the previous implementation but could cause problems when there
929        are arguments to the API named the same as normal arguments to this
930        class. So the second parameter 'parameters' was added which just
931        contains all parameters. When a Request instance is created it must use
932        either one of them and not both at the same time. To have backwards
933        compatibility it adds a parameter named 'parameters' to kwargs when
934        both parameters are set as that indicates an old call and 'parameters'
935        was originally supplied as a keyword parameter.
936
937        If undefined keyword arguments were given AND the 'parameters'
938        parameter was supplied as a positional parameter it still assumes
939        'parameters' were part of the keyword arguments.
940
941        If a class is using Request and is directly forwarding the parameters,
942        :py:obj:`Request.clean_kwargs` can be used to automatically
943        convert the old kwargs mode into the new parameter mode. This
944        normalizes the arguments so that when the API parameters are
945        modified the changes can always be applied to the 'parameters'
946        parameter.
947
948        :param site: The Site to which the request will be submitted. If not
949               supplied, uses the user's configured default Site.
950        :param mime: If not None, send in "multipart/form-data" format (default
951               None). Parameters which should only be transferred via mime
952               mode are defined via this parameter (even an empty dict means
953               mime shall be used).
954        :param max_retries: Maximum number of times to retry after
955               errors, defaults to config.max_retries.
956        :param retry_wait: Minimum time in seconds to wait after an
957               error, defaults to config.retry_wait seconds (doubles each retry
958               until config.retry_max seconds is reached).
959        :param use_get: Use HTTP GET request if possible. If False it
960               uses a POST request. If None, it'll try to determine via
961               action=paraminfo if the action requires a POST.
962        :param parameters: The parameters used for the request to the API.
963        :type parameters: dict
964        :param kwargs: The parameters used for the request to the API.
965        """
966        if site is None:
967            self.site = pywikibot.Site()
968            warn('Request() invoked without a site; setting to {}'
969                 .format(self.site), RuntimeWarning, 2)
970        else:
971            self.site = site
972
973        self.mime = mime
974        if isinstance(mime, bool):
975            raise TypeError('mime param in api.Request() must not be boolean')
976
977        self.throttle = throttle
978        self.use_get = use_get
979        if max_retries is None:
980            self.max_retries = pywikibot.config.max_retries
981        else:
982            self.max_retries = max_retries
983        self.current_retries = 0
984        if retry_wait is None:
985            self.retry_wait = pywikibot.config.retry_wait
986        else:
987            self.retry_wait = retry_wait
988        # The only problem with that system is that it won't detect when
989        # 'parameters' is actually the only parameter for the request as it
990        # then assumes it's using the new mode (and the parameters are actually
991        # in the parameter 'parameters' not that the parameter 'parameters' is
992        # actually a parameter for the request). But that is invalid anyway as
993        # it MUST have at least an action parameter for the request which would
994        # be in kwargs if it's using the old mode.
995        if kwargs:
996            if parameters is not self._PARAM_DEFAULT:
997                # 'parameters' AND kwargs is set. In that case think of
998                # 'parameters' being an old kwarg which is now filled in an
999                # actual parameter
1000                self._warn_both()
1001                kwargs['parameters'] = parameters
1002            # When parameters wasn't set it's likely that kwargs-mode was used
1003            self._warn_kwargs()
1004            parameters = kwargs
1005        elif parameters is self._PARAM_DEFAULT:
1006            parameters = {}
1007        self._params = {}
1008        if 'action' not in parameters:
1009            raise ValueError("'action' specification missing from Request.")
1010        self.action = parameters['action']
1011        self.update(parameters)  # also convert all parameter values to lists
1012        self._warning_handler = None
1013        # Actions that imply database updates on the server, used for various
1014        # things like throttling or skipping actions when we're in simulation
1015        # mode
1016        self.write = self.action in {
1017            'block', 'clearhasmsg', 'createaccount', 'delete', 'edit',
1018            'emailuser', 'filerevert', 'flowthank', 'imagerotate', 'import',
1019            'managetags', 'mergehistory', 'move', 'options', 'patrol',
1020            'protect', 'purge', 'resetpassword', 'revisiondelete', 'rollback',
1021            'setnotificationtimestamp', 'setpagelanguage', 'tag', 'thank',
1022            'unblock', 'undelete', 'upload', 'userrights', 'watch',
1023            'wbcreateclaim', 'wbcreateredirect', 'wbeditentity',
1024            'wblinktitles', 'wbmergeitems', 'wbremoveclaims',
1025            'wbremovequalifiers', 'wbremovereferences', 'wbsetaliases',
1026            'wbsetclaim', 'wbsetclaimvalue', 'wbsetdescription', 'wbsetlabel',
1027            'wbsetqualifier', 'wbsetreference', 'wbsetsitelink',
1028        }
1029        # Client side verification that the request is being performed
1030        # by a logged in user, and warn if it isn't a config username.
1031        if self.write:
1032            try:
1033                username = self.site.userinfo['name']
1034            except KeyError:
1035                raise Error('API write action attempted without user name')
1036
1037            if 'anon' in self.site.userinfo:
1038                raise Error("API write action attempted as IP '{}'"
1039                            .format(username))
1040
1041            if not self.site.user() or self.site.username() != username:
1042                pywikibot.warning(
1043                    'API write action by unexpected username {} commenced.\n'
1044                    'userinfo: {!r}'.format(username, self.site.userinfo))
1045
1046        # Make sure user is logged in
1047        if self.write:
1048            pywikibot.debug('Adding user assertion', _logger)
1049            self['assert'] = 'user'
1050
1051    @classmethod
1052    def create_simple(cls, req_site, **kwargs):
1053        """Create a new instance using all args except site for the API."""
1054        # This ONLY support site so that any caller can be sure there will be
1055        # no conflict with PWB parameters
1056        # req_site is needed to avoid conflicts with possible site keyword in
1057        # kwarg until positional-only parameters are supported, see T262926
1058        # TODO: Use ParamInfo request to determine valid parameters
1059        if isinstance(kwargs.get('parameters'), dict):
1060            warn('The request contains already a "parameters" entry which is '
1061                 'a dict.')
1062        return cls(site=req_site, parameters=kwargs)
1063
1064    @classmethod
1065    def _warn_both(cls):
1066        """Warn that kwargs mode was used but parameters was set too."""
1067        warn('Both kwargs and parameters are set in Request.__init__. It '
1068             'assumes that "parameters" is actually a parameter of the '
1069             'Request and is added to kwargs.', DeprecationWarning, 3)
1070
1071    @classmethod
1072    def _warn_kwargs(cls):
1073        """Warn that kwargs was used instead of parameters."""
1074        warn('Instead of using kwargs from Request.__init__, parameters '
1075             'for the request to the API should be added via the '
1076             '"parameters" parameter.', DeprecationWarning, 3)
1077
1078    @classmethod
1079    def clean_kwargs(cls, kwargs: dict) -> dict:
1080        """
1081        Convert keyword arguments into new parameters mode.
1082
1083        If there are no other arguments in kwargs apart from the used arguments
1084        by the class' initializer it'll just return kwargs and otherwise remove
1085        those which aren't in the initializer and put them in a dict which is
1086        added as a 'parameters' keyword. It will always create a shallow copy.
1087
1088        :param kwargs: The original keyword arguments which is not modified.
1089        :return: The normalized keyword arguments.
1090        """
1091        if 'expiry' in kwargs and kwargs['expiry'] is None:
1092            del kwargs['expiry']
1093
1094        args = set()
1095        for super_cls in inspect.getmro(cls):
1096            if not super_cls.__name__.endswith('Request'):
1097                break
1098            args |= set(getfullargspec(super_cls.__init__).args)
1099        else:
1100            raise ValueError('Request was not a super class of '
1101                             '{0!r}'.format(cls))
1102        args -= {'self'}
1103        old_kwargs = set(kwargs)
1104        # all kwargs defined above but not in args indicate 'kwargs' mode
1105        if old_kwargs - args:
1106            # Move all kwargs into parameters
1107            parameters = {name: value for name, value in kwargs.items()
1108                          if name not in args or name == 'parameters'}
1109            if 'parameters' in parameters:
1110                cls._warn_both()
1111            # Copy only arguments and not the parameters
1112            kwargs = {name: value for name, value in kwargs.items()
1113                      if name in args or name == 'self'}
1114            kwargs['parameters'] = parameters
1115            # Make sure that all arguments have remained
1116            assert(old_kwargs | {'parameters'}
1117                   == set(kwargs) | set(kwargs['parameters']))
1118            assert(('parameters' in old_kwargs)
1119                   is ('parameters' in kwargs['parameters']))
1120            cls._warn_kwargs()
1121        else:
1122            kwargs = dict(kwargs)
1123            kwargs.setdefault('parameters', {})
1124        return kwargs
1125
1126    def _format_value(self, value):
1127        """
1128        Format the MediaWiki API request parameter.
1129
1130        Converts from Python datatypes to MediaWiki API parameter values.
1131
1132        Supports:
1133         * datetime.datetime (using strftime and ISO8601 format)
1134         * pywikibot.page.BasePage (using title (+namespace; -section))
1135
1136        All other datatypes are converted to string.
1137        """
1138        if isinstance(value, datetime.datetime):
1139            return value.strftime(pywikibot.Timestamp.ISO8601Format)
1140        if isinstance(value, pywikibot.page.BasePage):
1141            if value.site != self.site:
1142                raise RuntimeError(
1143                    'value.site {!r} is different from Request.site {!r}'
1144                    .format(value.site, self.site))
1145            return value.title(with_section=False)
1146        return str(value)
1147
1148    def __getitem__(self, key):
1149        """Implement dict interface."""
1150        return self._params[key]
1151
1152    def __setitem__(self, key: str, value):
1153        """Set MediaWiki API request parameter.
1154
1155        :param value: param value(s)
1156        :type value: str in site encoding
1157            (string types may be a `|`-separated list)
1158            iterable, where items are converted to string
1159            with special handling for datetime.datetime to convert it to a
1160            string using the ISO 8601 format accepted by the MediaWiki API.
1161        """
1162        if isinstance(value, bytes):
1163            value = value.decode(self.site.encoding())
1164
1165        if isinstance(value, str):
1166            value = value.split('|')
1167
1168        if hasattr(value, 'api_iter'):
1169            self._params[key] = value
1170        else:
1171            try:
1172                iter(value)
1173            except TypeError:
1174                # convert any non-iterable value into a single-element list
1175                self._params[key] = [value]
1176            else:
1177                self._params[key] = list(value)
1178
1179    def __delitem__(self, key):
1180        """Implement dict interface."""
1181        del self._params[key]
1182
1183    def keys(self):
1184        """Implement dict interface."""
1185        return list(self._params.keys())
1186
1187    def __contains__(self, key):
1188        """Implement dict interface."""
1189        return key in self._params
1190
1191    def __iter__(self):
1192        """Implement dict interface."""
1193        return iter(self._params)
1194
1195    def __len__(self):
1196        """Implement dict interface."""
1197        return len(self._params)
1198
1199    def iteritems(self):
1200        """Implement dict interface."""
1201        return iter(self._params.items())
1202
1203    def items(self):
1204        """Return a list of tuples containing the parameters in any order."""
1205        return list(self._params.items())
1206
1207    def _add_defaults(self):
1208        """
1209        Add default parameters to the API request.
1210
1211        This method will only add them once.
1212        """
1213        if hasattr(self, '__defaulted'):
1214            return
1215
1216        if self.mime is not None \
1217           and set(self._params.keys()) & set(self.mime.keys()):
1218            raise ValueError('The mime and params shall not share the '
1219                             'same keys.')
1220
1221        if self.action == 'query':
1222            meta = self._params.get('meta', [])
1223            # Special logic for private wikis (T153903).
1224            # If the wiki requires login privileges to read articles, pywikibot
1225            # will be blocked from accessing the userinfo.
1226            # Work around this by requiring userinfo only if 'tokens' and
1227            # 'login' are not both set.
1228            typep = self._params.get('type', [])
1229            if not ('tokens' in meta and 'login' in typep):
1230                if 'userinfo' not in meta:
1231                    meta = set(meta + ['userinfo'])
1232                    self['meta'] = sorted(meta)
1233                uiprop = self._params.get('uiprop', [])
1234                uiprop = set(uiprop + ['blockinfo', 'hasmsg'])
1235                self['uiprop'] = sorted(uiprop)
1236            if 'prop' in self._params:
1237                if self.site.has_extension('ProofreadPage'):
1238                    prop = set(self['prop'] + ['proofread'])
1239                    self['prop'] = sorted(prop)
1240            # When neither 'continue' nor 'rawcontinue' is present and the
1241            # version number is at least 1.25wmf5 we add a dummy rawcontinue
1242            # parameter. Querying siteinfo is save as it adds 'continue'
1243            # except for 'tokens' (T284577)
1244            if ('tokens' not in meta and 'continue' not in self._params
1245                    and self.site.mw_version >= '1.25wmf5'):
1246                self._params.setdefault('rawcontinue', [''])
1247        elif self.action == 'help' and self.site.mw_version > '1.24':
1248            self['wrap'] = ''
1249
1250        if config.maxlag:
1251            self._params.setdefault('maxlag', [str(config.maxlag)])
1252        self._params.setdefault('format', ['json'])
1253        if self['format'] != ['json']:
1254            raise TypeError(
1255                "Query format '{}' cannot be parsed.".format(self['format']))
1256
1257        self.__defaulted = True
1258
1259    def _encoded_items(self):
1260        """
1261        Build a dict of params with minimal encoding needed for the site.
1262
1263        This helper method only prepares params for serialisation or
1264        transmission, so it only encodes values which are not ASCII,
1265        requiring callers to consider how to handle ASCII vs other values,
1266        however the output is designed to enable __str__ and __repr__ to
1267        do the right thing in most circumstances.
1268
1269        Servers which use an encoding that is not a superset of ASCII
1270        are not supported.
1271
1272        :return: Parameters either in the site encoding, or ASCII strings
1273        :rtype: dict with values of either str or bytes
1274        """
1275        params = {}
1276        for key, values in self._params.items():
1277            try:
1278                iterator = values.api_iter()
1279            except AttributeError:
1280                if len(values) == 1:
1281                    value = values[0]
1282                    if value is True:
1283                        values = ['']
1284                    elif value is False or value is None:
1285                        # False and None are not included in the http URI
1286                        continue
1287                iterator = iter(values)
1288            value = '|'.join(self._format_value(value) for value in iterator)
1289            # If the value is encodable as ascii, do not encode it.
1290            # This means that any value which can be encoded as ascii
1291            # is presumed to be ascii, and servers using a site encoding
1292            # which is not a superset of ascii may be problematic.
1293            try:
1294                value.encode('ascii')
1295            except UnicodeError:
1296                try:
1297                    value = value.encode(self.site.encoding())
1298                except Exception:
1299                    pywikibot.error(
1300                        "_encoded_items: '{}' could not be encoded as '{}':"
1301                        ' {!r}'.format(key, self.site.encoding(), value))
1302            assert key.encode('ascii')
1303            assert isinstance(key, str)
1304            params[key] = value
1305        return params
1306
1307    def _http_param_string(self):
1308        """
1309        Return the parameters as a HTTP URL query fragment.
1310
1311        URL encodes the parameters provided by _encoded_items()
1312
1313        :note: Not all parameters are sorted, therefore for two given
1314            CachedRequest objects with equal _params, the result of
1315            _http_param_string() is not necessarily equal.
1316        """
1317        return encode_url(self._encoded_items())
1318
1319    def __str__(self):
1320        """Return a string representation."""
1321        return unquote(self.site.scriptpath()
1322                       + '/api.php?'
1323                       + self._http_param_string())
1324
1325    def __repr__(self):
1326        """Return internal representation."""
1327        return '{}.{}<{}->{!r}>'.format(self.__class__.__module__,
1328                                        self.__class__.__name__,
1329                                        self.site, str(self))
1330
1331    def _simulate(self, action):
1332        """Simulate action."""
1333        if action and config.simulate and (
1334                self.write or action in config.actions_to_block):
1335            pywikibot.output(color_format(
1336                '{black;yellow}SIMULATION: {} action blocked.{default}',
1337                action))
1338            # for more realistic simulation
1339            if config.simulate is not True:
1340                pywikibot.sleep(float(config.simulate))
1341            return {
1342                action: {'result': 'Success', 'nochange': ''},
1343
1344                # wikibase results
1345                'pageinfo': {'lastrevid': -1},
1346                'entity': {'lastrevid': -1},
1347            }
1348        return None
1349
1350    def _is_wikibase_error_retryable(self, error):
1351        # dict of error message and current action.
1352        # Value is True if action type is to be ignored
1353        err_msg = {
1354            'edit-already-exists': 'wbeditentity',
1355            'actionthrottledtext': True,  # T192912, T268645
1356        }
1357        messages = error.get('messages')
1358        message = None
1359        # bug T68619; after Wikibase breaking change 1ca9cee we have a
1360        # list of messages
1361        if isinstance(messages, list):
1362            for item in messages:
1363                message = item['name']
1364                action = err_msg.get(message)
1365                if action is True or action == self.action:
1366                    return True
1367
1368            return False
1369
1370        if isinstance(messages, dict):
1371            try:  # behaviour before gerrit 124323 breaking change
1372                message = messages['0']['name']
1373            except KeyError:  # unsure the new output is always a list
1374                message = messages['name']
1375        action = err_msg.get(message)
1376        return action is True or action == self.action
1377
1378    @staticmethod
1379    def _generate_mime_part(key, content, keytype=None, headers=None):
1380        if not keytype:
1381            try:
1382                content.encode('ascii')
1383                keytype = ('text', 'plain')
1384            except (UnicodeError, AttributeError):
1385                keytype = ('application', 'octet-stream')
1386        submsg = MIMENonMultipart(*keytype)
1387        content_headers = {'name': key}
1388        if headers:
1389            content_headers.update(headers)
1390        submsg.add_header('Content-disposition', 'form-data',
1391                          **content_headers)
1392
1393        if keytype != ('text', 'plain'):
1394            submsg['Content-Transfer-Encoding'] = 'binary'
1395
1396        submsg.set_payload(content)
1397        return submsg
1398
1399    def _use_get(self):
1400        """Verify whether 'get' is to be used."""
1401        if (not config.enable_GET_without_SSL
1402                and self.site.protocol() != 'https'
1403                or self.site.is_oauth_token_available()):  # T108182 workaround
1404            use_get = False
1405        elif self.use_get is None:
1406            if self.action == 'query':
1407                # for queries check the query module
1408                modules = set()
1409                for mod_type_name in ('list', 'prop', 'generator'):
1410                    modules.update(self._params.get(mod_type_name, []))
1411            else:
1412                modules = {self.action}
1413            if modules:
1414                self.site._paraminfo.fetch(modules)
1415                use_get = all('mustbeposted' not in self.site._paraminfo[mod]
1416                              for mod in modules)
1417            else:
1418                # If modules is empty, just 'meta' was given, which doesn't
1419                # require POSTs, and is required for ParamInfo
1420                use_get = True
1421        else:
1422            use_get = self.use_get
1423        return use_get
1424
1425    @classmethod
1426    def _build_mime_request(cls, params: dict,
1427                            mime_params: dict) -> Tuple[dict, bytes]:
1428        """
1429        Construct a MIME multipart form post.
1430
1431        :param params: HTTP request params
1432        :param mime_params: HTTP request parts which must be sent in the body
1433        :type mime_params: dict of (content, keytype, headers)
1434        :return: HTTP request headers and body
1435        """
1436        # construct a MIME message containing all API key/values
1437        container = MIMEMultipart(_subtype='form-data')
1438        for key, value in params.items():
1439            submsg = cls._generate_mime_part(key, value)
1440            container.attach(submsg)
1441        for key, value in mime_params.items():
1442            submsg = cls._generate_mime_part(key, *value)
1443            container.attach(submsg)
1444
1445        # strip the headers to get the HTTP message body
1446        body = container.as_bytes()
1447        marker = b'\n\n'  # separates headers from body
1448        eoh = body.find(marker)
1449        body = body[eoh + len(marker):]
1450        # retrieve the headers from the MIME object
1451        headers = dict(container.items())
1452        return headers, body
1453
1454    def _get_request_params(self, use_get, paramstring):
1455        """Get request parameters."""
1456        uri = self.site.apipath()
1457        if self.mime is not None:
1458            (headers, body) = Request._build_mime_request(
1459                self._encoded_items(), self.mime)
1460            use_get = False  # MIME requests require HTTP POST
1461        else:
1462            headers = {'Content-Type': 'application/x-www-form-urlencoded'}
1463            if (not self.site.maximum_GET_length()
1464                    or self.site.maximum_GET_length() < len(paramstring)):
1465                use_get = False
1466            if use_get:
1467                uri = '{}?{}'.format(uri, paramstring)
1468                body = None
1469            else:
1470                body = paramstring
1471
1472        pywikibot.debug('API request to {} (uses get: {}):\n'
1473                        'Headers: {!r}\nURI: {!r}\nBody: {!r}'
1474                        .format(self.site, use_get, headers, uri, body),
1475                        _logger)
1476        return use_get, uri, body, headers
1477
1478    def _http_request(self, use_get: bool, uri: str, data, headers,
1479                      paramstring) -> tuple:
1480        """Get or post a http request with exception handling.
1481
1482        :return: a tuple containing requests.Response object from
1483            http.request and use_get value
1484        """
1485        try:
1486            response = http.request(self.site, uri=uri,
1487                                    method='GET' if use_get else 'POST',
1488                                    data=data, headers=headers)
1489        except Server504Error:
1490            pywikibot.log('Caught HTTP 504 error; retrying')
1491        except Server414Error:
1492            if use_get:
1493                pywikibot.log('Caught HTTP 414 error; retrying')
1494                use_get = False
1495            else:
1496                pywikibot.warning('Caught HTTP 414 error, although not '
1497                                  'using GET.')
1498                raise
1499        except FatalServerError:
1500            # This error is not going to be fixed by just waiting
1501            pywikibot.error(traceback.format_exc())
1502            raise
1503        # TODO: what other exceptions can occur here?
1504        except Exception:
1505            # for any other error on the http request, wait and retry
1506            pywikibot.error(traceback.format_exc())
1507            pywikibot.log('{}, {}'.format(uri, paramstring))
1508        else:
1509            return response, use_get
1510        self.wait()
1511        return None, use_get
1512
1513    def _json_loads(self, response) -> Optional[dict]:
1514        """Return a dict from requests.Response.
1515
1516        :param response: a requests.Response object
1517        :type response: requests.Response
1518        :return: a data dict
1519        :raises pywikibot.exceptions.APIError: unknown action found
1520        :raises pywikibot.exceptions.APIError: unknown query result type
1521        """
1522        try:
1523            result = response.json()
1524        except ValueError:
1525            # if the result isn't valid JSON, there may be a server
1526            # problem. Wait a few seconds and try again
1527            # Show 20 lines of bare text
1528            text = '\n'.join(removeHTMLParts(response.text).splitlines()[:20])
1529            msg = """\
1530Non-JSON response received from server {site} for url
1531{resp.url}
1532The server may be down.
1533Status code: {resp.status_code}
1534
1535The text message is:
1536{text}
1537""".format(site=self.site, resp=response, text=text)
1538
1539            # Do not retry for AutoFamily but raise a SiteDefinitionError
1540            # Note: family.AutoFamily is a function to create that class
1541            if self.site.family.__class__.__name__ == 'AutoFamily':
1542                pywikibot.debug(msg, _logger)
1543                raise SiteDefinitionError('Invalid AutoFamily({!r})'
1544                                          .format(self.site.family.domain))
1545
1546            pywikibot.warning(msg)
1547
1548            # there might also be an overflow, so try a smaller limit
1549            for param in self._params:
1550                if param.endswith('limit'):
1551                    # param values are stored a list of str
1552                    value = self[param][0]
1553                    if value.isdigit():
1554                        self[param] = [str(int(value) // 2)]
1555                        pywikibot.output('Set {} = {}'
1556                                         .format(param, self[param]))
1557        else:
1558            return result or {}
1559        self.wait()
1560        return None
1561
1562    def _relogin(self, message=''):
1563        """Force re-login and inform user."""
1564        pywikibot.error('{}{}Forcing re-login.'.format(message,
1565                                                       ' ' if message else ''))
1566        self.site._relogin()
1567
1568    def _userinfo_query(self, result):
1569        """Handle userinfo query."""
1570        if self.action == 'query' and 'userinfo' in result.get('query', ()):
1571            # if we get passed userinfo in the query result, we can confirm
1572            # that we are logged in as the correct user. If this is not the
1573            # case, force a re-login.
1574            username = result['query']['userinfo']['name']
1575            if (self.site.user() is not None and self.site.user() != username
1576                    and self.site._loginstatus != LoginStatus.IN_PROGRESS):
1577                message = ("Logged in as '{actual}' instead of '{expected}'."
1578                           .format(actual=username, expected=self.site.user()))
1579                self._relogin(message)
1580                return True
1581        return False
1582
1583    def _handle_warnings(self, result):
1584        if 'warnings' in result:
1585            for mod, warning in result['warnings'].items():
1586                if mod == 'info':
1587                    continue
1588                if '*' in warning:
1589                    text = warning['*']
1590                elif 'html' in warning:
1591                    # bug T51978
1592                    text = warning['html']['*']
1593                else:
1594                    pywikibot.warning(
1595                        'API warning ({}) of unknown format: {}'.
1596                        format(mod, warning))
1597                    continue
1598                # multiple warnings are in text separated by a newline
1599                for single_warning in text.splitlines():
1600                    if (not callable(self._warning_handler)
1601                            or not self._warning_handler(mod, single_warning)):
1602                        pywikibot.warning('API warning ({}): {}'
1603                                          .format(mod, single_warning))
1604
1605    def _logged_in(self, code):
1606        """Check whether user is logged in.
1607
1608        Older wikis returned an error instead of a warning when the request
1609        asked for too many values. If we get this error, assume we are not
1610        logged in (we can't check this because the userinfo data is not
1611        present) and force a re-login
1612        """
1613        if code.endswith('limit'):
1614            message = 'Received API limit error.'
1615
1616        # If the user assertion failed, we're probably logged out as well.
1617        elif code == 'assertuserfailed':
1618            message = 'User assertion failed.'
1619
1620        # Lastly, the purge module requires a POST if used as anonymous user,
1621        # but we normally send a GET request. If the API tells us the request
1622        # has to be POSTed, we're probably logged out.
1623        elif code == 'mustbeposted' and self.action == 'purge':
1624            message = "Received unexpected 'mustbeposted' error."
1625
1626        else:
1627            return True
1628
1629        self._relogin(message)
1630        return False
1631
1632    def _internal_api_error(self, code, error, result):
1633        """Check for internal_api_error_ or readonly and retry.
1634
1635        :raises pywikibot.exceptions.APIMWError: internal_api_error or readonly
1636        """
1637        iae = 'internal_api_error_'
1638        if not (code.startswith(iae) or code == 'readonly'):
1639            return False
1640
1641        # T154011
1642        class_name = code if code == 'readonly' else removeprefix(code, iae)
1643
1644        del error['code']  # is added via class_name
1645        e = pywikibot.exceptions.APIMWError(class_name, **error)
1646
1647        # If the error key is in this table, it is probably a temporary
1648        # problem, so we will retry the edit.
1649        # TODO: T154011: 'ReadOnlyError' seems replaced by 'readonly'
1650        retry = class_name in ['DBConnectionError',  # T64974
1651                               'DBQueryError',  # T60158
1652                               'ReadOnlyError',  # T61227
1653                               'readonly',  # T154011
1654                               ]
1655
1656        pywikibot.error('Detected MediaWiki API exception {}{}'
1657                        .format(e, '; retrying' if retry else '; raising'))
1658        param_repr = str(self._params)
1659        pywikibot.log('MediaWiki exception {} details:\n'
1660                      '          query=\n{}\n'
1661                      '          response=\n{}'
1662                      .format(class_name,
1663                              pprint.pformat(param_repr),
1664                              result))
1665        if not retry:
1666            raise e
1667
1668        self.wait()
1669        return True
1670
1671    def _ratelimited(self):
1672        """Handle ratelimited warning."""
1673        ratelimits = self.site.userinfo['ratelimits']
1674        delay = None
1675
1676        ratelimit = ratelimits.get(self.action, {})
1677        # find the lowest wait time for the given action
1678        for limit in ratelimit.values():
1679            seconds = limit['seconds']
1680            hits = limit['hits']
1681            delay = min(delay or seconds, seconds / hits)
1682
1683        if not delay:
1684            pywikibot.warning(
1685                'No rate limit found for action {}'.format(self.action))
1686        self.wait(delay)
1687
1688    def _bad_token(self, code) -> bool:
1689        """Check for bad token."""
1690        if code != 'badtoken':  # Other code not handled here
1691            return False
1692
1693        if self.site._loginstatus == LoginStatus.IN_PROGRESS:
1694            pywikibot.log('Login status: {}'
1695                          .format(self.site._loginstatus.name))
1696            return False
1697
1698        user_tokens = self.site.tokens._tokens[self.site.user()]
1699        # all token values mapped to their type
1700        tokens = {token: t_type for t_type, token in user_tokens.items()}
1701        # determine which tokens are bad
1702        invalid_param = {name: tokens[param[0]]
1703                         for name, param in self._params.items()
1704                         if len(param) == 1 and param[0] in tokens}
1705        # doesn't care about the cache so can directly load them
1706        if invalid_param:
1707            pywikibot.log(
1708                'Bad token error for {}. Tokens for "{}" used in request; '
1709                'invalidated them.'
1710                .format(self.site.user(),
1711                        '", "'.join(sorted(set(invalid_param.values())))))
1712            # invalidate superior wiki cookies (T224712)
1713            _invalidate_superior_cookies(self.site.family)
1714            # request new token(s) instead of invalid
1715            self.site.tokens.load_tokens(set(invalid_param.values()))
1716            # fix parameters; lets hope that it doesn't mistake actual
1717            # parameters as tokens
1718            for name, t_type in invalid_param.items():
1719                self[name] = self.site.tokens[t_type]
1720            return True
1721
1722        # otherwise couldn't find any … weird there is nothing what
1723        # can be done here because it doesn't know which parameters
1724        # to fix
1725        pywikibot.log(
1726            'Bad token error for {} but no parameter is using a '
1727            'token. Current tokens: {}'
1728            .format(self.site.user(),
1729                    ', '.join('{}: {}'.format(*e)
1730                              for e in user_tokens.items())))
1731        return False
1732
    def submit(self) -> dict:
        """
        Submit a query and parse the response.

        The request is repeated in a loop until a result without an
        'error' entry is received, a simulated result is available, or
        an error is found that is not handled by a retry.

        :return: a dict containing data retrieved from api.php
        :raises pywikibot.exceptions.APIError: the API returned an error
            which is not handled by a retry
        :raises NoUsernameError: OAuth authentication failed
        :raises RuntimeError: the error data could not be passed to
            APIError as keyword arguments
        :raises MaxlagTimeoutError: too many retries due to maxlag
        :raises unittest.SkipTest: too many retries due to maxlag while
            the PYWIKIBOT_TESTS_RUNNING environment variable is '1'
        """
        self._add_defaults()
        use_get = self._use_get()
        retries = 0  # counts maxlag retries only
        while True:
            # rebuilt on every pass; handlers below may change parameters
            paramstring = self._http_param_string()

            simulate = self._simulate(self.action)
            if simulate:
                return simulate

            if self.throttle:
                self.site.throttle(write=self.write)
            else:
                pywikibot.log(
                    "Submitting unthrottled action '{}'.".format(self.action))

            use_get, uri, body, headers = self._get_request_params(use_get,
                                                                   paramstring)
            response, use_get = self._http_request(use_get, uri, body, headers,
                                                   paramstring)
            # None signals a recoverable http failure; try again
            if response is None:
                continue

            result = self._json_loads(response)
            # None signals a non-JSON response; try again
            if result is None:
                continue

            # True means a re-login was forced; repeat the request
            if self._userinfo_query(result):
                continue

            self._handle_warnings(result)

            if 'error' not in result:
                return result

            # copy of the error data extended by the other top-level
            # entries of the result; passed to _internal_api_error below
            error = result['error'].copy()
            for key in result:
                if key in ('error', 'warnings'):
                    continue
                assert key not in error
                assert isinstance(result[key], str), \
                    'Unexpected {}: {!r}'.format(key, result[key])
                error[key] = result[key]

            if '*' in result['error']:
                # help text returned
                result['error']['help'] = result['error'].pop('*')
            code = result['error'].setdefault('code', 'Unknown')
            # NOTE(review): info is None if the error has no 'info'
            # entry; the maxlag log and the OAuth check below use it as
            # a str - confirm the API always provides it there
            info = result['error'].setdefault('info', None)

            # False means a re-login was forced; repeat the request
            if not self._logged_in(code):
                continue

            if code == 'maxlag':
                retries += 1
                # leaving the loop leads to the maxlag failure below
                if retries > max(5, pywikibot.config.max_retries):
                    break
                pywikibot.log('Pausing due to database lag: ' + info)

                try:
                    lag = result['error']['lag']
                except KeyError:
                    # no 'lag' entry; extract the value from the info
                    # text, falling back to 0.0 if it does not match
                    lag = lagpattern.search(info)
                    lag = float(lag.group('lag')) if lag else 0.0

                # the reported lag is scaled by the number of retries
                self.site.throttle.lag(lag * retries)
                continue

            if code == 'help' and self.action == 'help':
                # The help module returns an error result with the complete
                # API information. As this data was requested, return the
                # data instead of raising an exception.
                return {'help': {'mime': 'text/plain',
                                 'help': result['error']['help']}}

            pywikibot.warning('API error {}: {}'.format(code, info))
            pywikibot.log('           headers=\n{}'.format(response.headers))

            # True means a temporary server problem; repeat the request
            if self._internal_api_error(code, error, result):
                continue

            # Phab. tickets T48535, T64126, T68494, T68619
            if code == 'failed-save' \
               and self._is_wikibase_error_retryable(result['error']):
                self.wait()
                continue

            if code == 'ratelimited':
                self._ratelimited()
                continue

            # If readapidenied is returned try to login
            if code == 'readapidenied' \
               and self.site._loginstatus in (LoginStatus.NOT_ATTEMPTED,
                                              LoginStatus.NOT_LOGGED_IN):
                self.site.login()
                continue

            # True means tokens were renewed; repeat the request
            if self._bad_token(code):
                continue

            if 'mwoauth-invalid-authorization' in code:
                if 'Nonce already used' in info:
                    pywikibot.error(
                        'Retrying failed OAuth authentication for {}: {}'
                        .format(self.site, info))
                    continue
                raise NoUsernameError('Failed OAuth authentication for {}: {}'
                                      .format(self.site, info))
            if code == 'cirrussearch-too-busy-error':  # T170647
                self.wait()
                continue

            if code == 'urlshortener-blocked':  # T244062
                # add additional informations to result['error']
                result['error']['current site'] = self.site
                if self.site.user():
                    result['error']['current user'] = self.site.user()
                else:  # not logged in; show the IP
                    uinfo = self.site.userinfo
                    result['error']['current user'] = uinfo['name']

            # raise error
            try:
                param_repr = str(self._params)
                pywikibot.log('API Error: query=\n{}'
                              .format(pprint.pformat(param_repr)))
                pywikibot.log('           response=\n{}'.format(result))

                raise pywikibot.exceptions.APIError(**result['error'])
            except TypeError:
                # a TypeError raised in the block above is reported as a
                # RuntimeError carrying the whole result
                raise RuntimeError(result)

        # only reached via the break in the maxlag branch
        msg = 'Maximum retries attempted due to maxlag without success.'
        if os.environ.get('PYWIKIBOT_TESTS_RUNNING', '0') == '1':
            import unittest
            raise unittest.SkipTest(msg)

        raise MaxlagTimeoutError(msg)
1878
1879    def wait(self, delay=None):
1880        """Determine how long to wait after a failed request."""
1881        self.current_retries += 1
1882        if self.current_retries > self.max_retries:
1883            raise TimeoutError('Maximum retries attempted without success.')
1884
1885        # double the next wait, but do not exceed config.retry_max seconds
1886        delay = delay or self.retry_wait
1887        delay *= 2 ** (self.current_retries - 1)
1888        delay = min(delay, config.retry_max)
1889
1890        pywikibot.warning('Waiting {:.1f} seconds before retrying.'
1891                          .format(delay))
1892        pywikibot.sleep(delay)
1893
1894
class CachedRequest(Request):

    """Cached request.

    The result of a submitted request is pickled to a file below the
    cache directory; later requests with the same description are
    answered from that file until the entry has expired.
    """

    def __init__(self, expiry, *args, **kwargs):
        """Initialize a CachedRequest object.

        All other arguments are passed to the Request initializer.

        :param expiry: either a number of days or a datetime.timedelta
            object; it is capped at config.API_config_expiry days
        """
        assert expiry is not None
        super().__init__(*args, **kwargs)
        if not isinstance(expiry, datetime.timedelta):
            expiry = datetime.timedelta(expiry)
        self.expiry = min(expiry, datetime.timedelta(config.API_config_expiry))
        # data and creation time of the cache entry; set by _load_cache()
        # and submit()
        self._data = None
        self._cachetime = None

    @classmethod
    def create_simple(cls, req_site, **kwargs):
        """Unsupported as it requires at least two parameters."""
        raise NotImplementedError('CachedRequest cannot be created simply.')

    @classmethod
    def _get_cache_dir(cls) -> str:
        """
        Return the base directory path for cache entries.

        The directory will be created if it does not already exist.

        :return: base directory path for cache entries
        """
        path = os.path.join(config.base_dir,
                            'apicache-py{0:d}'.format(PYTHON_VERSION[0]))
        cls._make_dir(path)
        # replace this method on the class so that later calls return
        # the path without trying to create the directory again
        cls._get_cache_dir = classmethod(lambda c: path)  # cache the result
        return path

    @staticmethod
    def _make_dir(dir_name: str) -> str:
        """Create directory if it does not exist already.

        The directory name (dir_name) is returned unmodified. Note that
        any OSError raised while creating the directory is suppressed.

        :param dir_name: directory path
        :return: directory name
        """
        with suppress(OSError):  # directory already exists
            os.makedirs(dir_name)
        return dir_name

    def _uniquedescriptionstr(self) -> str:
        """Return unique description for the cache entry.

        The description consists of the site, the user and the sorted
        encoded request parameters.

        If this is modified, please also update
        scripts/maintenance/cache.py to support
        the new key and all previous keys.
        """
        login_status = self.site._loginstatus

        if login_status >= LoginStatus.AS_USER:
            # This uses the format of Page.__repr__, without performing
            # config.console_encoding as done by Page.__repr__.
            # The returned value can't be encoded to anything other than
            # ascii otherwise it creates an exception when _create_file_name()
            # tries to encode it as utf-8.
            user_key = 'User(User:{})'.format(self.site.userinfo['name'])
        else:
            user_key = repr(LoginStatus(LoginStatus.NOT_LOGGED_IN))

        request_key = repr(sorted(self._encoded_items().items()))
        return '{!r}{}{}'.format(self.site, user_key, request_key)

    def _create_file_name(self):
        """
        Return a unique ascii identifier for the cache entry.

        The identifier is the SHA-256 digest of the unique description.

        :rtype: str (hexadecimal; i.e. characters 0-9 and a-f only)
        """
        return hashlib.sha256(
            self._uniquedescriptionstr().encode('utf-8')
        ).hexdigest()

    def _cachefile_path(self):
        """Return the path of the cache file for this request."""
        return os.path.join(CachedRequest._get_cache_dir(),
                            self._create_file_name())

    def _expired(self, dt):
        """Return whether an entry created at dt has expired.

        :param dt: creation time of the cache entry; it is compared
            with datetime.datetime.utcnow()
        :type dt: datetime.datetime
        :rtype: bool
        """
        return dt + self.expiry < datetime.datetime.utcnow()

    def _load_cache(self) -> bool:
        """Load cache entry for request, if available.

        self._data and self._cachetime are only modified after the
        entry was verified to belong to this request. For an expired
        entry self._cachetime is set and self._data is reset to None.

        :return: Whether the request was loaded from the cache
        """
        self._add_defaults()
        try:
            filename = self._cachefile_path()
            # NOTE: pickle must only be used with trusted data; the
            # cache directory is expected to be written by this class
            with open(filename, 'rb') as f:
                uniquedescr, data, cachetime = pickle.load(f)
            # validate before touching the instance so that a foreign
            # entry never ends up in self._data
            if uniquedescr != self._uniquedescriptionstr():
                raise RuntimeError('Expected unique description for the cache '
                                   'entry is different from file entry.')
            self._cachetime = cachetime
            if self._expired(cachetime):
                self._data = None
                return False
            self._data = data
            pywikibot.debug('{}: cache hit ({}) for API request: {}'
                            .format(self.__class__.__name__, filename,
                                    uniquedescr), _logger)
            return True
        except IOError:
            # file not found
            return False
        except Exception as e:
            # e.g. an unreadable or foreign entry; treat it as a miss
            pywikibot.output('Could not load cache: {!r}'.format(e))
            return False

    def _write_cache(self, data):
        """Write data to self._cachefile_path().

        The entry is a pickled tuple of the unique description, the
        data and the current UTC time.
        """
        data = (self._uniquedescriptionstr(), data, datetime.datetime.utcnow())
        with open(self._cachefile_path(), 'wb') as f:
            pickle.dump(data, f, protocol=config.pickle_protocol)

    def submit(self):
        """Submit cached request.

        If no valid cache entry is available the request is submitted
        and the result is written to the cache; otherwise the warnings
        of the cached result are handled again.

        :return: the data of the request
        """
        cached_available = self._load_cache()
        if not cached_available:
            self._data = super().submit()
            self._write_cache(self._data)
        else:
            self._handle_warnings(self._data)
        return self._data
2026
2027
2028class _RequestWrapper:
2029
2030    """A wrapper class to handle the usage of the ``parameters`` parameter."""
2031
2032    def _clean_kwargs(self, kwargs, **mw_api_args):
2033        """Clean kwargs, define site and request class."""
2034        if 'site' not in kwargs:
2035            warn('{} invoked without a site'.format(self.__class__.__name__),
2036                 RuntimeWarning, 3)
2037            kwargs['site'] = pywikibot.Site()
2038        assert(not hasattr(self, 'site') or self.site == kwargs['site'])
2039        self.site = kwargs['site']
2040        self.request_class = kwargs['site']._request_class(kwargs)
2041        kwargs = self.request_class.clean_kwargs(kwargs)
2042        kwargs['parameters'].update(mw_api_args)
2043        return kwargs
2044
2045
class APIGenerator(_RequestWrapper):

    """
    Iterator for API responses which contain a list of items.

    Every item of a response is yielded; the continue request parameter
    is used to retrieve the next portion of items automatically. If the
    limit attribute is set, iteration stops after that many items.
    """

    def __init__(self, action: str, continue_name: str = 'continue',
                 limit_name: str = 'limit', data_name: str = 'data', **kwargs):
        """
        Initialize an APIGenerator object.

        kwargs are used to create a Request object; see that object's
        documentation for values.

        :param action: API action name.
        :param continue_name: Name of the continue API parameter.
        :param limit_name: Name of the limit API parameter.
        :param data_name: Name of the data in API response.
        """
        kwargs = self._clean_kwargs(kwargs, action=action)

        self.continue_name = continue_name
        self.limit_name = limit_name
        self.data_name = data_name

        # a non-positive config.step means that no increment is set
        self.query_increment = config.step if config.step > 0 else None
        self.limit = None
        self.starting_offset = kwargs['parameters'].pop(continue_name, 0)
        self.request = self.request_class(**kwargs)
        self.request[limit_name] = self.query_increment

    def set_query_increment(self, value: int):
        """
        Set the maximum number of items to be retrieved per API query.

        If not called, the default is config.step.

        :param value: The value of maximum number of items to be retrieved
            per API request to set.
        """
        increment = int(value)
        self.query_increment = increment
        self.request[self.limit_name] = increment
        pywikibot.debug('{}: Set query_increment to {}.'
                        .format(self.__class__.__name__, increment),
                        _logger)

    def set_maximum_items(self, value: Union[int, str, None]):
        """
        Set the maximum number of items to be retrieved from the wiki.

        If not called, most queries will continue as long as there is
        more data to be retrieved from the API.

        :param value: The value of maximum number of items to be retrieved
            in total to set. None and non-positive values are ignored.
        """
        if value is None or int(value) <= 0:
            return

        self.limit = int(value)
        name = self.__class__.__name__
        # a single request does not need to ask for more than the limit
        if self.query_increment and self.limit < self.query_increment:
            self.request[self.limit_name] = self.limit
            pywikibot.debug('{}: Set request item limit to {}'
                            .format(name, self.limit),
                            _logger)
        pywikibot.debug('{}: Set limit (maximum_items) to {}.'
                        .format(name, self.limit),
                        _logger)

    def __iter__(self):
        """
        Submit request and iterate the response.

        Continues response as needed until limit (if defined) is reached
        or a response contains no items.
        """
        offset = self.starting_offset
        yielded = 0
        while True:
            self.request[self.continue_name] = offset
            pywikibot.debug('{}: Request: {}'
                            .format(self.__class__.__name__, self.request),
                            _logger)
            items = self.request.submit()[self.data_name]

            count = len(items)
            pywikibot.debug('{}: Retrieved {} items'
                            .format(self.__class__.__name__, count),
                            _logger)
            if count <= 0:
                pywikibot.debug('{}: Stopped iterating due to empty list in '
                                'response.'.format(self.__class__.__name__),
                                _logger)
                return

            for item in items:
                yield item
                yielded += 1
                if self.limit is not None and yielded >= self.limit:
                    pywikibot.debug('%s: Stopped iterating due to '
                                    'exceeding item limit.' %
                                    self.__class__.__name__, _logger)
                    return
            # the next request continues behind the items just received
            offset += count
2155
2156
2157class QueryGenerator(_RequestWrapper):
2158
2159    """
2160    Base class for iterators that handle responses to API action=query.
2161
2162    By default, the iterator will iterate each item in the query response,
2163    and use the (query-)continue element, if present, to continue iterating as
2164    long as the wiki returns additional values. However, if the iterator's
2165    limit attribute is set to a positive int, the iterator will stop after
2166    iterating that many values. If limit is negative, the limit parameter
2167    will not be passed to the API at all.
2168
2169    Most common query types are more efficiently handled by subclasses, but
2170    this class can be used directly for custom queries and miscellaneous
2171    types (such as "meta=...") that don't return the usual list of pages or
2172    links. See the API documentation for specific query options.
2173
2174    """
2175
2176    # Should results be filtered during iteration according to set_namespace?
2177    # Used if the API module does not support multiple namespaces.
2178    # Override in subclasses by defining a function that returns True if
2179    # the result's namespace is in self._namespaces.
2180    _check_result_namespace = NotImplemented
2181
2182    # Set of allowed namespaces will be assigned to _namespaces during
2183    # set_namespace call. Only to be used by _check_result_namespace.
2184    _namespaces = None
2185
2186    def __init__(self, **kwargs):
2187        """
2188        Initialize a QueryGenerator object.
2189
2190        kwargs are used to create a Request object; see that object's
2191        documentation for values. 'action'='query' is assumed.
2192
2193        """
2194        if not hasattr(self, 'site'):
2195            kwargs = self._clean_kwargs(kwargs)  # hasn't been called yet
2196        parameters = kwargs['parameters']
2197        if 'action' in parameters and parameters['action'] != 'query':
2198            raise Error("{}: 'action' must be 'query', not {}"
2199                        .format(self.__class__.__name__, kwargs['action']))
2200        parameters['action'] = 'query'
2201        # make sure request type is valid, and get limit key if any
2202        for modtype in ('generator', 'list', 'prop', 'meta'):
2203            if modtype in parameters:
2204                self.modules = parameters[modtype].split('|')
2205                break
2206        else:
2207            raise Error('{}: No query module name found in arguments.'
2208                        .format(self.__class__.__name__))
2209
2210        parameters['indexpageids'] = True  # always ask for list of pageids
2211        self.continue_name = 'continue'
2212        self.continue_update = self._continue
2213        # Explicitly enable the simplified continuation
2214        parameters['continue'] = True
2215        self.request = self.request_class(**kwargs)
2216
2217        self.site._paraminfo.fetch('query+' + mod for mod in self.modules)
2218
2219        limited_modules = {mod for mod in self.modules
2220                           if self.site._paraminfo.parameter('query+' + mod,
2221                                                             'limit')}
2222
2223        if not limited_modules:
2224            self.limited_module = None
2225        elif len(limited_modules) == 1:
2226            self.limited_module = limited_modules.pop()
2227        else:
2228            # Select the first limited module in the request.
2229            # Query will continue as needed until limit (if any) for this
2230            # module is reached.
2231            for module in self.modules:
2232                if module in limited_modules:
2233                    self.limited_module = module
2234                    limited_modules.remove(module)
2235                    break
2236            pywikibot.log('{}: multiple requested query modules support limits'
2237                          "; using the first such module '{}' of {!r}"
2238                          .format(self.__class__.__name__, self.limited_module,
2239                                  self.modules))
2240
2241            # Set limits for all remaining limited modules to max value.
2242            # Default values will only cause more requests and make the query
2243            # slower.
2244            for module in limited_modules:
2245                param = self.site._paraminfo.parameter('query+' + module,
2246                                                       'limit')
2247                prefix = self.site._paraminfo['query+' + module]['prefix']
2248                if self.site.logged_in() \
2249                   and self.site.has_right('apihighlimits'):
2250                    self.request[prefix + 'limit'] = int(param['highmax'])
2251                else:
2252                    self.request[prefix + 'limit'] = int(param['max'])
2253
2254        if config.step > 0:
2255            self.api_limit = config.step
2256        else:
2257            self.api_limit = None
2258
2259        if self.limited_module:
2260            self.prefix = self.site._paraminfo['query+'
2261                                               + self.limited_module]['prefix']
2262            self._update_limit()
2263
2264        if self.api_limit is not None and 'generator' in parameters:
2265            self.prefix = 'g' + self.prefix
2266
2267        self.limit = None
2268        self.query_limit = self.api_limit
2269        if 'generator' in parameters:
2270            # name of the "query" subelement key to look for when iterating
2271            self.resultkey = 'pages'
2272        else:
2273            self.resultkey = self.modules[0]
2274
2275        # usually the (query-)continue key is the same as the querymodule,
2276        # but not always
2277        # API can return more than one query-continue key, if multiple
2278        # properties are requested by the query, e.g.
2279        # "query-continue":{
2280        #     "langlinks":{"llcontinue":"12188973|pt"},
2281        #     "templates":{"tlcontinue":"310820|828|Namespace_detect"}}
2282        # self.continuekey is a list
2283        self.continuekey = self.modules
2284        self._add_slots()
2285
2286    def _add_slots(self):
2287        """Add slots to params if the site supports multi-content revisions.
2288
2289        On MW 1.32+ the following query parameters require slots to be given
2290        when content or contentmodel is requested.
2291
2292        * prop=revisions
2293        * prop=deletedrevisions or
2294        * list=allrevisions
2295        * list=alldeletedrevisions
2296
2297        More info:
2298        https://lists.wikimedia.org/hyperkitty/list/mediawiki-api-announce@lists.wikimedia.org/message/AXO4G4OOMTG7CEUU5TGAWXBI2LD4G3BC/
2299        """
2300        if self.site.mw_version < '1.32':
2301            return
2302        request = self.request
2303        # If using any deprecated_params, do not add slots. Usage of
2304        # these parameters together with slots is forbidden and the user will
2305        # get an API warning anyway.
2306        props = request.get('prop')
2307        if props:
2308            if 'revisions' in props:
2309                deprecated_params = {
2310                    'rvexpandtemplates', 'rvparse', 'rvdiffto', 'rvdifftotext',
2311                    'rvdifftotextpst', 'rvcontentformat', 'parsetree'}
2312                if not set(request) & deprecated_params:
2313                    request['rvslots'] = '*'
2314            if 'deletedrevisions' in props:
2315                deprecated_params = {
2316                    'drvexpandtemplates', 'drvparse', 'drvdiffto',
2317                    'drvdifftotext', 'drvdifftotextpst', 'drvcontentformat',
2318                    'parsetree'}
2319                if not set(request) & deprecated_params:
2320                    request['drvslots'] = '*'
2321        lists = request.get('list')
2322        if lists:
2323            if 'allrevisions' in lists:
2324                deprecated_params = {
2325                    'arvexpandtemplates', 'arvparse', 'arvdiffto',
2326                    'arvdifftotext', 'arvdifftotextpst', 'arvcontentformat',
2327                    'parsetree'}
2328                if not set(request) & deprecated_params:
2329                    request['arvslots'] = '*'
2330            if 'alldeletedrevisions' in lists:
2331                deprecated_params = {
2332                    'adrexpandtemplates', 'adrparse', 'adrdiffto',
2333                    'adrdifftotext', 'adrdifftotextpst', 'adrcontentformat',
2334                    'parsetree'}
2335                if not set(request) & deprecated_params:
2336                    request['adrslots'] = '*'
2337
2338    def set_query_increment(self, value):
2339        """Set the maximum number of items to be retrieved per API query.
2340
2341        If not called, the default is to ask for "max" items and let the
2342        API decide how many to send.
2343        """
2344        limit = int(value)
2345
2346        # don't update if limit is greater than maximum allowed by API
2347        if self.api_limit is None:
2348            self.query_limit = limit
2349        else:
2350            self.query_limit = min(self.api_limit, limit)
2351        pywikibot.debug('{}: Set query_limit to {}.'
2352                        .format(self.__class__.__name__,
2353                                self.query_limit), _logger)
2354
2355    def set_maximum_items(self, value: Union[int, str, None]):
2356        """Set the maximum number of items to be retrieved from the wiki.
2357
2358        If not called, most queries will continue as long as there is
2359        more data to be retrieved from the API.
2360
2361        If set to -1 (or any negative value), the "limit" parameter will be
2362        omitted from the request. For some request types (such as
2363        prop=revisions), this is necessary to signal that only current
2364        revision is to be returned.
2365
2366        :param value: The value of maximum number of items to be retrieved
2367            in total to set. Ignores None value.
2368        """
2369        if value is not None:
2370            self.limit = int(value)
2371
2372    def _update_limit(self):
2373        """Set query limit for self.module based on api response."""
2374        param = self.site._paraminfo.parameter('query+' + self.limited_module,
2375                                               'limit')
2376        if self.site.logged_in() and self.site.has_right('apihighlimits'):
2377            limit = int(param['highmax'])
2378        else:
2379            limit = int(param['max'])
2380        if self.api_limit is None or limit < self.api_limit:
2381            self.api_limit = limit
2382            pywikibot.debug(
2383                '{}: Set query_limit to {}.'.format(self.__class__.__name__,
2384                                                    self.api_limit),
2385                _logger)
2386
2387    def support_namespace(self) -> bool:
2388        """Check if namespace is a supported parameter on this query.
2389
2390        Note: this function will be removed when self.set_namespace() will
2391              throw TypeError() instead of just giving a warning.
2392              See T196619.
2393
2394        :return: True if yes, False otherwise
2395        """
2396        assert self.limited_module  # some modules do not have a prefix
2397        return bool(
2398            self.site._paraminfo.parameter('query+' + self.limited_module,
2399                                           'namespace'))
2400
2401    def set_namespace(self, namespaces):
2402        """Set a namespace filter on this query.
2403
2404        :param namespaces: namespace identifiers to limit query results
2405        :type namespaces: iterable of str or Namespace key, or a single
2406            instance of those types. May be a '|' separated list of
2407            namespace identifiers. An empty iterator clears any
2408            namespace restriction.
2409        :raises KeyError: a namespace identifier was not resolved
2410
2411        # TODO: T196619
2412        # @raises TypeError: module does not support a namespace parameter
2413        #    or a namespace identifier has an inappropriate
2414        #    type such as NoneType or bool, or more than one namespace
2415        #    if the API module does not support multiple namespaces
2416        """
2417        assert self.limited_module  # some modules do not have a prefix
2418        param = self.site._paraminfo.parameter('query+' + self.limited_module,
2419                                               'namespace')
2420        if not param:
2421            pywikibot.warning('{} module does not support a namespace '
2422                              'parameter'.format(self.limited_module))
2423            warn('set_namespace() will be modified to raise TypeError '
2424                 'when namespace parameter is not supported. '
2425                 'It will be a Breaking Change, please update your code '
2426                 'ASAP, due date July, 31st 2019.', FutureWarning, 2)
2427
2428            # TODO: T196619
2429            # raise TypeError('{} module does not support a namespace '
2430            #                 'parameter'.format(self.limited_module))
2431
2432            return False
2433
2434        if isinstance(namespaces, str):
2435            namespaces = namespaces.split('|')
2436
2437        # Use Namespace id (int) here; Request will cast int to str
2438        namespaces = [ns.id for ns in
2439                      self.site.namespaces.resolve(namespaces)]
2440
2441        if 'multi' not in param and len(namespaces) != 1:
2442            if self._check_result_namespace is NotImplemented:
2443                raise TypeError('{} module does not support multiple '
2444                                'namespaces'.format(self.limited_module))
2445            self._namespaces = set(namespaces)
2446            namespaces = None
2447
2448        if namespaces:
2449            self.request[self.prefix + 'namespace'] = namespaces
2450        elif self.prefix + 'namespace' in self.request:
2451            del self.request[self.prefix + 'namespace']
2452
2453        return None
2454
2455    def _query_continue(self):
2456        if all(key not in self.data[self.continue_name]
2457               for key in self.continuekey):
2458            pywikibot.log(
2459                "Missing '{}' key(s) in ['{}'] value."
2460                .format(self.continuekey, self.continue_name))
2461            return True
2462
2463        for query_continue_pair in self.data['query-continue'].values():
2464            self._add_continues(query_continue_pair)
2465        return False  # a new request with query-continue is needed
2466
2467    def _continue(self):
2468        self._add_continues(self.data['continue'])
2469        return False  # a new request with continue is needed
2470
2471    def _add_continues(self, continue_pair):
2472        for key, value in continue_pair.items():
2473            # query-continue can return ints (continue too?)
2474            if isinstance(value, int):
2475                value = str(value)
2476            self.request[key] = value
2477
2478    def _handle_query_limit(self, prev_limit, new_limit, had_data):
2479        """Handle query limit."""
2480        if self.query_limit is None:
2481            return prev_limit, new_limit
2482
2483        prev_limit = new_limit
2484        if self.limit is None:
2485            new_limit = self.query_limit
2486        elif self.limit > 0:
2487            if had_data:
2488                # self.resultkey in data in last request.submit()
2489                new_limit = min(self.query_limit, self.limit - self._count)
2490            else:
2491                # only "(query-)continue" returned. See Bug T74209.
2492                # increase new_limit to advance faster until new
2493                # useful data are found again.
2494                new_limit = min(new_limit * 2, self.query_limit)
2495        else:
2496            new_limit = None
2497
2498        if new_limit and 'rvprop' in self.request \
2499                and 'content' in self.request['rvprop']:
2500            # queries that retrieve page content have lower limits
2501            # Note: although API allows up to 500 pages for content
2502            #       queries, these sometimes result in server-side errors
2503            #       so use 250 as a safer limit
2504            new_limit = min(new_limit, self.api_limit // 10, 250)
2505
2506        if new_limit is not None:
2507            self.request[self.prefix + 'limit'] = str(new_limit)
2508
2509        if prev_limit != new_limit:
2510            pywikibot.debug(
2511                '{name}: query_limit: {query}, api_limit: {api}, '
2512                'limit: {limit}, new_limit: {new}, count: {count}\n'
2513                '{name}: {prefix}limit: {value}'
2514                .format(name=self.__class__.__name__,
2515                        query=self.query_limit,
2516                        api=self.api_limit,
2517                        limit=self.limit,
2518                        new=new_limit,
2519                        count=self._count,
2520                        prefix=self.prefix,
2521                        value=self.request[self.prefix + 'limit']),
2522                _logger)
2523        return prev_limit, new_limit
2524
2525    def _get_resultdata(self):
2526        """Get resultdata and verify result."""
2527        resultdata = keys = self.data['query'][self.resultkey]
2528        if isinstance(resultdata, dict):
2529            keys = list(resultdata.keys())
2530            if 'results' in resultdata:
2531                resultdata = resultdata['results']
2532            elif 'pageids' in self.data['query']:
2533                # this ensures that page data will be iterated
2534                # in the same order as received from server
2535                resultdata = [resultdata[k]
2536                              for k in self.data['query']['pageids']]
2537            else:
2538                resultdata = [resultdata[k]
2539                              for k in sorted(resultdata.keys())]
2540        pywikibot.debug('{name} received {keys}; limit={limit}'
2541                        .format(name=self.__class__.__name__,
2542                                keys=keys, limit=self.limit),
2543                        _logger)
2544        return resultdata
2545
2546    def _extract_results(self, resultdata):
2547        """Extract results from resultdata."""
2548        for item in resultdata:
2549            result = self.result(item)
2550            if self._namespaces:
2551                if not self._check_result_namespace(result):
2552                    continue
2553            yield result
2554            if isinstance(item, dict) \
2555                    and set(self.continuekey) & set(item.keys()):
2556                # if we need to count elements contained in items in
2557                # self.data["query"]["pages"], we want to count
2558                # item[self.continuekey] (e.g. 'revisions') and not
2559                # self.resultkey (i.e. 'pages')
2560                for key in set(self.continuekey) & set(item.keys()):
2561                    self._count += len(item[key])
2562            # otherwise we proceed as usual
2563            else:
2564                self._count += 1
2565            # note: self.limit could be -1
2566            if self.limit and 0 < self.limit <= self._count:
2567                raise RuntimeError(
2568                    'QueryGenerator._extract_results reached the limit')
2569
2570    def __iter__(self):
2571        """Submit request and iterate the response based on self.resultkey.
2572
2573        Continues response as needed until limit (if any) is reached.
2574
2575        """
2576        previous_result_had_data = True
2577        prev_limit = new_limit = None
2578
2579        self._count = 0
2580        while True:
2581            prev_limit, new_limit = self._handle_query_limit(
2582                prev_limit, new_limit, previous_result_had_data)
2583            if not hasattr(self, 'data'):
2584                self.data = self.request.submit()
2585            if not self.data or not isinstance(self.data, dict):
2586                pywikibot.debug(
2587                    '{}: stopped iteration because no dict retrieved from api.'
2588                    .format(self.__class__.__name__),
2589                    _logger)
2590                return
2591            if 'query' in self.data and self.resultkey in self.data['query']:
2592                resultdata = self._get_resultdata()
2593                if 'normalized' in self.data['query']:
2594                    self.normalized = {
2595                        item['to']: item['from']
2596                        for item in self.data['query']['normalized']}
2597                else:
2598                    self.normalized = {}
2599                try:
2600                    yield from self._extract_results(resultdata)
2601                except RuntimeError:
2602                    return
2603                # self.resultkey in data in last request.submit()
2604                previous_result_had_data = True
2605            else:
2606                if 'query' not in self.data:
2607                    pywikibot.log("%s: 'query' not found in api response." %
2608                                  self.__class__.__name__)
2609                    pywikibot.log(str(self.data))
2610                # if (query-)continue is present, self.resultkey might not have
2611                # been fetched yet
2612                if self.continue_name not in self.data:
2613                    # No results.
2614                    return
2615                # self.resultkey not in data in last request.submit()
2616                # only "(query-)continue" was retrieved.
2617                previous_result_had_data = False
2618            if self.modules[0] == 'random':
2619                # "random" module does not return "(query-)continue"
2620                # now we loop for a new random query
2621                del self.data  # a new request is needed
2622                continue
2623            if self.continue_name not in self.data:
2624                return
2625            if self.continue_update():
2626                return
2627
2628            del self.data  # a new request with (query-)continue is needed
2629
    def result(self, data):
        """Process result data as needed for particular subclass.

        The base implementation returns data unchanged.

        :param data: a single item of the query result
        :return: the object to be yielded for this item
        """
        return data
2633
2634
class PageGenerator(QueryGenerator):

    """Iterator for response to a request of type action=query&generator=foo.

    This class can be used for any of the query types that are listed in the
    API documentation as being able to be used as a generator. Instances of
    this class iterate Page objects.

    """

    def __init__(self, generator: str, g_content=False, **kwargs):
        """
        Initializer.

        Required and optional parameters are as for ``Request``, except that
        action=query is assumed and generator is required.

        The 'prop', 'inprop', 'iiprop' and 'iilimit' parameters are
        extended so that basic page, image and category information is
        retrieved for every generated page.

        :param generator: the "generator=" type from api.php
        :param g_content: if True, retrieve the contents of the current
            version of each Page (default False)

        """
        # If possible, use self.request after __init__ instead of extend
        def extend(key, addition):
            """Join addition to a present value with '|' or set it."""
            if key in params:
                params[key] += '|' + addition
            else:
                params[key] = addition

        kwargs = self._clean_kwargs(kwargs)
        params = kwargs['parameters']

        # get some basic information about every page generated
        extend('prop', 'info|imageinfo|categoryinfo')
        if g_content:
            # retrieve the current revision
            extend('prop', 'revisions')
            extend('rvprop', 'ids|timestamp|flags|comment|user|content')
        if 'inprop' not in params or 'protection' not in params['inprop']:
            extend('inprop', 'protection')
        extend('iiprop', 'timestamp|user|comment|url|size|sha1|metadata')
        extend('iilimit', 'max')  # T194233
        params['generator'] = generator

        super().__init__(**kwargs)
        self.resultkey = 'pages'  # element to look for in result
        self.props = self.request['prop']

    def result(self, pagedata):
        """Convert page dict entry from api to Page object.

        Pages of namespace 2, 6 and 14 are upcast to User, FilePage and
        Category respectively, and the page is updated from pagedata.

        This can be overridden in subclasses to return a different type
        of object.

        """
        page = pywikibot.Page(self.site, pagedata['title'], pagedata['ns'])
        namespace = pagedata['ns']
        # Upcast to proper Page subclass.
        if namespace == 2:
            page = pywikibot.User(page)
        elif namespace == 6:
            page = pywikibot.FilePage(page)
        elif namespace == 14:
            page = pywikibot.Category(page)
        update_page(page, pagedata, self.props)
        return page
2701
2702
class PropertyGenerator(QueryGenerator):

    """Iterator for queries of type action=query&prop=foo.

    See the API documentation for types of page properties that can be
    queried.

    This iterator yields one or more dict object(s) corresponding
    to each "page" item(s) from the API response; the calling module has to
    decide what to do with the contents of the dict. There will be one
    dict for each page queried via a titles= or ids= parameter (which must
    be supplied when instantiating this class).

    Data of a page which is spread over several continued responses is
    merged into a single dict before it is yielded.

    """

    def __init__(self, prop: str, **kwargs):
        """
        Initializer.

        Required and optional parameters are as for ``Request``, except that
        action=query is assumed and prop is required.

        :param prop: the "prop=" type from api.php
        """
        kwargs = self._clean_kwargs(kwargs, prop=prop)
        super().__init__(**kwargs)
        self._props = frozenset(prop.split('|'))
        self.resultkey = 'pages'

    @property
    def props(self):
        """The requested property names."""
        return self._props

    def __iter__(self):
        """Yield results.

        Page dicts are collected in self._previous_dicts while the
        responses are processed; those still held when the query is
        exhausted are yielded at the end.
        """
        self._previous_dicts = {}
        yield from super().__iter__()
        yield from self._previous_dicts.values()

    def _extract_results(self, resultdata):
        """Yield completed page_data of consecutive API requests.

        Dicts of earlier responses whose title is missing in resultdata
        are yielded first. The items of resultdata are not yielded here
        but stored in (or merged into) self._previous_dicts by title.
        """
        yield from self._fully_retrieved_data_dicts(resultdata)
        for data_dict in super()._extract_results(resultdata):
            if 'title' in data_dict:
                d = self._previous_dicts.setdefault(data_dict['title'],
                                                    data_dict)
                if d is not data_dict:
                    # the page was already seen: merge the new data
                    self._update_old_result_dict(d, data_dict)
            else:
                # report through the logging interface, like the other
                # user-facing warnings of this module
                pywikibot.warning('Skipping result without title: '
                                  + str(data_dict))

    def _fully_retrieved_data_dicts(self, resultdata):
        """Yield items of self._previous_dicts that are not in resultdata.

        Every yielded item is removed from self._previous_dicts.
        """
        resultdata_titles = {d['title'] for d in resultdata if 'title' in d}
        # iterate a copy because entries are deleted inside the loop
        for prev_title, prev_dict in self._previous_dicts.copy().items():
            if prev_title not in resultdata_titles:
                yield prev_dict
                del self._previous_dicts[prev_title]

    @staticmethod
    def _update_old_result_dict(old_dict, new_dict):
        """Update old result dict with new_dict.

        Keys missing in old_dict are copied; list values of keys present
        in both dicts are appended to the list in old_dict. Any other
        value of a key present in both dicts is left as it is in old_dict
        and is expected to be a str or an int.
        """
        for k, v in new_dict.items():
            if k not in old_dict:
                old_dict[k] = v
                continue
            if isinstance(v, list):
                old_dict[k].extend(v)
                continue
            assert isinstance(v, (str, int)), (
                'continued API result had an unexpected type: {}'.format(v))
2776
2777
class ListGenerator(QueryGenerator):

    """Iterator for queries of type action=query&list=foo.

    See the API documentation for types of lists that can be queried. Lists
    include both site-wide information (such as 'allpages') and page-specific
    information (such as 'backlinks').

    This iterator yields a dict object for each member of the list returned
    by the API, with the format of the dict depending on the particular list
    command used. For those lists that contain page information, it may be
    easier to use the PageGenerator class instead, as that will convert the
    returned information into a Page object.

    """

    def __init__(self, listaction: str, **kwargs):
        """
        Initializer.

        Required and optional parameters are as for ``Request``, except that
        action=query is assumed and listaction is required.

        :param listaction: the "list=" type from api.php; it is passed on
            as the request's 'list' parameter
        """
        cleaned = self._clean_kwargs(kwargs, list=listaction)
        super().__init__(**cleaned)
2805
2806
class LogEntryListGenerator(ListGenerator):

    """
    Iterator for queries of list 'logevents'.

    Yields LogEntry objects instead of dicts.
    """

    def __init__(self, logtype=None, **kwargs):
        """Initializer.

        :param logtype: passed to the LogEntryFactory which creates the
            yielded entries
        """
        super().__init__('logevents', **kwargs)

        from pywikibot.logentries import LogEntryFactory
        self.entryFactory = LogEntryFactory(self.site, logtype)

    def result(self, pagedata):
        """Instantiate LogEntry from data from api."""
        factory = self.entryFactory
        return factory.create(pagedata)

    def _check_result_namespace(self, result):
        """Return True if result.ns() is in self._namespaces."""
        allowed = self._namespaces
        return result.ns() in allowed
2829
2830
2831class LoginManager(login.LoginManager):
2832
2833    """Supply login_to_site method to use API interface."""
2834
2835    # API login parameters mapping
2836    mapping = {
2837        'user': ('lgname', 'username'),
2838        'password': ('lgpassword', 'password'),
2839        'ldap': ('lgdomain', 'domain'),
2840        'token': ('lgtoken', 'logintoken'),
2841        'result': ('result', 'status'),
2842        'success': ('Success', 'PASS'),
2843        'fail': ('Failed', 'FAIL'),
2844        'reason': ('reason', 'message')
2845    }
2846
2847    def keyword(self, key):
2848        """Get API keyword from mapping."""
2849        return self.mapping[key][self.action != 'login']
2850
2851    def _login_parameters(self, *, botpassword: bool = False
2852                          ) -> Dict[str, str]:
2853        """Return login parameters."""
2854        # Since MW 1.27 only for bot passwords.
2855        self.action = 'login'
2856        if not botpassword:
2857            # get token using meta=tokens if supported
2858            token = self.get_login_token()
2859            if token:
2860                # Standard login request since MW 1.27
2861                self.action = 'clientlogin'
2862
2863        # prepare default login parameters
2864        parameters = {'action': self.action,
2865                      self.keyword('user'): self.login_name,
2866                      self.keyword('password'): self.password}
2867
2868        if self.action == 'clientlogin':
2869            # clientlogin requires non-empty loginreturnurl
2870            parameters['loginreturnurl'] = 'https://example.com'
2871            parameters['rememberMe'] = '1'
2872            parameters['logintoken'] = token
2873
2874        if self.site.family.ldapDomain:
2875            parameters[self.keyword('ldap')] = self.site.family.ldapDomain
2876
2877        return parameters
2878
2879    def login_to_site(self) -> None:
2880        """Login to the site.
2881
2882        Note, this doesn't do anything with cookies. The http module
2883        takes care of all the cookie stuff. Throws exception on failure.
2884        """
2885        self.below_mw_1_27 = False
2886        if hasattr(self, '_waituntil'):
2887            if datetime.datetime.now() < self._waituntil:
2888                diff = self._waituntil - datetime.datetime.now()
2889                pywikibot.warning(
2890                    'Too many tries, waiting {} seconds before retrying.'
2891                    .format(diff.seconds))
2892                pywikibot.sleep(diff.seconds)
2893
2894        self.site._loginstatus = LoginStatus.IN_PROGRESS
2895
2896        # Bot passwords username contains @,
2897        # otherwise @ is not allowed in usernames.
2898        # @ in bot password is deprecated,
2899        # but we don't want to break bots using it.
2900        parameters = self._login_parameters(
2901            botpassword='@' in self.login_name or '@' in self.password)
2902
2903        # base login request
2904        login_request = self.site._request(use_get=False,
2905                                           parameters=parameters)
2906        while True:
2907            # try to login
2908            try:
2909                login_result = login_request.submit()
2910            except pywikibot.exceptions.APIError as e:
2911                login_result = {'error': e.__dict__}
2912
2913            # clientlogin response can be clientlogin or error
2914            if self.action in login_result:
2915                response = login_result[self.action]
2916                result_key = self.keyword('result')
2917            elif 'error' in login_result:
2918                response = login_result['error']
2919                result_key = 'code'
2920            else:
2921                raise RuntimeError('Unexpected API login response key.')
2922
2923            status = response[result_key]
2924            fail_reason = response.get(self.keyword('reason'), '')
2925            if status == self.keyword('success'):
2926                return
2927
2928            if status in ('NeedToken', 'WrongToken', 'badtoken'):
2929                token = response.get('token')
2930                if token and self.below_mw_1_27:
2931                    # fetched token using action=login
2932                    login_request['lgtoken'] = token
2933                    pywikibot.log('Received login token, proceed with login.')
2934                else:
2935                    # if incorrect login token was used,
2936                    # force relogin and generate fresh one
2937                    pywikibot.error('Received incorrect login token. '
2938                                    'Forcing re-login.')
2939                    # invalidate superior wiki cookies (T224712)
2940                    _invalidate_superior_cookies(self.site.family)
2941                    login_request[
2942                        self.keyword('token')] = self.get_login_token()
2943                continue
2944
2945            # messagecode was introduced with 1.29.0-wmf.14
2946            # but older wikis are still supported
2947            login_throttled = response.get('messagecode') == 'login-throttled'
2948
2949            if (status == 'Throttled' or status == self.keyword('fail')
2950                    and (login_throttled or 'wait' in fail_reason)):
2951                wait = response.get('wait')
2952                if wait:
2953                    delta = datetime.timedelta(seconds=int(wait))
2954                else:
2955                    match = re.search(r'(\d+) (seconds|minutes)', fail_reason)
2956                    if match:
2957                        delta = datetime.timedelta(
2958                            **{match.group(2): int(match.group(1))})
2959                    else:
2960                        delta = datetime.timedelta()
2961                self._waituntil = datetime.datetime.now() + delta
2962
2963            break
2964
2965        if 'error' in login_result:
2966            raise pywikibot.exceptions.APIError(**response)
2967
2968        raise pywikibot.exceptions.APIError(code=status, info=fail_reason)
2969
2970    def get_login_token(self) -> Optional[str]:
2971        """Fetch login token for MediaWiki 1.27+.
2972
2973        :return: login token
2974        """
2975        login_token_request = self.site._request(
2976            use_get=False,
2977            parameters={'action': 'query', 'meta': 'tokens', 'type': 'login'},
2978        )
2979        login_token_result = login_token_request.submit()
2980        # check if we have to use old implementation of mw < 1.27
2981        if 'query' in login_token_result:
2982            return login_token_result['query']['tokens'].get('logintoken')
2983
2984        self.below_mw_1_27 = True
2985        return None
2986
2987
def encode_url(query) -> str:
    """
    Encode parameters to pass with a url.

    Reorder parameters so that token parameters go last and call wraps
    :py:obj:`urlencode`. Return an HTTP URL query fragment which complies with
    https://www.mediawiki.org/wiki/API:Edit#Parameters
    (See the 'token' bullet.)

    The given *query* object is not modified; the reordering is done on
    a sorted copy.

    :param query: keys and values to be encoded for passing with a url
    :type query: mapping object or an iterable of two-element tuples
    :return: encoded parameters with token parameters at the end
    """
    if hasattr(query, 'items'):
        query = query.items()

    # parameters ending on 'token' should go last
    # wpEditToken should go very last
    # sorted() is stable, so all other parameters keep their given order;
    # unlike list.sort() it accepts any iterable and leaves the caller's
    # object untouched
    ordered = sorted(query,
                     key=lambda x: x[0].lower().endswith('token')
                     + (x[0] == 'wpEditToken'))
    return urlencode(ordered)
3009
3010
3011def _update_pageid(page, pagedict: dict):
3012    """Update pageid."""
3013    if 'pageid' in pagedict:
3014        page._pageid = int(pagedict['pageid'])
3015    elif 'missing' in pagedict:
3016        page._pageid = 0  # Non-existent page
3017    else:
3018        # Something is wrong.
3019        if page.site.sametitle(page.title(), pagedict['title']):
3020            if 'invalid' in pagedict:
3021                raise InvalidTitleError('{}: {}'
3022                                        .format(page,
3023                                                pagedict['invalidreason']))
3024        if int(pagedict['ns']) < 0:
3025            raise UnsupportedPageError(page)
3026        raise RuntimeError(
3027            "Page {} has neither 'pageid' nor 'missing' attribute"
3028            .format(pagedict['title']))
3029
3030
3031def _update_contentmodel(page, pagedict: dict):
3032    """Update page content model."""
3033    page._contentmodel = pagedict.get('contentmodel')  # can be None
3034
3035    if (page._contentmodel
3036            and page._contentmodel == 'proofread-page'
3037            and 'proofread' in pagedict):
3038        page._quality = pagedict['proofread']['quality']
3039        page._quality_text = pagedict['proofread']['quality_text']
3040
3041
3042def _update_protection(page, pagedict: dict):
3043    """Update page protection."""
3044    if 'restrictiontypes' in pagedict:
3045        page._applicable_protections = set(pagedict['restrictiontypes'])
3046    else:
3047        page._applicable_protections = None
3048    page._protection = {item['type']: (item['level'], item['expiry'])
3049                        for item in pagedict['protection']}
3050
3051
3052def _update_revisions(page, revisions):
3053    """Update page revisions."""
3054    for rev in revisions:
3055        page._revisions[rev['revid']] = pywikibot.page.Revision(**rev)
3056
3057
3058def _update_templates(page, templates):
3059    """Update page templates."""
3060    templ_pages = [pywikibot.Page(page.site, tl['title']) for tl in templates]
3061    if hasattr(page, '_templates'):
3062        page._templates.extend(templ_pages)
3063    else:
3064        page._templates = templ_pages
3065
3066
3067def _update_langlinks(page, langlinks):
3068    """Update page langlinks."""
3069    links = [pywikibot.Link.langlinkUnsafe(link['lang'], link['*'],
3070                                           source=page.site)
3071             for link in langlinks]
3072
3073    if hasattr(page, '_langlinks'):
3074        page._langlinks.extend(links)
3075    else:
3076        page._langlinks = links
3077
3078
3079def _update_coordinates(page, coordinates):
3080    """Update page coordinates."""
3081    coords = []
3082    for co in coordinates:
3083        coord = pywikibot.Coordinate(lat=co['lat'],
3084                                     lon=co['lon'],
3085                                     typ=co.get('type', ''),
3086                                     name=co.get('name', ''),
3087                                     dim=int(co.get('dim', 0)) or None,
3088                                     globe=co['globe'],  # See [[gerrit:67886]]
3089                                     primary='primary' in co
3090                                     )
3091        coords.append(coord)
3092    page._coords = coords
3093
3094
def update_page(page, pagedict: dict, props=None):
    """Update attributes of Page object page, based on query data in pagedict.

    pagedict itself is not modified; lint information is stored on the
    page as a filtered copy.

    :param page: object to be updated
    :type page: pywikibot.page.Page
    :param pagedict: the contents of a "page" element of a query response
    :param props: the property names which resulted in pagedict. If a missing
        value in pagedict can indicate both 'false' and 'not present' the
        property which would make the value present must be in the props
        parameter.
    :type props: iterable of string
    :raises pywikibot.exceptions.InvalidTitleError: Page title is invalid
    :raises pywikibot.exceptions.UnsupportedPageError: Page with namespace < 0
        is not supported yet
    :raises RuntimeError: pagedict contains 'imageinfo' but page is not a
        FilePage, or pagedict has neither 'pageid' nor 'missing'
    """
    _update_pageid(page, pagedict)
    _update_contentmodel(page, pagedict)

    props = props or []
    if 'info' in props:
        # only with the 'info' property does an absent 'redirect' key
        # mean that the page is not a redirect
        page._isredir = 'redirect' in pagedict

    if 'touched' in pagedict:
        page._timestamp = pagedict['touched']

    if 'protection' in pagedict:
        _update_protection(page, pagedict)

    if 'revisions' in pagedict:
        _update_revisions(page, pagedict['revisions'])

    if 'lastrevid' in pagedict:
        page.latest_revision_id = pagedict['lastrevid']

    if 'imageinfo' in pagedict:
        if not isinstance(page, pywikibot.FilePage):
            raise RuntimeError(
                '"imageinfo" found but {} is not a FilePage object'
                .format(page))
        page._load_file_revisions(pagedict['imageinfo'])

    if 'categoryinfo' in pagedict:
        page._catinfo = pagedict['categoryinfo']

    if 'templates' in pagedict:
        _update_templates(page, pagedict['templates'])
    elif 'templates' in props:
        # the property was requested but nothing was returned
        page._templates = []

    if 'langlinks' in pagedict:
        _update_langlinks(page, pagedict['langlinks'])
    elif 'langlinks' in props:
        # the property was requested but nothing was returned
        page._langlinks = []

    if 'coordinates' in pagedict:
        _update_coordinates(page, pagedict['coordinates'])

    if 'pageimage' in pagedict:
        page._pageimage = pywikibot.FilePage(page.site, pagedict['pageimage'])

    if 'pageprops' in pagedict:
        page._pageprops = pagedict['pageprops']
    elif 'pageprops' in props:
        # the property was requested but nothing was returned
        page._pageprops = {}

    if 'preload' in pagedict:
        page._preloadedtext = pagedict['preload']

    if 'flowinfo' in pagedict:
        page._flowinfo = pagedict['flowinfo']['flow']

    if 'lintId' in pagedict:
        # Keep everything except the generic page keys. A filtered copy is
        # stored instead of aliasing pagedict, so the caller's dictionary
        # stays untouched and an absent key cannot raise a KeyError.
        page._lintinfo = {key: value for key, value in pagedict.items()
                          if key not in ('pageid', 'title', 'ns')}
3171
3172
# Register the deprecated names of this module together with the names
# of their replacements; all of them are deprecated since 20210423.
wrapper = ModuleDeprecationWrapper(__name__)
for _deprecated_name, _replacement_name in (
    ('APIError', 'pywikibot.exceptions.APIError'),
    ('UploadWarning', 'pywikibot.exceptions.UploadError'),
    ('APIMWException', 'pywikibot.exceptions.APIMWError'),
):
    wrapper.add_deprecated_attr(
        _deprecated_name, replacement_name=_replacement_name,
        since='20210423')
# do not leave the loop variables behind as module attributes
del _deprecated_name, _replacement_name
3183