1"""Objects representing API generators to MediaWiki site."""
#
# (C) Pywikibot team, 2008-2021
#
# Distributed under the terms of the MIT license.
#
import heapq
import itertools
import typing
from contextlib import suppress
from itertools import zip_longest
from typing import Any, Optional, Union
from warnings import warn

import pywikibot
import pywikibot.family
from pywikibot.backports import Dict, List
from pywikibot.data import api
from pywikibot.exceptions import (
    APIError,
    Error,
    InconsistentTitleError,
    InvalidTitleError,
    NoPageError,
    UserRightsError,
)
from pywikibot.site._decorators import need_right, need_version
from pywikibot.tools import (
    deprecated,
    deprecated_args,
    filter_unique,
    is_ip_address,
    issue_deprecation_warning,
    itergroup,
    remove_last_args,
)


_logger = 'wiki.apisite'


class GeneratorsMixin:

    """API generators mixin for a MediaWiki site."""

    def load_pages_from_pageids(self, pageids):
        """
        Return a page generator from pageids.

        Pages are iterated in the same order as in the underlying pageids.

        Pageids are filtered and only one page is returned in case of
        duplicate pageids.

        :param pageids: an iterable that returns pageids (str or int),
            or a comma- or pipe-separated string of pageids
            (e.g. '945097,1483753, 956608' or '945097|483753|956608')
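
        A minimal usage sketch (assuming ``site`` is this site object;
        the pageids are illustrative)::

            for page in site.load_pages_from_pageids('945097|956608'):
                print(page.title())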
58        """
59        if not pageids:
60            return
61        if isinstance(pageids, str):
62            pageids = pageids.replace('|', ',')
63            pageids = pageids.split(',')
64            pageids = [p.strip() for p in pageids]
65
66        # Validate pageids.
67        gen = (str(int(p)) for p in pageids if int(p) > 0)
68
69        # Find out how many pages can be specified at a time.
70        parameter = self._paraminfo.parameter('query+info', 'prop')
71        if self.logged_in() and self.has_right('apihighlimits'):
72            groupsize = int(parameter['highlimit'])
73        else:
74            groupsize = int(parameter['limit'])
75
76        for sublist in itergroup(filter_unique(gen), groupsize):
77            # Store the order of the input data.
78            priority_dict = dict(zip(sublist, range(len(sublist))))
79
80            prio_queue = []
81            next_prio = 0
82            params = {'pageids': sublist, }
83            rvgen = api.PropertyGenerator('info', site=self, parameters=params)
84
85            for pagedata in rvgen:
86                title = pagedata['title']
87                pageid = str(pagedata['pageid'])
88                page = pywikibot.Page(pywikibot.Link(title, source=self))
89                api.update_page(page, pagedata)
90                priority, page = heapq.heappushpop(prio_queue,
91                                                   (priority_dict[pageid],
92                                                    page))
93                # Smallest priority matches expected one; yield early.
94                if priority == next_prio:
95                    yield page
96                    next_prio += 1
97                else:
98                    # Push onto the heap.
99                    heapq.heappush(prio_queue, (priority, page))
100
            # Extract data in the same order as the input data.
            while prio_queue:
                priority, page = heapq.heappop(prio_queue)
                yield page

    def preloadpages(self, pagelist, *, groupsize=50, templates=False,
                     langlinks=False, pageprops=False):
        """Return a generator to a list of preloaded pages.

        Pages are iterated in the same order as in the underlying pagelist.
        In case of duplicates in a groupsize batch, the first entry is
        returned.

        :param pagelist: an iterable that returns Page objects
        :param groupsize: how many Pages to query at a time
        :type groupsize: int
        :param templates: preload pages (typically templates) transcluded in
            the provided pages
        :type templates: bool
        :param langlinks: preload all language links from the provided pages
            to other languages
        :type langlinks: bool
        :param pageprops: preload various properties defined in page content
        :type pageprops: bool

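        Example (a sketch; ``site`` is this site object and the page
        titles are illustrative)::

            pages = [pywikibot.Page(site, t) for t in ('Foo', 'Bar')]
            for page in site.preloadpages(pages, groupsize=10):
                print(page.title(), len(page.text))
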
125        """
126        props = 'revisions|info|categoryinfo'
127        if templates:
128            props += '|templates'
129        if langlinks:
130            props += '|langlinks'
131        if pageprops:
132            props += '|pageprops'
133
134        parameter = self._paraminfo.parameter('query+info', 'prop')
135        if self.logged_in() and self.has_right('apihighlimits'):
136            max_ids = int(parameter['highlimit'])
137        else:
138            max_ids = int(parameter['limit'])  # T78333, T161783
139
140        for sublist in itergroup(pagelist, min(groupsize, max_ids)):
141            # Do not use p.pageid property as it will force page loading.
142            pageids = [str(p._pageid) for p in sublist
143                       if hasattr(p, '_pageid') and p._pageid > 0]
144            cache = {}
145            # In case of duplicates, return the first entry.
146            for priority, page in enumerate(sublist):
147                try:
148                    cache.setdefault(page.title(with_section=False),
149                                     (priority, page))
150                except InvalidTitleError:
151                    pywikibot.exception()
152
153            prio_queue = []
154            next_prio = 0
155            rvgen = api.PropertyGenerator(props, site=self)
156            rvgen.set_maximum_items(-1)  # suppress use of "rvlimit" parameter
157
158            if len(pageids) == len(sublist) and len(set(pageids)) <= max_ids:
159                # only use pageids if all pages have them
160                rvgen.request['pageids'] = set(pageids)
161            else:
162                rvgen.request['titles'] = list(cache.keys())
163            rvgen.request['rvprop'] = self._rvprops(content=True)
164            pywikibot.output('Retrieving {} pages from {}.'
165                             .format(len(cache), self))
166
            for pagedata in rvgen:
                pywikibot.debug('Preloading {}'.format(pagedata), _logger)
                try:
                    if pagedata['title'] not in cache:
                        # API always returns a "normalized" title which is
                        # usually the same as the canonical form returned by
                        # page.title(), but sometimes not (e.g.,
                        # gender-specific localizations of "User" namespace).
                        # This checks to see if there is a normalized title in
                        # the response that corresponds to the canonical form
                        # used in the query.
                        for key in cache:
                            if self.sametitle(key, pagedata['title']):
                                cache[pagedata['title']] = cache[key]
                                break
                        else:
                            pywikibot.warning(
                                'preloadpages: Query returned unexpected '
                                "title '{}'".format(pagedata['title']))
                            continue
                except KeyError:
                    pywikibot.debug("No 'title' in {}"
                                    .format(pagedata), _logger)
                    pywikibot.debug('pageids={}'.format(pageids), _logger)
                    pywikibot.debug('titles={}'
                                    .format(list(cache.keys())), _logger)
                    continue
                priority, page = cache[pagedata['title']]
                api.update_page(page, pagedata, rvgen.props)
                priority, page = heapq.heappushpop(prio_queue,
                                                   (priority, page))
                # Smallest priority matches expected one; yield.
                if priority == next_prio:
                    yield page
                    next_prio += 1
                else:
                    # Push back onto the heap.
                    heapq.heappush(prio_queue, (priority, page))

            # Empty the heap.
            while prio_queue:
                priority, page = heapq.heappop(prio_queue)
                yield page

    @deprecated_args(
        followRedirects='follow_redirects', filterRedirects='filter_redirects')
    def pagebacklinks(self, page, *, follow_redirects=False,
                      filter_redirects=None, namespaces=None, total=None,
                      content=False):
        """Iterate all pages that link to the given page.

        :see: https://www.mediawiki.org/wiki/API:Backlinks

        :param page: The Page to get links to.
        :param follow_redirects: Also return links to redirects pointing to
            the given page.
        :param filter_redirects: If True, only return redirects to the given
            page. If False, only return non-redirect links. If None, return
            both (no filtering).
        :param namespaces: If present, only return links from the namespaces
            in this list.
        :type namespaces: iterable of str or Namespace key,
            or a single instance of those types. May be a '|' separated
            list of namespace identifiers.
        :param total: Maximum number of pages to retrieve in total.
        :param content: if True, load the current content of each iterated page
            (default False)
        :rtype: typing.Iterable[pywikibot.Page]
        :raises KeyError: a namespace identifier was not resolved
        :raises TypeError: a namespace identifier has an inappropriate
            type such as NoneType or bool
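
        Example (a sketch; the page title is illustrative)::

            page = pywikibot.Page(site, 'Main Page')
            for backlink in site.pagebacklinks(page, total=5):
                print(backlink.title())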
238        """
239        bltitle = page.title(with_section=False).encode(self.encoding())
240        blargs = {'gbltitle': bltitle}
241        if filter_redirects is not None:
242            blargs['gblfilterredir'] = ('redirects' if filter_redirects
243                                        else 'nonredirects')
244        blgen = self._generator(api.PageGenerator, type_arg='backlinks',
245                                namespaces=namespaces, total=total,
246                                g_content=content, **blargs)
247        if follow_redirects:
248            # links identified by MediaWiki as redirects may not really be,
249            # so we have to check each "redirect" page and see if it
250            # really redirects to this page
251            # see fixed MediaWiki bug T9304
252            redirgen = self._generator(api.PageGenerator,
253                                       type_arg='backlinks',
254                                       gbltitle=bltitle,
255                                       gblfilterredir='redirects')
256            genlist = {None: blgen}
257            for redir in redirgen:
258                if redir == page:
259                    # if a wiki contains pages whose titles contain
260                    # namespace aliases that existed before those aliases
261                    # were defined (example: [[WP:Sandbox]] existed as a
262                    # redirect to [[Wikipedia:Sandbox]] before the WP: alias
263                    # was created) they can be returned as redirects to
264                    # themselves; skip these
265                    continue
266                if redir.getRedirectTarget() == page:
267                    genlist[redir.title()] = self.pagebacklinks(
268                        redir, follow_redirects=True,
269                        filter_redirects=filter_redirects,
270                        namespaces=namespaces,
271                        content=content
272                    )
273            return itertools.chain(*genlist.values())
274        return blgen

    @deprecated_args(step=True, filterRedirects='filter_redirects')
    def page_embeddedin(self, page, *, filter_redirects=None, namespaces=None,
                        total=None, content=False):
        """Iterate all pages that embed the given page as a template.

        :see: https://www.mediawiki.org/wiki/API:Embeddedin

        :param page: The Page to get inclusions for.
        :param filter_redirects: If True, only return redirects that embed
            the given page. If False, only return non-redirect links. If
            None, return both (no filtering).
        :param namespaces: If present, only return links from the namespaces
            in this list.
        :type namespaces: iterable of str or Namespace key,
            or a single instance of those types. May be a '|' separated
            list of namespace identifiers.
        :param content: if True, load the current content of each iterated page
            (default False)
        :rtype: typing.Iterable[pywikibot.Page]
        :raises KeyError: a namespace identifier was not resolved
        :raises TypeError: a namespace identifier has an inappropriate
            type such as NoneType or bool
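
        Example (a sketch; the template name is illustrative)::

            tpl = pywikibot.Page(site, 'Template:Stub')
            for page in site.page_embeddedin(tpl, total=5):
                print(page.title())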
298        """
299        eiargs = {'geititle':
300                  page.title(with_section=False).encode(self.encoding())}
301        if filter_redirects is not None:
302            eiargs['geifilterredir'] = ('redirects' if filter_redirects
303                                        else 'nonredirects')
304        return self._generator(api.PageGenerator, type_arg='embeddedin',
305                               namespaces=namespaces, total=total,
306                               g_content=content, **eiargs)
307
308    @deprecated_args(
309        step=None, followRedirects='follow_redirects',
310        filterRedirects='filter_redirects',
311        onlyTemplateInclusion='only_template_inclusion',
312        withTemplateInclusion='with_template_inclusion')
313    def pagereferences(self, page, *, follow_redirects=False,
314                       filter_redirects=None, with_template_inclusion=True,
315                       only_template_inclusion=False, namespaces=None,
316                       total=None, content=False):
317        """
318        Convenience method combining pagebacklinks and page_embeddedin.
319
320        :param namespaces: If present, only return links from the namespaces
321            in this list.
322        :type namespaces: iterable of str or Namespace key,
323            or a single instance of those types. May be a '|' separated
324            list of namespace identifiers.
325        :rtype: typing.Iterable[pywikibot.Page]
326        :raises KeyError: a namespace identifier was not resolved
327        :raises TypeError: a namespace identifier has an inappropriate
328            type such as NoneType or bool
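
        Example (a sketch; ``page`` is any pywikibot.Page)::

            for ref in site.pagereferences(page, total=10):
                print(ref.title())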
329        """
330        if only_template_inclusion:
331            return self.page_embeddedin(page,
332                                        filter_redirects=filter_redirects,
333                                        namespaces=namespaces, total=total,
334                                        content=content)
335        if not with_template_inclusion:
336            return self.pagebacklinks(page, follow_redirects=follow_redirects,
337                                      filter_redirects=filter_redirects,
338                                      namespaces=namespaces, total=total,
339                                      content=content)
340        return itertools.islice(
341            itertools.chain(
342                self.pagebacklinks(
343                    page, follow_redirects=follow_redirects,
344                    filter_redirects=filter_redirects,
345                    namespaces=namespaces, content=content),
346                self.page_embeddedin(
347                    page, filter_redirects=filter_redirects,
348                    namespaces=namespaces, content=content)
349            ), total)
350
351    @deprecated_args(step=True)
352    def pagelinks(self, page, *, namespaces=None, follow_redirects=False,
353                  total=None, content=False):
354        """Iterate internal wikilinks contained (or transcluded) on page.
355
356        :see: https://www.mediawiki.org/wiki/API:Links
357
358        :param namespaces: Only iterate pages in these namespaces
359            (default: all)
360        :type namespaces: iterable of str or Namespace key,
361            or a single instance of those types. May be a '|' separated
362            list of namespace identifiers.
363        :param follow_redirects: if True, yields the target of any redirects,
364            rather than the redirect page
365        :param content: if True, load the current content of each iterated page
366            (default False)
367        :raises KeyError: a namespace identifier was not resolved
368        :raises TypeError: a namespace identifier has an inappropriate
369            type such as NoneType or bool
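
        Example (a sketch; namespace 0 restricts to main-space pages)::

            for linked in site.pagelinks(page, namespaces=[0], total=5):
                print(linked.title())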
370        """
371        plargs = {}
372        if hasattr(page, '_pageid'):
373            plargs['pageids'] = str(page._pageid)
374        else:
375            pltitle = page.title(with_section=False).encode(self.encoding())
376            plargs['titles'] = pltitle
377        return self._generator(api.PageGenerator, type_arg='links',
378                               namespaces=namespaces, total=total,
379                               g_content=content, redirects=follow_redirects,
380                               **plargs)
381
382    # Sortkey doesn't work with generator
383    @deprecated_args(withSortKey=True, step=True)
384    def pagecategories(self, page, *, total=None, content=False):
385        """Iterate categories to which page belongs.
386
387        :see: https://www.mediawiki.org/wiki/API:Categories
388
389        :param content: if True, load the current content of each iterated page
390            (default False); note that this means the contents of the
391            category description page, not the pages contained in the category
392        """
393        clargs = {}
394        if hasattr(page, '_pageid'):
395            clargs['pageids'] = str(page._pageid)
396        else:
397            clargs['titles'] = page.title(
398                with_section=False).encode(self.encoding())
399        return self._generator(api.PageGenerator,
400                               type_arg='categories', total=total,
401                               g_content=content, **clargs)
402
403    @deprecated_args(step=True)
404    def pageimages(self, page, *, total=None, content=False):
405        """Iterate images used (not just linked) on the page.
406
407        :see: https://www.mediawiki.org/wiki/API:Images
408
409        :param content: if True, load the current content of each iterated page
410            (default False); note that this means the content of the image
411            description page, not the image itself
412
413        """
414        imtitle = page.title(with_section=False).encode(self.encoding())
415        return self._generator(api.PageGenerator, type_arg='images',
416                               titles=imtitle, total=total,
417                               g_content=content)
418
419    @deprecated_args(step=True)
420    def pagetemplates(self, page, *, namespaces=None, total=None,
421                      content=False):
422        """Iterate templates transcluded (not just linked) on the page.
423
424        :see: https://www.mediawiki.org/wiki/API:Templates
425
426        :param namespaces: Only iterate pages in these namespaces
427        :type namespaces: iterable of str or Namespace key,
428            or a single instance of those types. May be a '|' separated
429            list of namespace identifiers.
430        :param content: if True, load the current content of each iterated page
431            (default False)
432
433        :raises KeyError: a namespace identifier was not resolved
434        :raises TypeError: a namespace identifier has an inappropriate
435            type such as NoneType or bool
436        """
437        tltitle = page.title(with_section=False).encode(self.encoding())
438        return self._generator(api.PageGenerator, type_arg='templates',
439                               titles=tltitle, namespaces=namespaces,
440                               total=total, g_content=content)
441
    @deprecated_args(step=True, startsort=True, endsort=True)
    def categorymembers(self, category, *,
                        namespaces=None,
                        sortby: Optional[str] = None,
                        reverse: bool = False,
                        starttime=None,
                        endtime=None,
                        total: Optional[int] = None,
                        content: bool = False,
                        member_type=None,
                        startprefix: Optional[str] = None,
                        endprefix: Optional[str] = None):
        """Iterate members of specified category.

        :see: https://www.mediawiki.org/wiki/API:Categorymembers

        :param category: The Category to iterate.
        :param namespaces: If present, only return category members from
            these namespaces. To yield subcategories or files, use
            parameter member_type instead.
        :type namespaces: iterable of str or Namespace key,
            or a single instance of those types. May be a '|' separated
            list of namespace identifiers.
        :param sortby: determines the order in which results are generated,
            valid values are "sortkey" (default, results ordered by category
            sort key) or "timestamp" (results ordered by time page was
            added to the category)
        :param reverse: if True, generate results in reverse order
            (default False)
        :param starttime: if provided, only generate pages added after this
            time; not valid unless sortby="timestamp"
        :type starttime: pywikibot.Timestamp
        :param endtime: if provided, only generate pages added before this
            time; not valid unless sortby="timestamp"
        :param startprefix: if provided, only generate pages >= this title
            lexically; not valid if sortby="timestamp"
        :param endprefix: if provided, only generate pages < this title
            lexically; not valid if sortby="timestamp"
        :param content: if True, load the current content of each iterated page
            (default False)
        :param member_type: member type; if member_type includes 'page' and is
            used in conjunction with sortby="timestamp", the API may limit
            results to only pages in the first 50 namespaces.
        :type member_type: str or iterable of str;
            values: page, subcat, file
        :rtype: typing.Iterable[pywikibot.Page]
        :raises KeyError: a namespace identifier was not resolved
        :raises TypeError: a namespace identifier has an inappropriate
            type such as NoneType or bool
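
        Example (a sketch; the category name is illustrative)::

            cat = pywikibot.Category(site, 'Category:Living people')
            for member in site.categorymembers(cat, member_type='page',
                                               total=5):
                print(member.title())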
491        """
492        if category.namespace() != 14:
493            raise Error('categorymembers: non-Category page {!r} specified'
494                        .format(category))
495
496        cmtitle = category.title(with_section=False).encode(self.encoding())
497        cmargs = {
498            'type_arg': 'categorymembers',
499            'gcmtitle': cmtitle,
500            'gcmprop': 'ids|title|sortkey'
501        }
502
503        if sortby in ['sortkey', 'timestamp']:
504            cmargs['gcmsort'] = sortby
505        elif sortby:
506            raise ValueError('categorymembers: invalid sortby value {!r}'
507                             .format(sortby))
508
509        if starttime and endtime and starttime > endtime:
510            raise ValueError(
511                'categorymembers: starttime must be before endtime')
512        if startprefix and endprefix and startprefix > endprefix:
513            raise ValueError(
514                'categorymembers: startprefix must be less than endprefix')
515
516        if isinstance(member_type, str):
517            member_type = {member_type}
518
519        if member_type and sortby == 'timestamp':
            # Convert namespaces to a known type
            namespaces = set(self.namespaces.resolve(namespaces or []))

            if 'page' in member_type:
                excluded_namespaces = set()
                if 'file' not in member_type:
                    excluded_namespaces.add(6)
                if 'subcat' not in member_type:
                    excluded_namespaces.add(14)

                if namespaces:
                    if excluded_namespaces.intersection(namespaces):
                        raise ValueError(
                            'incompatible namespaces {!r} and member_type {!r}'
                            .format(namespaces, member_type))
                    # None of the excluded namespaces are in `namespaces`.
                else:
                    # If the number of namespaces is greater than permitted by
                    # the API, it will issue a warning and use the namespaces
                    # up until the limit, which will usually be sufficient.
                    # TODO: QueryGenerator should detect when the number of
                    # namespaces requested is higher than available, and split
                    # the request into several batches.
                    excluded_namespaces.update([-1, -2])
                    namespaces = set(self.namespaces) - excluded_namespaces
            else:
                if 'file' in member_type:
                    namespaces.add(6)
                if 'subcat' in member_type:
                    namespaces.add(14)

            member_type = None

        if member_type:
            cmargs['gcmtype'] = member_type

        if reverse:
            cmargs['gcmdir'] = 'desc'
            # API wants start/end params in opposite order if using descending
            # sort; we take care of this reversal for the user
            starttime, endtime = endtime, starttime
            startprefix, endprefix = endprefix, startprefix

        if starttime and sortby == 'timestamp':
            cmargs['gcmstart'] = starttime
        elif starttime:
            raise ValueError('categorymembers: '
                             "invalid combination of 'sortby' and 'starttime'")

        if endtime and sortby == 'timestamp':
            cmargs['gcmend'] = endtime
        elif endtime:
            raise ValueError('categorymembers: '
                             "invalid combination of 'sortby' and 'endtime'")

        if startprefix and sortby != 'timestamp':
            cmargs['gcmstartsortkeyprefix'] = startprefix
        elif startprefix:
            raise ValueError('categorymembers: invalid combination of '
                             "'sortby' and 'startprefix'")

        if endprefix and sortby != 'timestamp':
            cmargs['gcmendsortkeyprefix'] = endprefix
        elif endprefix:
            raise ValueError('categorymembers: '
                             "invalid combination of 'sortby' and 'endprefix'")

        return self._generator(api.PageGenerator, namespaces=namespaces,
                               total=total, g_content=content, **cmargs)

    def _rvprops(self, content: bool = False) -> List[str]:
        """Set up rvprop items for loadrevisions and preloadpages.

        :return: rvprop items
        """
        props = ['comment', 'contentmodel', 'flags', 'ids', 'parsedcomment',
                 'sha1', 'size', 'tags', 'timestamp', 'user', 'userid']
        if content:
            props.append('content')
        if self.mw_version >= '1.32':
            props.append('roles')
        return props

    @deprecated_args(getText='content', sysop=True)
    @remove_last_args(['rollback'])
    def loadrevisions(self, page, *, content=False, section=None, **kwargs):
        """Retrieve revision information and store it in page object.

        By default, retrieves the last (current) revision of the page,
        unless any of the optional parameters revids, startid, endid,
        starttime, endtime, rvdir, user, excludeuser, or total are
        specified. Unless noted below, all parameters not specified
        default to False.

        If rvdir is False or not specified, startid must be greater than
        endid if both are specified; likewise, starttime must be greater
        than endtime. If rvdir is True, these relationships are reversed.

        :see: https://www.mediawiki.org/wiki/API:Revisions

        :param page: retrieve revisions of this Page and hold the data.
        :type page: pywikibot.Page
        :param content: if True, retrieve the wiki-text of each revision;
            otherwise, only retrieve the revision metadata (default)
        :type content: bool
        :param section: if specified, retrieve only this section of the text
            (content must be True); section must be given by number (top of
            the article is section 0), not name
        :type section: int
        :keyword revids: retrieve only the specified revision ids (raises
            an Error if any of the revids does not correspond to the page)
        :type revids: an int, a str or a list of ints or strings
        :keyword startid: retrieve revisions starting with this revid
        :keyword endid: stop upon retrieving this revid
        :keyword starttime: retrieve revisions starting at this Timestamp
        :keyword endtime: stop upon reaching this Timestamp
        :keyword rvdir: if false, retrieve newest revisions first (default);
            if true, retrieve oldest first
        :keyword user: retrieve only revisions authored by this user
        :keyword excludeuser: retrieve all revisions not authored by this user
        :keyword total: number of revisions to retrieve
        :raises ValueError: invalid startid/endid or starttime/endtime values
        :raises pywikibot.exceptions.Error: revids belonging to a different
            page
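
        Example (a sketch; fetches the latest revision with its
        wikitext and stores it on the page object)::

            site.loadrevisions(page, content=True)
            print(page.latest_revision.user)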
644        """
645        latest = all(val is None for val in kwargs.values())
646
647        revids = kwargs.get('revids')
648        startid = kwargs.get('startid')
649        starttime = kwargs.get('starttime')
650        endid = kwargs.get('endid')
651        endtime = kwargs.get('endtime')
652        rvdir = kwargs.get('rvdir')
653        user = kwargs.get('user')
654        step = kwargs.get('step')
655
656        # check for invalid argument combinations
657        if (startid is not None or endid is not None) \
658           and (starttime is not None or endtime is not None):
659            raise ValueError(
660                'loadrevisions: startid/endid combined with starttime/endtime')
661
662        if starttime is not None and endtime is not None:
663            if rvdir and starttime >= endtime:
664                raise ValueError(
665                    'loadrevisions: starttime > endtime with rvdir=True')
666
667            if not rvdir and endtime >= starttime:
668                raise ValueError(
669                    'loadrevisions: endtime > starttime with rvdir=False')
670
671        if startid is not None and endid is not None:
672            if rvdir and startid >= endid:
673                raise ValueError(
674                    'loadrevisions: startid > endid with rvdir=True')
675            if not rvdir and endid >= startid:
676                raise ValueError(
677                    'loadrevisions: endid > startid with rvdir=False')
678
679        rvargs = {'type_arg': 'info|revisions'}
680        rvargs['rvprop'] = self._rvprops(content=content)
681
682        if content and section is not None:
683            rvargs['rvsection'] = str(section)
684
685        if revids is None:
686            rvtitle = page.title(with_section=False).encode(self.encoding())
687            rvargs['titles'] = rvtitle
688        else:
689            if isinstance(revids, (int, str)):
690                ids = str(revids)
691            else:
692                ids = '|'.join(str(r) for r in revids)
693            rvargs['revids'] = ids
694
695        if rvdir:
696            rvargs['rvdir'] = 'newer'
697        elif rvdir is not None:
698            rvargs['rvdir'] = 'older'
699
700        if startid:
701            rvargs['rvstartid'] = startid
702        if endid:
703            rvargs['rvendid'] = endid
704        if starttime:
705            rvargs['rvstart'] = starttime
706        if endtime:
707            rvargs['rvend'] = endtime
708
709        if user:
710            rvargs['rvuser'] = user
711        else:
712            rvargs['rvexcludeuser'] = kwargs.get('excludeuser')
713
714        # assemble API request
715        rvgen = self._generator(api.PropertyGenerator,
716                                total=kwargs.get('total'), **rvargs)
717
718        if step:
            rvgen.set_query_increment(step)

        if latest or 'revids' in rvgen.request:
            rvgen.set_maximum_items(-1)  # suppress use of rvlimit parameter

        for pagedata in rvgen:
            if not self.sametitle(pagedata['title'],
                                  page.title(with_section=False)):
                raise InconsistentTitleError(page, pagedata['title'])
            if 'missing' in pagedata:
                raise NoPageError(page)
            api.update_page(page, pagedata, rvgen.props)

    @deprecated_args(step=True)
    def pagelanglinks(self, page, *,
                      total: Optional[int] = None,
                      include_obsolete: bool = False,
                      include_empty_titles: bool = False):
        """Iterate all interlanguage links on page, yielding Link objects.

        *New in version 6.2:* *include_empty_titles* parameter was added.

        :see: https://www.mediawiki.org/wiki/API:Langlinks

        :param include_obsolete: if true, yield even Link objects whose
            site is obsolete
        :param include_empty_titles: if true, yield even Link objects whose
            title is empty but redirects to a site like [[en:]]
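
        Example (a sketch; ``page`` is any pywikibot.Page)::

            for link in site.pagelanglinks(page, total=5):
                print(link.site.code, link.title)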
747        """
748        lltitle = page.title(with_section=False)
749        llquery = self._generator(api.PropertyGenerator,
750                                  type_arg='langlinks',
751                                  titles=lltitle.encode(self.encoding()),
752                                  total=total)
753        for pageitem in llquery:
754            if not self.sametitle(pageitem['title'], lltitle):
755                raise InconsistentTitleError(page, pageitem['title'])
756            if 'langlinks' not in pageitem:
757                continue
758            for linkdata in pageitem['langlinks']:
759                link = pywikibot.Link.langlinkUnsafe(linkdata['lang'],
760                                                     linkdata['*'],
761                                                     source=self)
762                if link.site.obsolete and not include_obsolete:
763                    continue
764
765                if link.title or include_empty_titles:
766                    yield link
767
768    @deprecated_args(step=True)
769    def page_extlinks(self, page, *, total=None):
770        """Iterate all external links on page, yielding URL strings.
771
772        :see: https://www.mediawiki.org/wiki/API:Extlinks
773        """
774        eltitle = page.title(with_section=False)
775        elquery = self._generator(api.PropertyGenerator, type_arg='extlinks',
776                                  titles=eltitle.encode(self.encoding()),
777                                  total=total)
778        for pageitem in elquery:
779            if not self.sametitle(pageitem['title'], eltitle):
780                raise InconsistentTitleError(page, pageitem['title'])
781            if 'extlinks' not in pageitem:
782                continue
783            for linkdata in pageitem['extlinks']:
784                yield linkdata['*']
785
786    @deprecated_args(throttle=True, limit='total', step=True,
787                     includeredirects='filterredir')
788    def allpages(self, start='!', prefix='', namespace=0, filterredir=None,
789                 filterlanglinks=None, minsize=None, maxsize=None,
790                 protect_type=None, protect_level=None, reverse=False,
791                 total=None, content=False):
792        """Iterate pages in a single namespace.
793
794        :see: https://www.mediawiki.org/wiki/API:Allpages
795
796        :param start: Start at this title (page need not exist).
797        :param prefix: Only yield pages starting with this string.
798        :param namespace: Iterate pages from this (single) namespace
799        :type namespace: int or Namespace.
800        :param filterredir: if True, only yield redirects; if False (and not
801            None), only yield non-redirects (default: yield both)
802        :param filterlanglinks: if True, only yield pages with language links;
803            if False (and not None), only yield pages without language links
804            (default: yield both)
805        :param minsize: if present, only yield pages at least this many
806            bytes in size
807        :param maxsize: if present, only yield pages at most this many bytes
808            in size
809        :param protect_type: only yield pages that have a protection of the
810            specified type
811        :type protect_type: str
812        :param protect_level: only yield pages that have protection at this
813            level; can only be used if protect_type is specified
        :param reverse: if True, iterate in reverse Unicode lexicographic
            order (default: iterate in forward order)
        :param content: if True, load the current content of each iterated page
            (default False)
        :raises KeyError: the namespace identifier was not resolved
        :raises TypeError: the namespace identifier has an inappropriate
            type such as bool, or an iterable with more than one namespace
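
        Example (a sketch; the start title is illustrative)::

            for page in site.allpages(start='Python', total=5):
                print(page.title())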
821        """
822        # backward compatibility test
823        if filterredir not in (True, False, None):
824            old = filterredir
825            if filterredir:
826                if filterredir == 'only':
827                    filterredir = True
828                else:
829                    filterredir = None
830            else:
831                filterredir = False
832            warn('The value "{0!r}" for "filterredir" is deprecated; use '
833                 '{1} instead.'.format(old, filterredir),
834                 DeprecationWarning, 3)
835
836        apgen = self._generator(api.PageGenerator, type_arg='allpages',
837                                namespaces=namespace,
838                                gapfrom=start, total=total,
839                                g_content=content)
840        if prefix:
841            apgen.request['gapprefix'] = prefix
842        if filterredir is not None:
843            apgen.request['gapfilterredir'] = ('redirects' if filterredir else
844                                               'nonredirects')
845        if filterlanglinks is not None:
846            apgen.request['gapfilterlanglinks'] = ('withlanglinks'
847                                                   if filterlanglinks else
848                                                   'withoutlanglinks')
849        if isinstance(minsize, int):
850            apgen.request['gapminsize'] = str(minsize)
851        if isinstance(maxsize, int):
852            apgen.request['gapmaxsize'] = str(maxsize)
853        if isinstance(protect_type, str):
854            apgen.request['gapprtype'] = protect_type
855            if isinstance(protect_level, str):
856                apgen.request['gapprlevel'] = protect_level
857        if reverse:
858            apgen.request['gapdir'] = 'descending'
859        return apgen
860
861    @deprecated_args(step=True)
862    def alllinks(self, start='!', prefix='', namespace=0, unique=False,
863                 fromids=False, total=None):
864        """Iterate all links to pages (which need not exist) in one namespace.
865
866        Note that, in practice, links that were found on pages that have
867        been deleted may not have been removed from the links table, so this
868        method can return false positives.
869
870        :see: https://www.mediawiki.org/wiki/API:Alllinks
871
872        :param start: Start at this title (page need not exist).
873        :param prefix: Only yield pages starting with this string.
874        :param namespace: Iterate pages from this (single) namespace
875        :type namespace: int or Namespace
876        :param unique: If True, only iterate each link title once (default:
877            iterate once for each linking page)
878        :param fromids: if True, include the pageid of the page containing
879            each link (default: False) as the '_fromid' attribute of the Page;
880            cannot be combined with unique
881        :raises KeyError: the namespace identifier was not resolved
882        :raises TypeError: the namespace identifier has an inappropriate
883            type such as bool, or an iterable with more than one namespace
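
        Example (a sketch; the prefix is illustrative)::

            for page in site.alllinks(prefix='Wiki', total=5):
                print(page.title())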
884        """
885        if unique and fromids:
886            raise Error('alllinks: unique and fromids cannot both be True.')
887        algen = self._generator(api.ListGenerator, type_arg='alllinks',
888                                namespaces=namespace, alfrom=start,
889                                total=total, alunique=unique)
890        if prefix:
891            algen.request['alprefix'] = prefix
892        if fromids:
893            algen.request['alprop'] = 'title|ids'
894        for link in algen:
895            p = pywikibot.Page(self, link['title'], link['ns'])
896            if fromids:
897                p._fromid = link['fromid']
898            yield p

    @deprecated_args(step=True)
    def allcategories(self, start='!', prefix='', total=None,
                      reverse=False, content=False):
        """Iterate categories used (which need not have a Category page).

        Iterator yields Category objects. Note that, in practice, links that
        were found on pages that have been deleted may not have been removed
        from the database table, so this method can return false positives.

        :see: https://www.mediawiki.org/wiki/API:Allcategories

        :param start: Start at this category title (category need not exist).
        :param prefix: Only yield categories starting with this string.
        :param reverse: if True, iterate in reverse Unicode lexicographic
            order (default: iterate in forward order)
        :param content: if True, load the current content of each iterated page
            (default False); note that this means the contents of the category
            description page, not the pages that are members of the category
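
        Example (a sketch; the prefix is illustrative)::

            for cat in site.allcategories(prefix='Hist', total=5):
                print(cat.title())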
918        """
919        acgen = self._generator(api.PageGenerator,
920                                type_arg='allcategories', gacfrom=start,
921                                total=total, g_content=content)
922        if prefix:
923            acgen.request['gacprefix'] = prefix
924        if reverse:
925            acgen.request['gacdir'] = 'descending'
926        return acgen

    @deprecated_args(step=True)
    def botusers(self, total=None):
        """Iterate bot users.

        Iterated values are dicts containing 'name', 'userid', 'editcount',
        'registration', and 'groups' keys. 'groups' will be present only if
        the user is a member of at least 1 group, and will be a list of
        str; all the other values are str and should always be present.
        """
        if not hasattr(self, '_bots'):
            self._bots = {}

        if not self._bots:
            for item in self.allusers(group='bot', total=total):
                self._bots.setdefault(item['name'], item)

        yield from self._bots.values()

    @deprecated_args(step=True)
    def allusers(self, start='!', prefix='', group=None, total=None):
        """Iterate registered users, ordered by username.

        Iterated values are dicts containing 'name', 'editcount',
        'registration', and (sometimes) 'groups' keys. 'groups' will be
        present only if the user is a member of at least 1 group, and
        will be a list of str; all the other values are str and should
        always be present.

        :see: https://www.mediawiki.org/wiki/API:Allusers

        :param start: start at this username (name need not exist)
        :param prefix: only iterate usernames starting with this substring
        :param group: only iterate users that are members of this group
        :type group: str
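
        Example (a sketch; the group name is illustrative)::

            for user in site.allusers(group='sysop', total=5):
                print(user['name'], user['editcount'])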
962        """
963        augen = self._generator(api.ListGenerator, type_arg='allusers',
964                                auprop='editcount|groups|registration',
965                                aufrom=start, total=total)
966        if prefix:
967            augen.request['auprefix'] = prefix
968        if group:
969            augen.request['augroup'] = group
970        return augen
971
972    @deprecated_args(step=True)
973    def allimages(self, start='!', prefix='', minsize=None, maxsize=None,
974                  reverse=False, sha1=None, sha1base36=None,
975                  total=None, content=False):
976        """Iterate all images, ordered by image title.
977
978        Yields FilePages, but these pages need not exist on the wiki.
979
980        :see: https://www.mediawiki.org/wiki/API:Allimages
981
982        :param start: start at this title (name need not exist)
983        :param prefix: only iterate titles starting with this substring
984        :param minsize: only iterate images of at least this many bytes
985        :param maxsize: only iterate images of no more than this many bytes
        :param reverse: if True, iterate in reverse lexicographic order
        :param sha1: only iterate images (theoretically there could be
            more than one) with this sha1 hash
        :param sha1base36: same as sha1 but in base 36
        :param content: if True, load the current content of each iterated page
            (default False); note that this means the content of the image
            description page, not the image itself
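
        Example (a sketch; the prefix is illustrative)::

            for image in site.allimages(prefix='Map', total=5):
                print(image.title())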
993        """
994        aigen = self._generator(api.PageGenerator,
995                                type_arg='allimages', gaifrom=start,
996                                total=total, g_content=content)
997        if prefix:
998            aigen.request['gaiprefix'] = prefix
999        if isinstance(minsize, int):
1000            aigen.request['gaiminsize'] = str(minsize)
1001        if isinstance(maxsize, int):
1002            aigen.request['gaimaxsize'] = str(maxsize)
1003        if reverse:
1004            aigen.request['gaidir'] = 'descending'
1005        if sha1:
1006            aigen.request['gaisha1'] = sha1
1007        if sha1base36:
1008            aigen.request['gaisha1base36'] = sha1base36
1009        return aigen
1010
1011    @deprecated_args(limit='total')  # ignore falimit setting
1012    def filearchive(self, start=None, end=None, reverse=False, total=None,
1013                    **kwargs):
1014        """Iterate archived files.
1015
        Yields dicts of file archive information.

        :see: https://www.mediawiki.org/wiki/API:filearchive

        :param start: start at this title (name need not exist)
        :param end: end at this title (name need not exist)
        :param reverse: if True, iterate in reverse lexicographic order
        :param total: maximum number of pages to retrieve in total
        :keyword prefix: only iterate titles starting with this substring
        :keyword sha1: only iterate image with this sha1 hash
        :keyword sha1base36: same as sha1 but in base 36
        :keyword prop: Image information to get. Default is timestamp
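
        Example (a sketch; the yielded dict keys shown here follow the
        API's filearchive output and are illustrative)::

            for entry in site.filearchive(total=5):
                print(entry['name'], entry['timestamp'])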
1028        """
1029        if start and end:
1030            self.assert_valid_iter_params(
1031                'filearchive', start, end, reverse, is_ts=False)
1032        fagen = self._generator(api.ListGenerator,
1033                                type_arg='filearchive',
1034                                fafrom=start,
1035                                fato=end,
1036                                total=total)
1037        for k, v in kwargs.items():
1038            fagen.request['fa' + k] = v
1039        if reverse:
1040            fagen.request['fadir'] = 'descending'
1041        return fagen
1042
1043    @deprecated_args(step=True)
1044    def blocks(self, starttime=None, endtime=None, reverse=False,
1045               blockids=None, users=None, iprange: Optional[str] = None,
1046               total: Optional[int] = None):
1047        """Iterate all current blocks, in order of creation.
1048
1049        The iterator yields dicts containing keys corresponding to the
1050        block properties.
1051
1052        :see: https://www.mediawiki.org/wiki/API:Blocks
1053
1054        :note: logevents only logs user blocks, while this method
1055            iterates all blocks including IP ranges.
1056        :note: ``iprange`` parameter cannot be used together with ``users``.
1057
1058        :param starttime: start iterating at this Timestamp
1059        :type starttime: pywikibot.Timestamp
1060        :param endtime: stop iterating at this Timestamp
1061        :type endtime: pywikibot.Timestamp
1062        :param reverse: if True, iterate oldest blocks first (default: newest)
1063        :type reverse: bool
1064        :param blockids: only iterate blocks with these id numbers. Numbers
1065            must be separated by '|' if given by a str.
1066        :type blockids: str, tuple or list
1067        :param users: only iterate blocks affecting these usernames or IPs
1068        :type users: str, tuple or list
1069        :param iprange: a single IP or an IP range. Ranges broader than
1070            IPv4/16 or IPv6/19 are not accepted.
1071        :param total: total amount of block entries
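
        Example (a sketch; the dict keys correspond to requested block
        properties)::

            for block in site.blocks(total=5):
                print(block['user'], block['expiry'])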
1072        """
1073        if starttime and endtime:
1074            self.assert_valid_iter_params('blocks', starttime, endtime,
1075                                          reverse)
1076        bkgen = self._generator(api.ListGenerator, type_arg='blocks',
1077                                total=total)
1078        bkgen.request['bkprop'] = ['id', 'user', 'by', 'timestamp', 'expiry',
1079                                   'reason', 'range', 'flags', 'userid']
1080        if starttime:
1081            bkgen.request['bkstart'] = starttime
1082        if endtime:
1083            bkgen.request['bkend'] = endtime
1084        if reverse:
1085            bkgen.request['bkdir'] = 'newer'
1086        if blockids:
1087            bkgen.request['bkids'] = blockids
1088        if users:
1089            if isinstance(users, str):
1090                users = users.split('|')
1091
            # actual IPv6 addresses (anonymous users) are uppercase, but
            # they never contain a '::' in the username (those are
            # registered users)
            users = [user.upper() if is_ip_address(user) and '::' not in user
                     else user for user in users]
            bkgen.request['bkusers'] = users
        elif iprange:
            bkgen.request['bkip'] = iprange
        return bkgen

    @deprecated_args(step=True)
    def exturlusage(self, url: Optional[str] = None,
                    protocol: Optional[str] = None, namespaces=None,
                    total: Optional[int] = None, content=False):
        """Iterate Pages that contain links to the given URL.

        :see: https://www.mediawiki.org/wiki/API:Exturlusage

        :param url: The URL to search for (with or without the protocol
            prefix); this may include a '*' as a wildcard, only at the start
            of the hostname
        :param namespaces: list of namespace numbers to search in
        :type namespaces: list of int
        :param total: Maximum number of pages to retrieve in total
        :param protocol: Protocol to search for, typically http or https
            (default: http). The full list is shown on the wiki's
            Special:LinkSearch page
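
        Example (a sketch; the domain is illustrative)::

            for page in site.exturlusage('*.example.org',
                                         protocol='https', total=5):
                print(page.title())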
1117        """
1118        if url is not None:
1119            found_protocol, _, url = url.rpartition('://')
1120
1121            # If url is * we make it None in order to search for every page
1122            # with any URL.
1123            if url == '*':
1124                url = None
1125
1126            if found_protocol:
1127                if protocol and protocol != found_protocol:
1128                    raise ValueError('Protocol was specified, but a different '
1129                                     'one was found in searched url')
1130                protocol = found_protocol
1131
1132        if not protocol:
1133            protocol = 'http'
1134
1135        return self._generator(api.PageGenerator, type_arg='exturlusage',
1136                               geuquery=url, geuprotocol=protocol,
1137                               namespaces=namespaces,
1138                               total=total, g_content=content)
1139
1140    @deprecated_args(step=True)
1141    def imageusage(self, image, namespaces=None, filterredir=None,
1142                   total=None, content=False):
1143        """Iterate Pages that contain links to the given FilePage.
1144
1145        :see: https://www.mediawiki.org/wiki/API:Imageusage
1146
1147        :param image: the image to search for (FilePage need not exist on
1148            the wiki)
1149        :type image: pywikibot.FilePage
1150        :param namespaces: If present, only iterate pages in these namespaces
1151        :type namespaces: iterable of str or Namespace key,
1152            or a single instance of those types. May be a '|' separated
1153            list of namespace identifiers.
1154        :param filterredir: if True, only yield redirects; if False (and not
1155            None), only yield non-redirects (default: yield both)
1156        :param content: if True, load the current content of each iterated page
1157            (default False)
1158        :raises KeyError: a namespace identifier was not resolved
1159        :raises TypeError: a namespace identifier has an inappropriate
1160            type such as NoneType or bool
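
        Example (a sketch; the file name is illustrative)::

            image = pywikibot.FilePage(site, 'File:Example.jpg')
            for page in site.imageusage(image, total=5):
                print(page.title())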
1161        """
1162        iuargs = {'giutitle': image.title(with_section=False)}
1163        if filterredir is not None:
1164            iuargs['giufilterredir'] = ('redirects' if filterredir else
1165                                        'nonredirects')
1166        return self._generator(api.PageGenerator, type_arg='imageusage',
1167                               namespaces=namespaces,
1168                               total=total, g_content=content, **iuargs)
1169
1170    @deprecated_args(step=True)
1171    def logevents(self, logtype: Optional[str] = None,
1172                  user: Optional[str] = None, page=None,
1173                  namespace=None, start=None, end=None,
1174                  reverse: bool = False, tag: Optional[str] = None,
1175                  total: Optional[int] = None):
1176        """Iterate all log entries.
1177
1178        :see: https://www.mediawiki.org/wiki/API:Logevents
1179
1180        :note: logevents with logtype='block' only logs user blocks whereas
1181            site.blocks iterates all blocks including IP ranges.
1182
1183        :param logtype: only iterate entries of this type
1184            (see mediawiki api documentation for available types)
1185        :param user: only iterate entries that match this user name
1186        :param page: only iterate entries affecting this page
1187        :type page: pywikibot.Page or str
1188        :param namespace: namespace(s) to retrieve logevents from
1189        :type namespace: int or Namespace or an iterable of them
1190        :note: due to an API limitation, if namespace param contains multiple
1191            namespaces, log entries from all namespaces will be fetched from
1192            the API and will be filtered later during iteration.
1193        :param start: only iterate entries from and after this Timestamp
1194        :type start: Timestamp or ISO date string
1195        :param end: only iterate entries up to and through this Timestamp
1196        :type end: Timestamp or ISO date string
1197        :param reverse: if True, iterate oldest entries first (default: newest)
1198        :param tag: only iterate entries tagged with this tag
1199        :param total: maximum number of events to iterate
1200        :rtype: iterable
1201
1202        :raises KeyError: the namespace identifier was not resolved
1203        :raises TypeError: the namespace identifier has an inappropriate
1204            type such as bool, or an iterable with more than one namespace
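
        A minimal usage sketch; the site, log type and limit are
        illustrative only::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for entry in site.logevents(logtype='upload', total=3):
                print(entry.page(), entry.timestamp(), entry.user())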
1205        """
1206        if start and end:
1207            self.assert_valid_iter_params('logevents', start, end, reverse)
1208
1209        legen = self._generator(api.LogEntryListGenerator, type_arg=logtype,
1210                                total=total)
1211        if logtype is not None:
1212            legen.request['letype'] = logtype
1213        if user is not None:
1214            legen.request['leuser'] = user
1215        if page is not None:
1216            legen.request['letitle'] = page
1217        if start is not None:
1218            legen.request['lestart'] = start
1219        if end is not None:
1220            legen.request['leend'] = end
1221        if reverse:
1222            legen.request['ledir'] = 'newer'
1223        if namespace is not None:
1224            legen.set_namespace(namespace)
1225        if tag:
1226            legen.request['letag'] = tag
1227
1228        return legen
1229
1230    @deprecated_args(includeredirects='redirect', namespace='namespaces',
1231                     number='total', rcend='end', rclimit='total',
1232                     rcnamespace='namespaces', rcstart='start',
1233                     rctype='changetype', showAnon='anon', showBot='bot',
1234                     showMinor='minor', showPatrolled='patrolled',
1235                     showRedirects='redirect', topOnly='top_only')
1236    def recentchanges(self, *,
1237                      start=None,
1238                      end=None,
1239                      reverse: bool = False,
1240                      namespaces=None,
1241                      changetype: Optional[str] = None,
1242                      minor: Optional[bool] = None,
1243                      bot: Optional[bool] = None,
1244                      anon: Optional[bool] = None,
1245                      redirect: Optional[bool] = None,
1246                      patrolled: Optional[bool] = None,
1247                      top_only: bool = False,
1248                      total: Optional[int] = None,
1249                      user: Union[str, List[str], None] = None,
1250                      excludeuser: Union[str, List[str], None] = None,
1251                      tag: Optional[str] = None):
1252        """Iterate recent changes.
1253
1254        :see: https://www.mediawiki.org/wiki/API:RecentChanges
1255
1256        :param start: Timestamp to start listing from
1257        :type start: pywikibot.Timestamp
1258        :param end: Timestamp to end listing at
1259        :type end: pywikibot.Timestamp
1260        :param reverse: if True, start with oldest changes (default: newest)
1261        :param namespaces: only iterate pages in these namespaces
1262        :type namespaces: iterable of str or Namespace key,
1263            or a single instance of those types. May be a '|' separated
1264            list of namespace identifiers.
1265        :param changetype: only iterate changes of this type ("edit" for
1266            edits to existing pages, "new" for new pages, "log" for log
1267            entries)
1268        :param minor: if True, only list minor edits; if False, only list
1269            non-minor edits; if None, list all
1270        :param bot: if True, only list bot edits; if False, only list
1271            non-bot edits; if None, list all
1272        :param anon: if True, only list anon edits; if False, only list
1273            non-anon edits; if None, list all
1274        :param redirect: if True, only list edits to redirect pages; if
1275            False, only list edits to non-redirect pages; if None, list all
1276        :param patrolled: if True, only list patrolled edits; if False,
1277            only list non-patrolled edits; if None, list all
1278        :param top_only: if True, only list changes that are the latest
1279            revision (default False)
1280        :param user: if not None, only list edits by this user or users
1281        :param excludeuser: if not None, exclude edits by this user or users
1282        :param tag: a recent changes tag
1283        :raises KeyError: a namespace identifier was not resolved
1284        :raises TypeError: a namespace identifier has an inappropriate
1285            type such as NoneType or bool
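
        A minimal usage sketch; the site and filter values are
        illustrative only. Each iterated value is a dict of change
        attributes::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for change in site.recentchanges(changetype='new', total=3):
                print(change['title'], change['timestamp'], change['user'])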
1286        """
1287        if start and end:
1288            self.assert_valid_iter_params('recentchanges', start, end, reverse)
1289
1290        rcgen = self._generator(api.ListGenerator, type_arg='recentchanges',
1291                                rcprop='user|comment|timestamp|title|ids'
1292                                       '|sizes|redirect|loginfo|flags|tags',
1293                                namespaces=namespaces,
1294                                total=total, rctoponly=top_only)
1295        if start is not None:
1296            rcgen.request['rcstart'] = start
1297        if end is not None:
1298            rcgen.request['rcend'] = end
1299        if reverse:
1300            rcgen.request['rcdir'] = 'newer'
1301        if changetype:
1302            rcgen.request['rctype'] = changetype
1303        filters = {'minor': minor,
1304                   'bot': bot,
1305                   'anon': anon,
1306                   'redirect': redirect,
1307                   }
1308        if patrolled is not None and (
1309                self.has_right('patrol') or self.has_right('patrolmarks')):
1310            rcgen.request['rcprop'] += ['patrolled']
1311            filters['patrolled'] = patrolled
1312        rcgen.request['rcshow'] = api.OptionSet(self, 'recentchanges', 'show',
1313                                                filters)
1314
1315        if user:
1316            rcgen.request['rcuser'] = user
1317
1318        if excludeuser:
1319            rcgen.request['rcexcludeuser'] = excludeuser
1320        rcgen.request['rctag'] = tag
1321        return rcgen
1322
1323    @deprecated_args(number='total', step=True, key='searchstring',
1324                     getredirects=True, get_redirects=True)
1325    def search(self, searchstring: str, *,
1326               namespaces=None,
1327               where: str = 'text',
1328               total: Optional[int] = None,
1329               content: bool = False):
1330        """Iterate Pages that contain the searchstring.
1331
1332        Note that this may include non-existing Pages if the wiki's database
1333        table contains outdated entries.
1334
1335        :see: https://www.mediawiki.org/wiki/API:Search
1336
1337        :param searchstring: the text to search for
1338        :param where: Where to search; value must be "text", "title" or
1339            "nearmatch" (many wikis do not support title or nearmatch search)
1340        :param namespaces: search only in these namespaces (defaults to all)
1341        :type namespaces: iterable of str or Namespace key,
1342            or a single instance of those types. May be a '|' separated
1343            list of namespace identifiers.
1344        :param content: if True, load the current content of each iterated page
1345            (default False)
1346        :raises KeyError: a namespace identifier was not resolved
1347        :raises TypeError: a namespace identifier has an inappropriate
1348            type such as NoneType or bool
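
        A minimal usage sketch; the site and search string are
        illustrative only::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for page in site.search('example', namespaces=[0], total=3):
                print(page.title())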
1349        """
1350        where_types = ['nearmatch', 'text', 'title', 'titles']
1351        if not searchstring:
1352            raise Error('search: searchstring cannot be empty')
1353        if where not in where_types:
1354            raise Error("search: unrecognized 'where' value: {}".format(where))
1355        if where in ('title', 'titles'):
1356            if where == 'titles':
1357                issue_deprecation_warning("where='titles'", "where='title'",
1358                                          since='20160224')
1359                where = 'title'
1360
1361            if self.has_extension('CirrusSearch') and \
1362               isinstance(self.family, pywikibot.family.WikimediaFamily):
1363                # 'title' search was disabled, use intitle instead
1364                searchstring = 'intitle:' + searchstring
1365                issue_deprecation_warning(
1366                    "where='{}'".format(where),
1367                    "searchstring='{}'".format(searchstring),
1368                    since='20160224')
1369
1370                where = None  # default
1371
1372        if not namespaces and namespaces != 0:
1373            namespaces = [ns_id for ns_id in self.namespaces if ns_id >= 0]
1374        srgen = self._generator(api.PageGenerator, type_arg='search',
1375                                gsrsearch=searchstring, gsrwhat=where,
1376                                namespaces=namespaces,
1377                                total=total, g_content=content)
1378        return srgen
1379
1380    @deprecated_args(step=True, showMinor='minor')
1381    def usercontribs(self, user=None, userprefix=None, start=None, end=None,
1382                     reverse=False, namespaces=None, minor=None,
1383                     total: Optional[int] = None, top_only=False):
1384        """Iterate contributions by a particular user.
1385
1386        Iterated values are in the same format as recentchanges.
1387
1388        :see: https://www.mediawiki.org/wiki/API:Usercontribs
1389
1390        :param user: Iterate contributions by this user (name or IP)
1391        :param userprefix: Iterate contributions by all users whose names
1392            or IPs start with this substring
1393        :param start: Iterate contributions starting at this Timestamp
1394        :param end: Iterate contributions ending at this Timestamp
1395        :param reverse: Iterate oldest contributions first (default: newest)
1396        :param namespaces: only iterate pages in these namespaces
1397        :type namespaces: iterable of str or Namespace key,
1398            or a single instance of those types. May be a '|' separated
1399            list of namespace identifiers.
1400        :param minor: if True, iterate only minor edits; if False and
1401            not None, iterate only non-minor edits (default: iterate both)
1402        :param total: limit result to this number of pages
1403        :param top_only: if True, iterate only edits which are the latest
1404            revision (default: False)
1405        :raises pywikibot.exceptions.Error: either user or userprefix must be
1406            non-empty
1407        :raises KeyError: a namespace identifier was not resolved
1408        :raises TypeError: a namespace identifier has an inappropriate
1409            type such as NoneType or bool
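
        A minimal usage sketch; the site and user name are illustrative
        assumptions only::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for contrib in site.usercontribs(user='Example', total=3):
                print(contrib['title'], contrib['timestamp'])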
1410        """
1411        if not (user or userprefix):
1412            raise Error(
1413                'usercontribs: either user or userprefix must be non-empty')
1414
1415        if start and end:
1416            self.assert_valid_iter_params('usercontribs', start, end, reverse)
1417
1418        ucgen = self._generator(api.ListGenerator, type_arg='usercontribs',
1419                                ucprop='ids|title|timestamp|comment|flags',
1420                                namespaces=namespaces,
1421                                total=total, uctoponly=top_only)
1422        if user:
1423            ucgen.request['ucuser'] = user
1424        if userprefix:
1425            ucgen.request['ucuserprefix'] = userprefix
1426        if start is not None:
1427            ucgen.request['ucstart'] = str(start)
1428        if end is not None:
1429            ucgen.request['ucend'] = str(end)
1430        if reverse:
1431            ucgen.request['ucdir'] = 'newer'
1432        option_set = api.OptionSet(self, 'usercontribs', 'show')
1433        option_set['minor'] = minor
1434        ucgen.request['ucshow'] = option_set
1435        return ucgen
1436
1437    @deprecated_args(step=True, showMinor='minor', showAnon='anon',
1438                     showBot='bot')
1439    def watchlist_revs(self, start=None, end=None, reverse=False,
1440                       namespaces=None, minor=None, bot=None,
1441                       anon=None, total=None):
1442        """Iterate revisions to pages on the bot user's watchlist.
1443
        Iterated values will be in the same format as recentchanges.
1445
1446        :see: https://www.mediawiki.org/wiki/API:Watchlist
1447
1448        :param start: Iterate revisions starting at this Timestamp
1449        :param end: Iterate revisions ending at this Timestamp
1450        :param reverse: Iterate oldest revisions first (default: newest)
1451        :param namespaces: only iterate pages in these namespaces
1452        :type namespaces: iterable of str or Namespace key,
1453            or a single instance of those types. May be a '|' separated
1454            list of namespace identifiers.
1455        :param minor: if True, only list minor edits; if False (and not
1456            None), only list non-minor edits
1457        :param bot: if True, only list bot edits; if False (and not
1458            None), only list non-bot edits
1459        :param anon: if True, only list anon edits; if False (and not
1460            None), only list non-anon edits
1461        :raises KeyError: a namespace identifier was not resolved
1462        :raises TypeError: a namespace identifier has an inappropriate
1463            type such as NoneType or bool
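
        A minimal usage sketch, assuming a logged-in site whose user has
        pages on the watchlist::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for rev in site.watchlist_revs(total=3):
                print(rev['title'], rev['user'], rev['timestamp'])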
1464        """
1465        if start and end:
1466            self.assert_valid_iter_params(
1467                'watchlist_revs', start, end, reverse)
1468
1469        wlgen = self._generator(
1470            api.ListGenerator, type_arg='watchlist',
1471            wlprop='user|comment|timestamp|title|ids|flags',
1472            wlallrev='', namespaces=namespaces, total=total)
1473        # TODO: allow users to ask for "patrol" as well?
1474        if start is not None:
1475            wlgen.request['wlstart'] = start
1476        if end is not None:
1477            wlgen.request['wlend'] = end
1478        if reverse:
1479            wlgen.request['wldir'] = 'newer'
1480        filters = {'minor': minor, 'bot': bot, 'anon': anon}
1481        wlgen.request['wlshow'] = api.OptionSet(self, 'watchlist', 'show',
1482                                                filters)
1483        return wlgen
1484
1485    def _check_view_deleted(self, msg_prefix: str, prop: List[str]) -> None:
1486        """Check if the user can view deleted comments and content.
1487
1488        :param msg_prefix: The calling method name
1489        :param prop: Requested props to check
1490        :raises UserRightsError: user cannot view a requested prop
1491        """
1492        err = '{}: User:{} not authorized to view '.format(msg_prefix,
1493                                                           self.user())
1494        if not self.has_right('deletedhistory'):
1495            if self.mw_version < '1.34':
1496                raise UserRightsError(err + 'deleted revisions.')
1497            if 'comment' in prop or 'parsedcomment' in prop:
1498                raise UserRightsError(err + 'comments of deleted revisions.')
1499        if ('content' in prop and not (self.has_right('deletedtext')
1500                                       or self.has_right('undelete'))):
1501            raise UserRightsError(err + 'deleted content.')
1502
1503    @deprecated_args(step=True, get_text='content', page='titles',
1504                     limit='total')
1505    def deletedrevs(self, titles=None, start=None, end=None,
1506                    reverse: bool = False,
1507                    content=False, total=None, **kwargs):
1508        """Iterate deleted revisions.
1509
        Each value returned by the iterator will be a dict containing the
        'title' and 'ns' keys for a particular Page and a 'revisions' key
        whose value is a list of revisions in the same format as
        recentchanges, plus a 'content' element with key '*' if the
        'content' parameter is set. For older wikis a 'token' key is also
        given with the content request.
1516
1517        :see: https://www.mediawiki.org/wiki/API:Deletedrevisions
1518
1519        :param titles: The page titles to check for deleted revisions
1520        :type titles: str (multiple titles delimited with '|')
1521            or pywikibot.Page or typing.Iterable[pywikibot.Page]
1522            or typing.Iterable[str]
1523        :keyword revids: Get revisions by their ID
1524
1525        :note: either titles or revids must be set but not both
1526
1527        :param start: Iterate revisions starting at this Timestamp
1528        :param end: Iterate revisions ending at this Timestamp
1529        :param reverse: Iterate oldest revisions first (default: newest)
1530        :param content: If True, retrieve the content of each revision
1531        :param total: number of revisions to retrieve
1532        :keyword user: List revisions by this user
1533        :keyword excludeuser: Exclude revisions by this user
1534        :keyword tag: Only list revision tagged with this tag
1535        :keyword prop: Which properties to get. Defaults are ids, user,
1536            comment, flags and timestamp
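
        A minimal usage sketch, assuming a user with the 'deletedhistory'
        right; the site and title are illustrative only::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for item in site.deletedrevs(titles='Example', total=3):
                print(item['title'], len(item['revisions']))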
1537        """
1538        def handle_props(props):
1539            """Translate deletedrev props to deletedrevisions props."""
1540            if isinstance(props, str):
1541                props = props.split('|')
1542            if self.mw_version >= '1.25':
1543                return props
1544
1545            old_props = []
1546            for item in props:
1547                if item == 'ids':
1548                    old_props += ['revid', 'parentid']
1549                elif item == 'flags':
1550                    old_props.append('minor')
1551                elif item != 'timestamp':
1552                    old_props.append(item)
1553                    if item == 'content' and self.mw_version < '1.24':
1554                        old_props.append('token')
1555            return old_props
1556
1557        # set default properties
        prop = kwargs.pop('prop',
                          ['ids', 'user', 'comment', 'flags', 'timestamp'])
        # Accept a '|'-separated string as well as a list of props.
        if isinstance(prop, str):
            prop = prop.split('|')
        if content:
            prop.append('content')
1562
1563        if start and end:
1564            self.assert_valid_iter_params('deletedrevs', start, end, reverse)
1565
1566        self._check_view_deleted('deletedrevs', prop)
1567
1568        revids = kwargs.pop('revids', None)
1569        if not (bool(titles) ^ (revids is not None)):
1570            raise Error('deletedrevs: either "titles" or "revids" parameter '
1571                        'must be given.')
        if revids and self.mw_version < '1.25':
            raise NotImplementedError(
                'deletedrevs: "revids" is not implemented with MediaWiki {}'
                .format(self.mw_version))
1576
1577        if self.mw_version >= '1.25':
1578            pre = 'drv'
1579            type_arg = 'deletedrevisions'
1580            generator = api.PropertyGenerator
1581        else:
1582            pre = 'dr'
1583            type_arg = 'deletedrevs'
1584            generator = api.ListGenerator
1585
1586        gen = self._generator(generator, type_arg=type_arg,
1587                              titles=titles, revids=revids,
1588                              total=total)
1589
1590        gen.request[pre + 'start'] = start
1591        gen.request[pre + 'end'] = end
1592        gen.request[pre + 'prop'] = handle_props(prop)
1593
1594        # handle other parameters like user
1595        for k, v in kwargs.items():
1596            gen.request[pre + k] = v
1597
1598        if reverse:
1599            gen.request[pre + 'dir'] = 'newer'
1600
1601        if self.mw_version < '1.25':
1602            yield from gen
1603
1604        else:
            # The dict result differs between the two generators.
1606            for data in gen:
1607                with suppress(KeyError):
1608                    data['revisions'] = data.pop('deletedrevisions')
1609                    yield data
1610
1611    @need_version('1.25')
1612    def alldeletedrevisions(
1613        self,
1614        *,
1615        namespaces=None,
1616        reverse: bool = False,
1617        content: bool = False,
1618        total: Optional[int] = None,
1619        **kwargs
1620    ) -> typing.Iterable[Dict[str, Any]]:
1621        """
1622        Iterate all deleted revisions.
1623
1624        :see: https://www.mediawiki.org/wiki/API:Alldeletedrevisions
1625
1626        :param namespaces: Only iterate pages in these namespaces
1627        :type namespaces: iterable of str or Namespace key,
1628            or a single instance of those types. May be a '|' separated
1629            list of namespace identifiers.
1630        :param reverse: Iterate oldest revisions first (default: newest)
1631        :param content: If True, retrieve the content of each revision
1632        :param total: Number of revisions to retrieve
1633        :keyword from: Start listing at this title
1634        :keyword to: Stop listing at this title
1635        :keyword prefix: Search for all page titles that begin with this value
1636        :keyword excludeuser: Exclude revisions by this user
1637        :keyword tag: Only list revisions tagged with this tag
1638        :keyword user: List revisions by this user
1639        :keyword start: Iterate revisions starting at this Timestamp
1640        :keyword end: Iterate revisions ending at this Timestamp
        :keyword prop: Which properties to get. Defaults are ids, timestamp,
            flags, user, and comment (if you have the right to view them).
1643        :type prop: List[str]
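
        A minimal usage sketch, assuming a user with the 'deletedhistory'
        right; the keys of each yielded dict follow the Alldeletedrevisions
        API module::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for item in site.alldeletedrevisions(namespaces=0, total=3):
                print(item['title'], len(item['revisions']))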
1644        """
1645        if 'start' in kwargs and 'end' in kwargs:
1646            self.assert_valid_iter_params('alldeletedrevisions',
1647                                          kwargs['start'],
1648                                          kwargs['end'],
1649                                          reverse)
1650        prop = kwargs.pop('prop', [])
1651        parameters = {'adr' + k: v for k, v in kwargs.items()}
1652        if not prop:
1653            prop = ['ids', 'timestamp', 'flags', 'user']
1654            if self.has_right('deletedhistory'):
1655                prop.append('comment')
1656        if content:
1657            prop.append('content')
1658        self._check_view_deleted('alldeletedrevisions', prop)
1659        parameters['adrprop'] = prop
1660        if reverse:
1661            parameters['adrdir'] = 'newer'
1662        yield from self._generator(api.ListGenerator,
1663                                   type_arg='alldeletedrevisions',
1664                                   namespaces=namespaces,
1665                                   total=total,
1666                                   parameters=parameters)
1667
1668    def users(self, usernames):
1669        """Iterate info about a list of users by name or IP.
1670
1671        :see: https://www.mediawiki.org/wiki/API:Users
1672
1673        :param usernames: a list of user names
1674        :type usernames: list, or other iterable, of str
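
        A minimal usage sketch; the user names are illustrative only::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for user_data in site.users(['Example', 'Example2']):
                print(user_data.get('name'), user_data.get('editcount'))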
1675        """
1676        usprop = ['blockinfo', 'gender', 'groups', 'editcount', 'registration',
1677                  'rights', 'emailable']
1678        usgen = api.ListGenerator(
1679            'users', site=self, parameters={
1680                'ususers': usernames, 'usprop': usprop})
1681        return usgen
1682
1683    @deprecated_args(step=True)
1684    def randompages(self, total=None, namespaces=None,
1685                    redirects=False, content=False):
1686        """Iterate a number of random pages.
1687
1688        :see: https://www.mediawiki.org/wiki/API:Random
1689
1690        Pages are listed in a fixed sequence, only the starting point is
1691        random.
1692
1693        :param total: the maximum number of pages to iterate
1694        :param namespaces: only iterate pages in these namespaces.
1695        :type namespaces: iterable of str or Namespace key,
1696            or a single instance of those types. May be a '|' separated
1697            list of namespace identifiers.
        :param redirects: if True, include only redirect pages in results;
            if False, exclude redirects; if None (MW 1.26+), include both
            types (default: False)
1701        :type redirects: bool or None
1702        :param content: if True, load the current content of each iterated page
1703            (default False)
1704        :raises KeyError: a namespace identifier was not resolved
1705        :raises TypeError: a namespace identifier has an inappropriate
1706            type such as NoneType or bool
        :raises AssertionError: unsupported redirects parameter
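
        A minimal usage sketch::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for page in site.randompages(total=3, namespaces=[0]):
                print(page.title())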
1708        """
1709        mapping = {False: None, True: 'redirects', None: 'all'}
1710        assert redirects in mapping
1711        redirects = mapping[redirects]
1712        params = {}
1713        if redirects is not None:
1714            if self.mw_version < '1.26':
1715                if redirects == 'all':
                    warn("parameter redirects=None to retrieve 'all' random "
                         'page types is not supported by mw version {}. '
                         'Using default.'.format(self.mw_version),
                         UserWarning)
1720                params['grnredirect'] = redirects == 'redirects'
1721            else:
1722                params['grnfilterredir'] = redirects
1723        return self._generator(api.PageGenerator, type_arg='random',
1724                               namespaces=namespaces, total=total,
1725                               g_content=content, **params)
1726
1727    # TODO: implement undelete
1728
1729    _patrol_errors = {
1730        'nosuchrcid': 'There is no change with rcid {rcid}',
1731        'nosuchrevid': 'There is no change with revid {revid}',
1732        'patroldisabled': 'Patrolling is disabled on {site} wiki',
        'noautopatrol': 'User {user} has no permission to patrol their own '
                        'changes, "autopatrol" is needed',
1735        'notpatrollable':
1736            "The revision {revid} can't be patrolled as it's too old."
1737    }
1738
1739    @need_right('patrol')
1740    @deprecated_args(token=True)
1741    def patrol(self, rcid=None, revid=None, revision=None):
1742        """Return a generator of patrolled pages.
1743
1744        :see: https://www.mediawiki.org/wiki/API:Patrol
1745
        Pages to be patrolled are identified by rcid, revid or revision;
        at least one of these parameters is mandatory.
1749
1750        :param rcid: an int/string/iterable/iterator providing rcid of pages
1751            to be patrolled.
1752        :type rcid: iterable/iterator which returns a number or string which
1753             contains only digits; it also supports a string (as above) or int
1754        :param revid: an int/string/iterable/iterator providing revid of pages
1755            to be patrolled.
1756        :type revid: iterable/iterator which returns a number or string which
1757             contains only digits; it also supports a string (as above) or int.
        :param revision: a Revision/iterable/iterator providing Revision
            objects of pages to be patrolled.
1760        :type revision: iterable/iterator which returns a Revision object; it
1761            also supports a single Revision.
1762        :rtype: iterator of dict with 'rcid', 'ns' and 'title'
1763            of the patrolled page.
1764
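        A minimal usage sketch, assuming a user with the 'patrol' right;
        the rcid value is illustrative only::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for result in site.patrol(rcid=12345):
                print(result['rcid'], result['title'])
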
1765        """
        # If patrol is not enabled, the attribute is set the first time a
        # request is made.
        if getattr(self, '_patroldisabled', False):
            return
1771
1772        if all(_ is None for _ in [rcid, revid, revision]):
1773            raise Error('No rcid, revid or revision provided.')
1774
1775        if isinstance(rcid, (int, str)):
1776            rcid = {rcid}
1777        if isinstance(revid, (int, str)):
1778            revid = {revid}
1779        if isinstance(revision, pywikibot.page.Revision):
1780            revision = {revision}
1781
1782        # Handle param=None.
1783        rcid = rcid or set()
1784        revid = revid or set()
1785        revision = revision or set()
1786
1787        combined_revid = set(revid) | {r.revid for r in revision}
1788
1789        gen = itertools.chain(
1790            zip_longest(rcid, [], fillvalue='rcid'),
1791            zip_longest(combined_revid, [], fillvalue='revid'))
1792
1793        token = self.tokens['patrol']
1794
1795        for idvalue, idtype in gen:
1796            req = self._request(parameters={'action': 'patrol',
1797                                            'token': token,
1798                                            idtype: idvalue})
1799
1800            try:
1801                result = req.submit()
1802            except APIError as err:
1803                # patrol is disabled, store in attr to avoid other requests
1804                if err.code == 'patroldisabled':
1805                    self._patroldisabled = True
1806                    return
1807
1808                errdata = {
1809                    'site': self,
1810                    'user': self.user(),
1811                }
1812                errdata[idtype] = idvalue
1813                if err.code in self._patrol_errors:
1814                    raise Error(self._patrol_errors[err.code]
1815                                .format_map(errdata))
                pywikibot.debug("patrol: Unexpected error code '{}' received."
                                .format(err.code),
                                _logger)
1819                raise
1820
1821            yield result['patrol']
1822
1823    @deprecated_args(number='total', repeat=True, namespace='namespaces',
1824                     rcshow=True, rc_show=True, get_redirect=True, step=True,
1825                     showBot='bot', showRedirects='redirect',
1826                     showPatrolled='patrolled')
1827    def newpages(self, user=None, returndict=False,
1828                 start=None, end=None, reverse=False, bot=False,
1829                 redirect=False, excludeuser=None,
1830                 patrolled=None, namespaces=None, total=None):
1831        """Yield new articles (as Page objects) from recent changes.
1832
        Starts with the newest article and fetches up to 'total' articles.
1835
        The objects yielded depend on the returndict parameter.
        When True, it yields a tuple composed of a Page object and a dict
        of attributes.
        When False, it yields a tuple composed of the Page object,
        timestamp (str), length (int), an empty string, username or IP
        address (str), and comment (str).
1842
1843        :param namespaces: only iterate pages in these namespaces
1844        :type namespaces: iterable of str or Namespace key,
1845            or a single instance of those types. May be a '|' separated
1846            list of namespace identifiers.
1847        :raises KeyError: a namespace identifier was not resolved
1848        :raises TypeError: a namespace identifier has an inappropriate
1849            type such as NoneType or bool
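
        A minimal usage sketch with the default tuple format; the site is
        an illustrative assumption::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for page, timestamp, length, _, user, comment in site.newpages(
                    total=3):
                print(page.title(), timestamp, user)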
1850        """
1851        # TODO: update docstring
1852
1853        # N.B. API still provides no way to access Special:Newpages content
1854        # directly, so we get new pages indirectly through 'recentchanges'
1855
1856        gen = self.recentchanges(
1857            start=start, end=end, reverse=reverse,
1858            namespaces=namespaces, changetype='new', user=user,
1859            excludeuser=excludeuser, bot=bot,
1860            redirect=redirect, patrolled=patrolled,
1861            total=total
1862        )
1863        for pageitem in gen:
1864            newpage = pywikibot.Page(self, pageitem['title'])
1865            if returndict:
1866                yield (newpage, pageitem)
1867            else:
1868                yield (newpage, pageitem['timestamp'], pageitem['newlen'],
1869                       '', pageitem['user'], pageitem['comment'])
1870
1871    @deprecated('APISite.logevents(logtype="upload")', since='20170619')
1872    @deprecated_args(lestart='start', leend='end', leuser='user', letitle=True,
1873                     repeat=True, number='total', step=True)
1874    def newfiles(self, user=None, start=None, end=None, reverse=False,
1875                 total=None):
1876        """Yield information about newly uploaded files.
1877
1878        DEPRECATED: Use logevents(logtype='upload') instead.
1879
1880        Yields a tuple of FilePage, Timestamp, user(str), comment(str).
1881
1882        N.B. the API does not provide direct access to Special:Newimages, so
1883        this is derived from the "upload" log events instead.
1884        """
1885        for event in self.logevents(logtype='upload', user=user,
1886                                    start=start, end=end, reverse=reverse,
1887                                    total=total):
1888            filepage = event.page()
1889            date = event.timestamp()
1890            user = event.user()
1891            comment = event.comment() or ''
1892            yield (filepage, date, user, comment)
1893
1894    def querypage(self, special_page, total=True):
1895        """Yield Page objects retrieved from Special:{special_page}.
1896
1897        :see: https://www.mediawiki.org/wiki/API:Querypage
1898
1899        Generic function for all special pages supported by the site MW API.
1900
1901        :param special_page: Special page to query
1902        :param total: number of pages to return
        :raises AssertionError: special_page is not supported in SpecialPages.
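
        A minimal usage sketch; 'Uncategorizedpages' is one of the
        special pages supported here::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for page in site.querypage('Uncategorizedpages', total=3):
                print(page.title())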
1904        """
1905        param = self._paraminfo.parameter('query+querypage', 'page')
1906        assert special_page in param['type'], (
1907            '{} not in {}'.format(special_page, param['type']))
1908
1909        return self._generator(api.PageGenerator,
1910                               type_arg='querypage', gqppage=special_page,
1911                               total=total)
1912
1913    @deprecated_args(number='total', step=True, repeat=True)
1914    def longpages(self, total=None):
1915        """Yield Pages and lengths from Special:Longpages.
1916
1917        Yields a tuple of Page object, length(int).
1918
1919        :param total: number of pages to return
1920        """
1921        lpgen = self._generator(api.ListGenerator,
1922                                type_arg='querypage', qppage='Longpages',
1923                                total=total)
1924        for pageitem in lpgen:
1925            yield (pywikibot.Page(self, pageitem['title']),
1926                   int(pageitem['value']))
1927
1928    @deprecated_args(number='total', step=True, repeat=True)
1929    def shortpages(self, total=None):
1930        """Yield Pages and lengths from Special:Shortpages.
1931
1932        Yields a tuple of Page object, length(int).
1933
1934        :param total: number of pages to return
1935        """
1936        spgen = self._generator(api.ListGenerator,
1937                                type_arg='querypage', qppage='Shortpages',
1938                                total=total)
1939        for pageitem in spgen:
1940            yield (pywikibot.Page(self, pageitem['title']),
1941                   int(pageitem['value']))
1942
1943    @deprecated_args(number='total', step=True, repeat=True)
1944    def deadendpages(self, total=None):
1945        """Yield Page objects retrieved from Special:Deadendpages.
1946
1947        :param total: number of pages to return
1948        """
1949        return self.querypage('Deadendpages', total)
1950
1951    @deprecated_args(number='total', step=True, repeat=True)
1952    def ancientpages(self, total=None):
1953        """Yield Pages, datestamps from Special:Ancientpages.
1954
1955        :param total: number of pages to return
1956        """
1957        apgen = self._generator(api.ListGenerator,
1958                                type_arg='querypage', qppage='Ancientpages',
1959                                total=total)
1960        for pageitem in apgen:
1961            yield (pywikibot.Page(self, pageitem['title']),
1962                   pywikibot.Timestamp.fromISOformat(pageitem['timestamp']))
1963
1964    @deprecated_args(number='total', step=True, repeat=True)
1965    def lonelypages(self, total=None):
1966        """Yield Pages retrieved from Special:Lonelypages.
1967
1968        :param total: number of pages to return
1969        """
1970        return self.querypage('Lonelypages', total)
1971
1972    @deprecated_args(number='total', step=True, repeat=True)
1973    def unwatchedpages(self, total=None):
1974        """Yield Pages from Special:Unwatchedpages (requires Admin privileges).
1975
1976        :param total: number of pages to return
1977        """
1978        return self.querypage('Unwatchedpages', total)
1979
1980    @deprecated_args(step=True)
1981    def wantedpages(self, total=None):
1982        """Yield Pages from Special:Wantedpages.
1983
1984        :param total: number of pages to return
1985        """
1986        return self.querypage('Wantedpages', total)
1987
1988    def wantedfiles(self, total=None):
1989        """Yield Pages from Special:Wantedfiles.
1990
1991        :param total: number of pages to return
1992        """
1993        return self.querypage('Wantedfiles', total)
1994
1995    def wantedtemplates(self, total=None):
1996        """Yield Pages from Special:Wantedtemplates.
1997
1998        :param total: number of pages to return
1999        """
2000        return self.querypage('Wantedtemplates', total)
2001
2002    @deprecated_args(number='total', step=True, repeat=True)
2003    def wantedcategories(self, total=None):
2004        """Yield Pages from Special:Wantedcategories.
2005
2006        :param total: number of pages to return
2007        """
2008        return self.querypage('Wantedcategories', total)
2009
2010    @deprecated_args(number='total', step=True, repeat=True)
2011    def uncategorizedcategories(self, total=None):
2012        """Yield Categories from Special:Uncategorizedcategories.
2013
2014        :param total: number of pages to return
2015        """
2016        return self.querypage('Uncategorizedcategories', total)
2017
2018    @deprecated_args(number='total', step=True, repeat=True)
2019    def uncategorizedimages(self, total=None):
2020        """Yield FilePages from Special:Uncategorizedimages.
2021
2022        :param total: number of pages to return
2023        """
2024        return self.querypage('Uncategorizedimages', total)
2025
2026    # synonym
2027    uncategorizedfiles = uncategorizedimages
2028
2029    @deprecated_args(number='total', step=True, repeat=True)
2030    def uncategorizedpages(self, total=None):
2031        """Yield Pages from Special:Uncategorizedpages.
2032
2033        :param total: number of pages to return
2034        """
2035        return self.querypage('Uncategorizedpages', total)
2036
2037    @deprecated_args(number='total', step=True, repeat=True)
2038    def uncategorizedtemplates(self, total=None):
2039        """Yield Pages from Special:Uncategorizedtemplates.
2040
2041        :param total: number of pages to return
2042        """
2043        return self.querypage('Uncategorizedtemplates', total)
2044
2045    @deprecated_args(number='total', step=True, repeat=True)
2046    def unusedcategories(self, total=None):
2047        """Yield Category objects from Special:Unusedcategories.
2048
2049        :param total: number of pages to return
2050        """
2051        return self.querypage('Unusedcategories', total)
2052
2053    @deprecated_args(extension=True, number='total', step=True, repeat=True)
2054    def unusedfiles(self, total=None):
2055        """Yield FilePage objects from Special:Unusedimages.
2056
2057        :param total: number of pages to return
2058        """
2059        return self.querypage('Unusedimages', total)
2060
2061    @deprecated_args(number='total', step=True, repeat=True)
2062    def withoutinterwiki(self, total=None):
2063        """Yield Pages without language links from Special:Withoutinterwiki.
2064
2065        :param total: number of pages to return
2066        """
2067        return self.querypage('Withoutinterwiki', total)
2068
2069    @deprecated_args(step=True)
2070    def broken_redirects(self, total=None):
2071        """Yield Pages with broken redirects from Special:BrokenRedirects.
2072
2073        :param total: number of pages to return
2074        """
2075        return self.querypage('BrokenRedirects', total)
2076
2077    @deprecated_args(step=True)
2078    def double_redirects(self, total=None):
2079        """Yield Pages with double redirects from Special:DoubleRedirects.
2080
2081        :param total: number of pages to return
2082        """
2083        return self.querypage('DoubleRedirects', total)
2084
2085    @deprecated_args(step=True)
2086    def redirectpages(self, total=None):
2087        """Yield redirect pages from Special:ListRedirects.
2088
2089        :param total: number of pages to return
2090        """
2091        return self.querypage('Listredirects', total)
2092
2093    @deprecated_args(lvl='level')
2094    def protectedpages(self, namespace=0, type='edit', level=False,
2095                       total=None):
2096        """
2097        Return protected pages depending on protection level and type.
2098
        For protection types other than 'create' it uses
        :py:obj:`APISite.allpages`; for 'create' it uses the
        'query+protectedtitles' module.
2102
2103        :see: https://www.mediawiki.org/wiki/API:Protectedtitles
2104
2105        :param namespace: The searched namespace.
2106        :type namespace: int or Namespace or str
2107        :param type: The protection type to search for (default 'edit').
2108        :type type: str
2109        :param level: The protection level (like 'autoconfirmed'). If False it
2110            shows all protection levels.
2111        :type level: str or False
2112        :return: The pages which are protected.
2113        :rtype: typing.Iterable[pywikibot.Page]
2114        """
2115        namespaces = self.namespaces.resolve(namespace)
        # always assert that, so we can be sure that type could be 'create'
2117        assert 'create' in self.protection_types(), \
2118            "'create' should be a valid protection type."
2119        if type == 'create':
2120            return self._generator(
2121                api.PageGenerator, type_arg='protectedtitles',
2122                namespaces=namespaces, gptlevel=level, total=total)
2123        return self.allpages(namespace=namespaces[0], protect_level=level,
2124                             protect_type=type, total=total)
2125
2126    def pages_with_property(self, propname: str, *,
2127                            total: Optional[int] = None):
2128        """Yield Page objects from Special:PagesWithProp.
2129
2130        :see: https://www.mediawiki.org/wiki/API:Pageswithprop
2131
2132        :param propname: must be a valid property.
2133        :param total: number of pages to return
2134        :return: return a generator of Page objects
2135        :rtype: iterator
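
        A minimal usage sketch; the property name is illustrative and must
        be one of get_property_names()::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for page in site.pages_with_property('disambiguation', total=3):
                print(page.title())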
2136        """
2137        if propname not in self.get_property_names():
2138            raise NotImplementedError(
2139                '"{}" is not a valid page property'.format(propname))
2140        return self._generator(api.PageGenerator, type_arg='pageswithprop',
2141                               gpwppropname=propname, total=total)
2142
2143    @deprecated_args(step=True, sysop=True)
2144    def watched_pages(self, force=False, total=None):
2145        """
2146        Return watchlist.
2147
2148        :see: https://www.mediawiki.org/wiki/API:Watchlistraw
2149
2150        :param force: Reload watchlist
2151        :type force: bool
2152        :param total: if not None, limit the generator to yielding this many
2153            items in total
2154        :type total: int
        :return: pages in the watchlist
        :rtype: iterable of pywikibot.Page objects
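
        A minimal usage sketch, assuming a logged-in site::

            import pywikibot
            site = pywikibot.Site('en', 'wikipedia')
            for page in site.watched_pages(total=3):
                print(page.title())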
2157        """
2158        expiry = None if force else pywikibot.config.API_config_expiry
2159        gen = api.PageGenerator(site=self, generator='watchlistraw',
2160                                expiry=expiry)
2161        gen.set_maximum_items(total)
2162        return gen
2163