1"""Objects representing API generators to MediaWiki site.""" 2# 3# (C) Pywikibot team, 2008-2021 4# 5# Distributed under the terms of the MIT license. 6# 7import heapq 8import itertools 9import typing 10from contextlib import suppress 11from itertools import zip_longest 12from typing import Any, Optional, Union 13from warnings import warn 14 15import pywikibot 16import pywikibot.family 17from pywikibot.backports import Dict, List 18from pywikibot.data import api 19from pywikibot.exceptions import ( 20 APIError, 21 Error, 22 InconsistentTitleError, 23 InvalidTitleError, 24 NoPageError, 25 UserRightsError, 26) 27from pywikibot.site._decorators import need_right, need_version 28from pywikibot.tools import ( 29 deprecated, 30 deprecated_args, 31 filter_unique, 32 is_ip_address, 33 issue_deprecation_warning, 34 itergroup, 35 remove_last_args, 36) 37 38 39_logger = 'wiki.apisite' 40 41 42class GeneratorsMixin: 43 44 """API generators mixin to MediaWiki site.""" 45 46 def load_pages_from_pageids(self, pageids): 47 """ 48 Return a page generator from pageids. 49 50 Pages are iterated in the same order than in the underlying pageids. 51 52 Pageids are filtered and only one page is returned in case of 53 duplicate pageids. 54 55 :param pageids: an iterable that returns pageids (str or int), 56 or a comma- or pipe-separated string of pageids 57 (e.g. '945097,1483753, 956608' or '945097|483753|956608') 58 """ 59 if not pageids: 60 return 61 if isinstance(pageids, str): 62 pageids = pageids.replace('|', ',') 63 pageids = pageids.split(',') 64 pageids = [p.strip() for p in pageids] 65 66 # Validate pageids. 67 gen = (str(int(p)) for p in pageids if int(p) > 0) 68 69 # Find out how many pages can be specified at a time. 70 parameter = self._paraminfo.parameter('query+info', 'prop') 71 if self.logged_in() and self.has_right('apihighlimits'): 72 groupsize = int(parameter['highlimit']) 73 else: 74 groupsize = int(parameter['limit']) 75 76 for sublist in itergroup(filter_unique(gen), groupsize): 77 # Store the order of the input data. 78 priority_dict = dict(zip(sublist, range(len(sublist)))) 79 80 prio_queue = [] 81 next_prio = 0 82 params = {'pageids': sublist, } 83 rvgen = api.PropertyGenerator('info', site=self, parameters=params) 84 85 for pagedata in rvgen: 86 title = pagedata['title'] 87 pageid = str(pagedata['pageid']) 88 page = pywikibot.Page(pywikibot.Link(title, source=self)) 89 api.update_page(page, pagedata) 90 priority, page = heapq.heappushpop(prio_queue, 91 (priority_dict[pageid], 92 page)) 93 # Smallest priority matches expected one; yield early. 94 if priority == next_prio: 95 yield page 96 next_prio += 1 97 else: 98 # Push onto the heap. 99 heapq.heappush(prio_queue, (priority, page)) 100 101 # Extract data in the same order of the input data. 102 while prio_queue: 103 priority, page = heapq.heappop(prio_queue) 104 yield page 105 106 def preloadpages(self, pagelist, *, groupsize=50, templates=False, 107 langlinks=False, pageprops=False): 108 """Return a generator to a list of preloaded pages. 109 110 Pages are iterated in the same order than in the underlying pagelist. 111 In case of duplicates in a groupsize batch, return the first entry. 112 113 :param pagelist: an iterable that returns Page objects 114 :param groupsize: how many Pages to query at a time 115 :type groupsize: int 116 :param templates: preload pages (typically templates) transcluded in 117 the provided pages 118 :type templates: bool 119 :param langlinks: preload all language links from the provided pages 120 to other languages 121 :type langlinks: bool 122 :param pageprops: preload various properties defined in page content 123 :type pageprops: bool 124 125 """ 126 props = 'revisions|info|categoryinfo' 127 if templates: 128 props += '|templates' 129 if langlinks: 130 props += '|langlinks' 131 if pageprops: 132 props += '|pageprops' 133 134 parameter = self._paraminfo.parameter('query+info', 'prop') 135 if self.logged_in() and self.has_right('apihighlimits'): 136 max_ids = int(parameter['highlimit']) 137 else: 138 max_ids = int(parameter['limit']) # T78333, T161783 139 140 for sublist in itergroup(pagelist, min(groupsize, max_ids)): 141 # Do not use p.pageid property as it will force page loading. 142 pageids = [str(p._pageid) for p in sublist 143 if hasattr(p, '_pageid') and p._pageid > 0] 144 cache = {} 145 # In case of duplicates, return the first entry. 146 for priority, page in enumerate(sublist): 147 try: 148 cache.setdefault(page.title(with_section=False), 149 (priority, page)) 150 except InvalidTitleError: 151 pywikibot.exception() 152 153 prio_queue = [] 154 next_prio = 0 155 rvgen = api.PropertyGenerator(props, site=self) 156 rvgen.set_maximum_items(-1) # suppress use of "rvlimit" parameter 157 158 if len(pageids) == len(sublist) and len(set(pageids)) <= max_ids: 159 # only use pageids if all pages have them 160 rvgen.request['pageids'] = set(pageids) 161 else: 162 rvgen.request['titles'] = list(cache.keys()) 163 rvgen.request['rvprop'] = self._rvprops(content=True) 164 pywikibot.output('Retrieving {} pages from {}.' 165 .format(len(cache), self)) 166 167 for pagedata in rvgen: 168 pywikibot.debug('Preloading {}'.format(pagedata), _logger) 169 try: 170 if pagedata['title'] not in cache: 171 # API always returns a "normalized" title which is 172 # usually the same as the canonical form returned by 173 # page.title(), but sometimes not (e.g., 174 # gender-specific localizations of "User" namespace). 175 # This checks to see if there is a normalized title in 176 # the response that corresponds to the canonical form 177 # used in the query. 178 for key in cache: 179 if self.sametitle(key, pagedata['title']): 180 cache[pagedata['title']] = cache[key] 181 break 182 else: 183 pywikibot.warning( 184 'preloadpages: Query returned unexpected ' 185 "title '{}'".format(pagedata['title'])) 186 continue 187 except KeyError: 188 pywikibot.debug("No 'title' in {}" 189 .format(pagedata), _logger) 190 pywikibot.debug('pageids={}'.format(pageids), _logger) 191 pywikibot.debug('titles={}' 192 .format(list(cache.keys())), _logger) 193 continue 194 priority, page = cache[pagedata['title']] 195 api.update_page(page, pagedata, rvgen.props) 196 priority, page = heapq.heappushpop(prio_queue, 197 (priority, page)) 198 # Smallest priority matches expected one; yield. 199 if priority == next_prio: 200 yield page 201 next_prio += 1 202 else: 203 # Push back onto the heap. 204 heapq.heappush(prio_queue, (priority, page)) 205 206 # Empty the heap. 207 while prio_queue: 208 priority, page = heapq.heappop(prio_queue) 209 yield page 210 211 @deprecated_args( 212 followRedirects='follow_redirects', filterRedirects='filter_redirects') 213 def pagebacklinks(self, page, *, follow_redirects=False, 214 filter_redirects=None, namespaces=None, total=None, 215 content=False): 216 """Iterate all pages that link to the given page. 217 218 :see: https://www.mediawiki.org/wiki/API:Backlinks 219 220 :param page: The Page to get links to. 221 :param follow_redirects: Also return links to redirects pointing to 222 the given page. 223 :param filter_redirects: If True, only return redirects to the given 224 page. If False, only return non-redirect links. If None, return 225 both (no filtering). 226 :param namespaces: If present, only return links from the namespaces 227 in this list. 228 :type namespaces: iterable of str or Namespace key, 229 or a single instance of those types. May be a '|' separated 230 list of namespace identifiers. 231 :param total: Maximum number of pages to retrieve in total. 232 :param content: if True, load the current content of each iterated page 233 (default False) 234 :rtype: typing.Iterable[pywikibot.Page] 235 :raises KeyError: a namespace identifier was not resolved 236 :raises TypeError: a namespace identifier has an inappropriate 237 type such as NoneType or bool 238 """ 239 bltitle = page.title(with_section=False).encode(self.encoding()) 240 blargs = {'gbltitle': bltitle} 241 if filter_redirects is not None: 242 blargs['gblfilterredir'] = ('redirects' if filter_redirects 243 else 'nonredirects') 244 blgen = self._generator(api.PageGenerator, type_arg='backlinks', 245 namespaces=namespaces, total=total, 246 g_content=content, **blargs) 247 if follow_redirects: 248 # links identified by MediaWiki as redirects may not really be, 249 # so we have to check each "redirect" page and see if it 250 # really redirects to this page 251 # see fixed MediaWiki bug T9304 252 redirgen = self._generator(api.PageGenerator, 253 type_arg='backlinks', 254 gbltitle=bltitle, 255 gblfilterredir='redirects') 256 genlist = {None: blgen} 257 for redir in redirgen: 258 if redir == page: 259 # if a wiki contains pages whose titles contain 260 # namespace aliases that existed before those aliases 261 # were defined (example: [[WP:Sandbox]] existed as a 262 # redirect to [[Wikipedia:Sandbox]] before the WP: alias 263 # was created) they can be returned as redirects to 264 # themselves; skip these 265 continue 266 if redir.getRedirectTarget() == page: 267 genlist[redir.title()] = self.pagebacklinks( 268 redir, follow_redirects=True, 269 filter_redirects=filter_redirects, 270 namespaces=namespaces, 271 content=content 272 ) 273 return itertools.chain(*genlist.values()) 274 return blgen 275 276 @deprecated_args(step=True, filterRedirects='filter_redirects') 277 def page_embeddedin(self, page, *, filter_redirects=None, namespaces=None, 278 total=None, content=False): 279 """Iterate all pages that embedded the given page as a template. 280 281 :see: https://www.mediawiki.org/wiki/API:Embeddedin 282 283 :param page: The Page to get inclusions for. 284 :param filter_redirects: If True, only return redirects that embed 285 the given page. If False, only return non-redirect links. If 286 None, return both (no filtering). 287 :param namespaces: If present, only return links from the namespaces 288 in this list. 289 :type namespaces: iterable of str or Namespace key, 290 or a single instance of those types. May be a '|' separated 291 list of namespace identifiers. 292 :param content: if True, load the current content of each iterated page 293 (default False) 294 :rtype: typing.Iterable[pywikibot.Page] 295 :raises KeyError: a namespace identifier was not resolved 296 :raises TypeError: a namespace identifier has an inappropriate 297 type such as NoneType or bool 298 """ 299 eiargs = {'geititle': 300 page.title(with_section=False).encode(self.encoding())} 301 if filter_redirects is not None: 302 eiargs['geifilterredir'] = ('redirects' if filter_redirects 303 else 'nonredirects') 304 return self._generator(api.PageGenerator, type_arg='embeddedin', 305 namespaces=namespaces, total=total, 306 g_content=content, **eiargs) 307 308 @deprecated_args( 309 step=None, followRedirects='follow_redirects', 310 filterRedirects='filter_redirects', 311 onlyTemplateInclusion='only_template_inclusion', 312 withTemplateInclusion='with_template_inclusion') 313 def pagereferences(self, page, *, follow_redirects=False, 314 filter_redirects=None, with_template_inclusion=True, 315 only_template_inclusion=False, namespaces=None, 316 total=None, content=False): 317 """ 318 Convenience method combining pagebacklinks and page_embeddedin. 319 320 :param namespaces: If present, only return links from the namespaces 321 in this list. 322 :type namespaces: iterable of str or Namespace key, 323 or a single instance of those types. May be a '|' separated 324 list of namespace identifiers. 325 :rtype: typing.Iterable[pywikibot.Page] 326 :raises KeyError: a namespace identifier was not resolved 327 :raises TypeError: a namespace identifier has an inappropriate 328 type such as NoneType or bool 329 """ 330 if only_template_inclusion: 331 return self.page_embeddedin(page, 332 filter_redirects=filter_redirects, 333 namespaces=namespaces, total=total, 334 content=content) 335 if not with_template_inclusion: 336 return self.pagebacklinks(page, follow_redirects=follow_redirects, 337 filter_redirects=filter_redirects, 338 namespaces=namespaces, total=total, 339 content=content) 340 return itertools.islice( 341 itertools.chain( 342 self.pagebacklinks( 343 page, follow_redirects=follow_redirects, 344 filter_redirects=filter_redirects, 345 namespaces=namespaces, content=content), 346 self.page_embeddedin( 347 page, filter_redirects=filter_redirects, 348 namespaces=namespaces, content=content) 349 ), total) 350 351 @deprecated_args(step=True) 352 def pagelinks(self, page, *, namespaces=None, follow_redirects=False, 353 total=None, content=False): 354 """Iterate internal wikilinks contained (or transcluded) on page. 355 356 :see: https://www.mediawiki.org/wiki/API:Links 357 358 :param namespaces: Only iterate pages in these namespaces 359 (default: all) 360 :type namespaces: iterable of str or Namespace key, 361 or a single instance of those types. May be a '|' separated 362 list of namespace identifiers. 363 :param follow_redirects: if True, yields the target of any redirects, 364 rather than the redirect page 365 :param content: if True, load the current content of each iterated page 366 (default False) 367 :raises KeyError: a namespace identifier was not resolved 368 :raises TypeError: a namespace identifier has an inappropriate 369 type such as NoneType or bool 370 """ 371 plargs = {} 372 if hasattr(page, '_pageid'): 373 plargs['pageids'] = str(page._pageid) 374 else: 375 pltitle = page.title(with_section=False).encode(self.encoding()) 376 plargs['titles'] = pltitle 377 return self._generator(api.PageGenerator, type_arg='links', 378 namespaces=namespaces, total=total, 379 g_content=content, redirects=follow_redirects, 380 **plargs) 381 382 # Sortkey doesn't work with generator 383 @deprecated_args(withSortKey=True, step=True) 384 def pagecategories(self, page, *, total=None, content=False): 385 """Iterate categories to which page belongs. 386 387 :see: https://www.mediawiki.org/wiki/API:Categories 388 389 :param content: if True, load the current content of each iterated page 390 (default False); note that this means the contents of the 391 category description page, not the pages contained in the category 392 """ 393 clargs = {} 394 if hasattr(page, '_pageid'): 395 clargs['pageids'] = str(page._pageid) 396 else: 397 clargs['titles'] = page.title( 398 with_section=False).encode(self.encoding()) 399 return self._generator(api.PageGenerator, 400 type_arg='categories', total=total, 401 g_content=content, **clargs) 402 403 @deprecated_args(step=True) 404 def pageimages(self, page, *, total=None, content=False): 405 """Iterate images used (not just linked) on the page. 406 407 :see: https://www.mediawiki.org/wiki/API:Images 408 409 :param content: if True, load the current content of each iterated page 410 (default False); note that this means the content of the image 411 description page, not the image itself 412 413 """ 414 imtitle = page.title(with_section=False).encode(self.encoding()) 415 return self._generator(api.PageGenerator, type_arg='images', 416 titles=imtitle, total=total, 417 g_content=content) 418 419 @deprecated_args(step=True) 420 def pagetemplates(self, page, *, namespaces=None, total=None, 421 content=False): 422 """Iterate templates transcluded (not just linked) on the page. 423 424 :see: https://www.mediawiki.org/wiki/API:Templates 425 426 :param namespaces: Only iterate pages in these namespaces 427 :type namespaces: iterable of str or Namespace key, 428 or a single instance of those types. May be a '|' separated 429 list of namespace identifiers. 430 :param content: if True, load the current content of each iterated page 431 (default False) 432 433 :raises KeyError: a namespace identifier was not resolved 434 :raises TypeError: a namespace identifier has an inappropriate 435 type such as NoneType or bool 436 """ 437 tltitle = page.title(with_section=False).encode(self.encoding()) 438 return self._generator(api.PageGenerator, type_arg='templates', 439 titles=tltitle, namespaces=namespaces, 440 total=total, g_content=content) 441 442 @deprecated_args(step=True, startsort=True, endsort=True) 443 def categorymembers(self, category, *, 444 namespaces=None, 445 sortby: Optional[str] = None, 446 reverse: bool = False, 447 starttime=None, 448 endtime=None, 449 total: Optional[int] = None, 450 content: bool = False, 451 member_type=None, 452 startprefix: Optional[str] = None, 453 endprefix: Optional[str] = None): 454 """Iterate members of specified category. 455 456 :see: https://www.mediawiki.org/wiki/API:Categorymembers 457 458 :param category: The Category to iterate. 459 :param namespaces: If present, only return category members from 460 these namespaces. To yield subcategories or files, use 461 parameter member_type instead. 462 :type namespaces: iterable of str or Namespace key, 463 or a single instance of those types. May be a '|' separated 464 list of namespace identifiers. 465 :param sortby: determines the order in which results are generated, 466 valid values are "sortkey" (default, results ordered by category 467 sort key) or "timestamp" (results ordered by time page was 468 added to the category) 469 :param reverse: if True, generate results in reverse order 470 (default False) 471 :param starttime: if provided, only generate pages added after this 472 time; not valid unless sortby="timestamp" 473 :type starttime: pywikibot.Timestamp 474 :param endtime: if provided, only generate pages added before this 475 time; not valid unless sortby="timestamp" 476 :param startprefix: if provided, only generate pages >= this title 477 lexically; not valid if sortby="timestamp" 478 :param endprefix: if provided, only generate pages < this title 479 lexically; not valid if sortby="timestamp" 480 :param content: if True, load the current content of each iterated page 481 (default False) 482 :param member_type: member type; if member_type includes 'page' and is 483 used in conjunction with sortby="timestamp", the API may limit 484 results to only pages in the first 50 namespaces. 485 :type member_type: str or iterable of str; 486 values: page, subcat, file 487 :rtype: typing.Iterable[pywikibot.Page] 488 :raises KeyError: a namespace identifier was not resolved 489 :raises TypeError: a namespace identifier has an inappropriate 490 type such as NoneType or bool 491 """ 492 if category.namespace() != 14: 493 raise Error('categorymembers: non-Category page {!r} specified' 494 .format(category)) 495 496 cmtitle = category.title(with_section=False).encode(self.encoding()) 497 cmargs = { 498 'type_arg': 'categorymembers', 499 'gcmtitle': cmtitle, 500 'gcmprop': 'ids|title|sortkey' 501 } 502 503 if sortby in ['sortkey', 'timestamp']: 504 cmargs['gcmsort'] = sortby 505 elif sortby: 506 raise ValueError('categorymembers: invalid sortby value {!r}' 507 .format(sortby)) 508 509 if starttime and endtime and starttime > endtime: 510 raise ValueError( 511 'categorymembers: starttime must be before endtime') 512 if startprefix and endprefix and startprefix > endprefix: 513 raise ValueError( 514 'categorymembers: startprefix must be less than endprefix') 515 516 if isinstance(member_type, str): 517 member_type = {member_type} 518 519 if member_type and sortby == 'timestamp': 520 # Covert namespaces to a known type 521 namespaces = set(self.namespaces.resolve(namespaces or [])) 522 523 if 'page' in member_type: 524 excluded_namespaces = set() 525 if 'file' not in member_type: 526 excluded_namespaces.add(6) 527 if 'subcat' not in member_type: 528 excluded_namespaces.add(14) 529 530 if namespaces: 531 if excluded_namespaces.intersection(namespaces): 532 raise ValueError( 533 'incompatible namespaces {!r} and member_type {!r}' 534 .format(namespaces, member_type)) 535 # All excluded namespaces are not present in `namespaces`. 536 else: 537 # If the number of namespaces is greater than permitted by 538 # the API, it will issue a warning and use the namespaces 539 # up until the limit, which will usually be sufficient. 540 # TODO: QueryGenerator should detect when the number of 541 # namespaces requested is higher than available, and split 542 # the request into several batches. 543 excluded_namespaces.update([-1, -2]) 544 namespaces = set(self.namespaces) - excluded_namespaces 545 else: 546 if 'file' in member_type: 547 namespaces.add(6) 548 if 'subcat' in member_type: 549 namespaces.add(14) 550 551 member_type = None 552 553 if member_type: 554 cmargs['gcmtype'] = member_type 555 556 if reverse: 557 cmargs['gcmdir'] = 'desc' 558 # API wants start/end params in opposite order if using descending 559 # sort; we take care of this reversal for the user 560 starttime, endtime = endtime, starttime 561 startprefix, endprefix = endprefix, startprefix 562 563 if starttime and sortby == 'timestamp': 564 cmargs['gcmstart'] = starttime 565 elif starttime: 566 raise ValueError('categorymembers: ' 567 "invalid combination of 'sortby' and 'starttime'") 568 569 if endtime and sortby == 'timestamp': 570 cmargs['gcmend'] = endtime 571 elif endtime: 572 raise ValueError('categorymembers: ' 573 "invalid combination of 'sortby' and 'endtime'") 574 575 if startprefix and sortby != 'timestamp': 576 cmargs['gcmstartsortkeyprefix'] = startprefix 577 elif startprefix: 578 raise ValueError('categorymembers: invalid combination of ' 579 "'sortby' and 'startprefix'") 580 581 if endprefix and sortby != 'timestamp': 582 cmargs['gcmendsortkeyprefix'] = endprefix 583 elif endprefix: 584 raise ValueError('categorymembers: ' 585 "invalid combination of 'sortby' and 'endprefix'") 586 587 return self._generator(api.PageGenerator, namespaces=namespaces, 588 total=total, g_content=content, **cmargs) 589 590 def _rvprops(self, content: bool = False) -> List[str]: 591 """Setup rvprop items for loadrevisions and preloadpages. 592 593 :return: rvprop items 594 """ 595 props = ['comment', 'contentmodel', 'flags', 'ids', 'parsedcomment', 596 'sha1', 'size', 'tags', 'timestamp', 'user', 'userid'] 597 if content: 598 props.append('content') 599 if self.mw_version >= '1.32': 600 props.append('roles') 601 return props 602 603 @deprecated_args(getText='content', sysop=True) 604 @remove_last_args(['rollback']) 605 def loadrevisions(self, page, *, content=False, section=None, **kwargs): 606 """Retrieve revision information and store it in page object. 607 608 By default, retrieves the last (current) revision of the page, 609 unless any of the optional parameters revids, startid, endid, 610 starttime, endtime, rvdir, user, excludeuser, or total are 611 specified. Unless noted below, all parameters not specified 612 default to False. 613 614 If rvdir is False or not specified, startid must be greater than 615 endid if both are specified; likewise, starttime must be greater 616 than endtime. If rvdir is True, these relationships are reversed. 617 618 :see: https://www.mediawiki.org/wiki/API:Revisions 619 620 :param page: retrieve revisions of this Page and hold the data. 621 :type page: pywikibot.Page 622 :param content: if True, retrieve the wiki-text of each revision; 623 otherwise, only retrieve the revision metadata (default) 624 :type content: bool 625 :param section: if specified, retrieve only this section of the text 626 (content must be True); section must be given by number (top of 627 the article is section 0), not name 628 :type section: int 629 :keyword revids: retrieve only the specified revision ids (raise 630 Exception if any of revids does not correspond to page) 631 :type revids: an int, a str or a list of ints or strings 632 :keyword startid: retrieve revisions starting with this revid 633 :keyword endid: stop upon retrieving this revid 634 :keyword starttime: retrieve revisions starting at this Timestamp 635 :keyword endtime: stop upon reaching this Timestamp 636 :keyword rvdir: if false, retrieve newest revisions first (default); 637 if true, retrieve oldest first 638 :keyword user: retrieve only revisions authored by this user 639 :keyword excludeuser: retrieve all revisions not authored by this user 640 :keyword total: number of revisions to retrieve 641 :raises ValueError: invalid startid/endid or starttime/endtime values 642 :raises pywikibot.exceptions.Error: revids belonging to a different 643 page 644 """ 645 latest = all(val is None for val in kwargs.values()) 646 647 revids = kwargs.get('revids') 648 startid = kwargs.get('startid') 649 starttime = kwargs.get('starttime') 650 endid = kwargs.get('endid') 651 endtime = kwargs.get('endtime') 652 rvdir = kwargs.get('rvdir') 653 user = kwargs.get('user') 654 step = kwargs.get('step') 655 656 # check for invalid argument combinations 657 if (startid is not None or endid is not None) \ 658 and (starttime is not None or endtime is not None): 659 raise ValueError( 660 'loadrevisions: startid/endid combined with starttime/endtime') 661 662 if starttime is not None and endtime is not None: 663 if rvdir and starttime >= endtime: 664 raise ValueError( 665 'loadrevisions: starttime > endtime with rvdir=True') 666 667 if not rvdir and endtime >= starttime: 668 raise ValueError( 669 'loadrevisions: endtime > starttime with rvdir=False') 670 671 if startid is not None and endid is not None: 672 if rvdir and startid >= endid: 673 raise ValueError( 674 'loadrevisions: startid > endid with rvdir=True') 675 if not rvdir and endid >= startid: 676 raise ValueError( 677 'loadrevisions: endid > startid with rvdir=False') 678 679 rvargs = {'type_arg': 'info|revisions'} 680 rvargs['rvprop'] = self._rvprops(content=content) 681 682 if content and section is not None: 683 rvargs['rvsection'] = str(section) 684 685 if revids is None: 686 rvtitle = page.title(with_section=False).encode(self.encoding()) 687 rvargs['titles'] = rvtitle 688 else: 689 if isinstance(revids, (int, str)): 690 ids = str(revids) 691 else: 692 ids = '|'.join(str(r) for r in revids) 693 rvargs['revids'] = ids 694 695 if rvdir: 696 rvargs['rvdir'] = 'newer' 697 elif rvdir is not None: 698 rvargs['rvdir'] = 'older' 699 700 if startid: 701 rvargs['rvstartid'] = startid 702 if endid: 703 rvargs['rvendid'] = endid 704 if starttime: 705 rvargs['rvstart'] = starttime 706 if endtime: 707 rvargs['rvend'] = endtime 708 709 if user: 710 rvargs['rvuser'] = user 711 else: 712 rvargs['rvexcludeuser'] = kwargs.get('excludeuser') 713 714 # assemble API request 715 rvgen = self._generator(api.PropertyGenerator, 716 total=kwargs.get('total'), **rvargs) 717 718 if step: 719 rvgen.set_query_increment = step 720 721 if latest or 'revids' in rvgen.request: 722 rvgen.set_maximum_items(-1) # suppress use of rvlimit parameter 723 724 for pagedata in rvgen: 725 if not self.sametitle(pagedata['title'], 726 page.title(with_section=False)): 727 raise InconsistentTitleError(page, pagedata['title']) 728 if 'missing' in pagedata: 729 raise NoPageError(page) 730 api.update_page(page, pagedata, rvgen.props) 731 732 @deprecated_args(step=True) 733 def pagelanglinks(self, page, *, 734 total: Optional[int] = None, 735 include_obsolete: bool = False, 736 include_empty_titles: bool = False): 737 """Iterate all interlanguage links on page, yielding Link objects. 738 739 *New in version 6.2:* *include_empty_titles* parameter was added. 740 741 :see: https://www.mediawiki.org/wiki/API:Langlinks 742 743 :param include_obsolete: if true, yield even Link objects whose 744 site is obsolete 745 :param include_empty_titles: if true, yield even Link objects whose 746 title is empty but redirects to a site like [[en:]] 747 """ 748 lltitle = page.title(with_section=False) 749 llquery = self._generator(api.PropertyGenerator, 750 type_arg='langlinks', 751 titles=lltitle.encode(self.encoding()), 752 total=total) 753 for pageitem in llquery: 754 if not self.sametitle(pageitem['title'], lltitle): 755 raise InconsistentTitleError(page, pageitem['title']) 756 if 'langlinks' not in pageitem: 757 continue 758 for linkdata in pageitem['langlinks']: 759 link = pywikibot.Link.langlinkUnsafe(linkdata['lang'], 760 linkdata['*'], 761 source=self) 762 if link.site.obsolete and not include_obsolete: 763 continue 764 765 if link.title or include_empty_titles: 766 yield link 767 768 @deprecated_args(step=True) 769 def page_extlinks(self, page, *, total=None): 770 """Iterate all external links on page, yielding URL strings. 771 772 :see: https://www.mediawiki.org/wiki/API:Extlinks 773 """ 774 eltitle = page.title(with_section=False) 775 elquery = self._generator(api.PropertyGenerator, type_arg='extlinks', 776 titles=eltitle.encode(self.encoding()), 777 total=total) 778 for pageitem in elquery: 779 if not self.sametitle(pageitem['title'], eltitle): 780 raise InconsistentTitleError(page, pageitem['title']) 781 if 'extlinks' not in pageitem: 782 continue 783 for linkdata in pageitem['extlinks']: 784 yield linkdata['*'] 785 786 @deprecated_args(throttle=True, limit='total', step=True, 787 includeredirects='filterredir') 788 def allpages(self, start='!', prefix='', namespace=0, filterredir=None, 789 filterlanglinks=None, minsize=None, maxsize=None, 790 protect_type=None, protect_level=None, reverse=False, 791 total=None, content=False): 792 """Iterate pages in a single namespace. 793 794 :see: https://www.mediawiki.org/wiki/API:Allpages 795 796 :param start: Start at this title (page need not exist). 797 :param prefix: Only yield pages starting with this string. 798 :param namespace: Iterate pages from this (single) namespace 799 :type namespace: int or Namespace. 800 :param filterredir: if True, only yield redirects; if False (and not 801 None), only yield non-redirects (default: yield both) 802 :param filterlanglinks: if True, only yield pages with language links; 803 if False (and not None), only yield pages without language links 804 (default: yield both) 805 :param minsize: if present, only yield pages at least this many 806 bytes in size 807 :param maxsize: if present, only yield pages at most this many bytes 808 in size 809 :param protect_type: only yield pages that have a protection of the 810 specified type 811 :type protect_type: str 812 :param protect_level: only yield pages that have protection at this 813 level; can only be used if protect_type is specified 814 :param reverse: if True, iterate in reverse Unicode lexigraphic 815 order (default: iterate in forward order) 816 :param content: if True, load the current content of each iterated page 817 (default False) 818 :raises KeyError: the namespace identifier was not resolved 819 :raises TypeError: the namespace identifier has an inappropriate 820 type such as bool, or an iterable with more than one namespace 821 """ 822 # backward compatibility test 823 if filterredir not in (True, False, None): 824 old = filterredir 825 if filterredir: 826 if filterredir == 'only': 827 filterredir = True 828 else: 829 filterredir = None 830 else: 831 filterredir = False 832 warn('The value "{0!r}" for "filterredir" is deprecated; use ' 833 '{1} instead.'.format(old, filterredir), 834 DeprecationWarning, 3) 835 836 apgen = self._generator(api.PageGenerator, type_arg='allpages', 837 namespaces=namespace, 838 gapfrom=start, total=total, 839 g_content=content) 840 if prefix: 841 apgen.request['gapprefix'] = prefix 842 if filterredir is not None: 843 apgen.request['gapfilterredir'] = ('redirects' if filterredir else 844 'nonredirects') 845 if filterlanglinks is not None: 846 apgen.request['gapfilterlanglinks'] = ('withlanglinks' 847 if filterlanglinks else 848 'withoutlanglinks') 849 if isinstance(minsize, int): 850 apgen.request['gapminsize'] = str(minsize) 851 if isinstance(maxsize, int): 852 apgen.request['gapmaxsize'] = str(maxsize) 853 if isinstance(protect_type, str): 854 apgen.request['gapprtype'] = protect_type 855 if isinstance(protect_level, str): 856 apgen.request['gapprlevel'] = protect_level 857 if reverse: 858 apgen.request['gapdir'] = 'descending' 859 return apgen 860 861 @deprecated_args(step=True) 862 def alllinks(self, start='!', prefix='', namespace=0, unique=False, 863 fromids=False, total=None): 864 """Iterate all links to pages (which need not exist) in one namespace. 865 866 Note that, in practice, links that were found on pages that have 867 been deleted may not have been removed from the links table, so this 868 method can return false positives. 869 870 :see: https://www.mediawiki.org/wiki/API:Alllinks 871 872 :param start: Start at this title (page need not exist). 873 :param prefix: Only yield pages starting with this string. 874 :param namespace: Iterate pages from this (single) namespace 875 :type namespace: int or Namespace 876 :param unique: If True, only iterate each link title once (default: 877 iterate once for each linking page) 878 :param fromids: if True, include the pageid of the page containing 879 each link (default: False) as the '_fromid' attribute of the Page; 880 cannot be combined with unique 881 :raises KeyError: the namespace identifier was not resolved 882 :raises TypeError: the namespace identifier has an inappropriate 883 type such as bool, or an iterable with more than one namespace 884 """ 885 if unique and fromids: 886 raise Error('alllinks: unique and fromids cannot both be True.') 887 algen = self._generator(api.ListGenerator, type_arg='alllinks', 888 namespaces=namespace, alfrom=start, 889 total=total, alunique=unique) 890 if prefix: 891 algen.request['alprefix'] = prefix 892 if fromids: 893 algen.request['alprop'] = 'title|ids' 894 for link in algen: 895 p = pywikibot.Page(self, link['title'], link['ns']) 896 if fromids: 897 p._fromid = link['fromid'] 898 yield p 899 900 @deprecated_args(step=True) 901 def allcategories(self, start='!', prefix='', total=None, 902 reverse=False, content=False): 903 """Iterate categories used (which need not have a Category page). 904 905 Iterator yields Category objects. Note that, in practice, links that 906 were found on pages that have been deleted may not have been removed 907 from the database table, so this method can return false positives. 908 909 :see: https://www.mediawiki.org/wiki/API:Allcategories 910 911 :param start: Start at this category title (category need not exist). 912 :param prefix: Only yield categories starting with this string. 913 :param reverse: if True, iterate in reverse Unicode lexigraphic 914 order (default: iterate in forward order) 915 :param content: if True, load the current content of each iterated page 916 (default False); note that this means the contents of the category 917 description page, not the pages that are members of the category 918 """ 919 acgen = self._generator(api.PageGenerator, 920 type_arg='allcategories', gacfrom=start, 921 total=total, g_content=content) 922 if prefix: 923 acgen.request['gacprefix'] = prefix 924 if reverse: 925 acgen.request['gacdir'] = 'descending' 926 return acgen 927 928 @deprecated_args(step=True) 929 def botusers(self, total=None): 930 """Iterate bot users. 931 932 Iterated values are dicts containing 'name', 'userid', 'editcount', 933 'registration', and 'groups' keys. 'groups' will be present only if 934 the user is a member of at least 1 group, and will be a list of 935 str; all the other values are str and should always be present. 936 """ 937 if not hasattr(self, '_bots'): 938 self._bots = {} 939 940 if not self._bots: 941 for item in self.allusers(group='bot', total=total): 942 self._bots.setdefault(item['name'], item) 943 944 yield from self._bots.values() 945 946 @deprecated_args(step=True) 947 def allusers(self, start='!', prefix='', group=None, total=None): 948 """Iterate registered users, ordered by username. 949 950 Iterated values are dicts containing 'name', 'editcount', 951 'registration', and (sometimes) 'groups' keys. 'groups' will be 952 present only if the user is a member of at least 1 group, and 953 will be a list of str; all the other values are str and should 954 always be present. 955 956 :see: https://www.mediawiki.org/wiki/API:Allusers 957 958 :param start: start at this username (name need not exist) 959 :param prefix: only iterate usernames starting with this substring 960 :param group: only iterate users that are members of this group 961 :type group: str 962 """ 963 augen = self._generator(api.ListGenerator, type_arg='allusers', 964 auprop='editcount|groups|registration', 965 aufrom=start, total=total) 966 if prefix: 967 augen.request['auprefix'] = prefix 968 if group: 969 augen.request['augroup'] = group 970 return augen 971 972 @deprecated_args(step=True) 973 def allimages(self, start='!', prefix='', minsize=None, maxsize=None, 974 reverse=False, sha1=None, sha1base36=None, 975 total=None, content=False): 976 """Iterate all images, ordered by image title. 977 978 Yields FilePages, but these pages need not exist on the wiki. 979 980 :see: https://www.mediawiki.org/wiki/API:Allimages 981 982 :param start: start at this title (name need not exist) 983 :param prefix: only iterate titles starting with this substring 984 :param minsize: only iterate images of at least this many bytes 985 :param maxsize: only iterate images of no more than this many bytes 986 :param reverse: if True, iterate in reverse lexigraphic order 987 :param sha1: only iterate image (it is theoretically possible there 988 could be more than one) with this sha1 hash 989 :param sha1base36: same as sha1 but in base 36 990 :param content: if True, load the current content of each iterated page 991 (default False); note that this means the content of the image 992 description page, not the image itself 993 """ 994 aigen = self._generator(api.PageGenerator, 995 type_arg='allimages', gaifrom=start, 996 total=total, g_content=content) 997 if prefix: 998 aigen.request['gaiprefix'] = prefix 999 if isinstance(minsize, int): 1000 aigen.request['gaiminsize'] = str(minsize) 1001 if isinstance(maxsize, int): 1002 aigen.request['gaimaxsize'] = str(maxsize) 1003 if reverse: 1004 aigen.request['gaidir'] = 'descending' 1005 if sha1: 1006 aigen.request['gaisha1'] = sha1 1007 if sha1base36: 1008 aigen.request['gaisha1base36'] = sha1base36 1009 return aigen 1010 1011 @deprecated_args(limit='total') # ignore falimit setting 1012 def filearchive(self, start=None, end=None, reverse=False, total=None, 1013 **kwargs): 1014 """Iterate archived files. 1015 1016 Yields dict of file archive informations. 1017 1018 :see: https://www.mediawiki.org/wiki/API:filearchive 1019 1020 :param start: start at this title (name need not exist) 1021 :param end: end at this title (name need not exist) 1022 :param reverse: if True, iterate in reverse lexigraphic order 1023 :param total: maximum number of pages to retrieve in total 1024 :keyword prefix: only iterate titles starting with this substring 1025 :keyword sha1: only iterate image with this sha1 hash 1026 :keyword sha1base36: same as sha1 but in base 36 1027 :keyword prop: Image information to get. Default is timestamp 1028 """ 1029 if start and end: 1030 self.assert_valid_iter_params( 1031 'filearchive', start, end, reverse, is_ts=False) 1032 fagen = self._generator(api.ListGenerator, 1033 type_arg='filearchive', 1034 fafrom=start, 1035 fato=end, 1036 total=total) 1037 for k, v in kwargs.items(): 1038 fagen.request['fa' + k] = v 1039 if reverse: 1040 fagen.request['fadir'] = 'descending' 1041 return fagen 1042 1043 @deprecated_args(step=True) 1044 def blocks(self, starttime=None, endtime=None, reverse=False, 1045 blockids=None, users=None, iprange: Optional[str] = None, 1046 total: Optional[int] = None): 1047 """Iterate all current blocks, in order of creation. 1048 1049 The iterator yields dicts containing keys corresponding to the 1050 block properties. 1051 1052 :see: https://www.mediawiki.org/wiki/API:Blocks 1053 1054 :note: logevents only logs user blocks, while this method 1055 iterates all blocks including IP ranges. 1056 :note: ``iprange`` parameter cannot be used together with ``users``. 1057 1058 :param starttime: start iterating at this Timestamp 1059 :type starttime: pywikibot.Timestamp 1060 :param endtime: stop iterating at this Timestamp 1061 :type endtime: pywikibot.Timestamp 1062 :param reverse: if True, iterate oldest blocks first (default: newest) 1063 :type reverse: bool 1064 :param blockids: only iterate blocks with these id numbers. Numbers 1065 must be separated by '|' if given by a str. 1066 :type blockids: str, tuple or list 1067 :param users: only iterate blocks affecting these usernames or IPs 1068 :type users: str, tuple or list 1069 :param iprange: a single IP or an IP range. Ranges broader than 1070 IPv4/16 or IPv6/19 are not accepted. 1071 :param total: total amount of block entries 1072 """ 1073 if starttime and endtime: 1074 self.assert_valid_iter_params('blocks', starttime, endtime, 1075 reverse) 1076 bkgen = self._generator(api.ListGenerator, type_arg='blocks', 1077 total=total) 1078 bkgen.request['bkprop'] = ['id', 'user', 'by', 'timestamp', 'expiry', 1079 'reason', 'range', 'flags', 'userid'] 1080 if starttime: 1081 bkgen.request['bkstart'] = starttime 1082 if endtime: 1083 bkgen.request['bkend'] = endtime 1084 if reverse: 1085 bkgen.request['bkdir'] = 'newer' 1086 if blockids: 1087 bkgen.request['bkids'] = blockids 1088 if users: 1089 if isinstance(users, str): 1090 users = users.split('|') 1091 1092 # actual IPv6 addresses (anonymous users) are uppercase, but they 1093 # have never a :: in the username (so those are registered users) 1094 users = [user.upper() if is_ip_address(user) and '::' not in user 1095 else user for user in users] 1096 bkgen.request['bkusers'] = users 1097 elif iprange: 1098 bkgen.request['bkip'] = iprange 1099 return bkgen 1100 1101 @deprecated_args(step=True) 1102 def exturlusage(self, url: Optional[str] = None, 1103 protocol: Optional[str] = None, namespaces=None, 1104 total: Optional[int] = None, content=False): 1105 """Iterate Pages that contain links to the given URL. 1106 1107 :see: https://www.mediawiki.org/wiki/API:Exturlusage 1108 1109 :param url: The URL to search for (with or without the protocol 1110 prefix); this may include a '*' as a wildcard, only at the start 1111 of the hostname 1112 :param namespaces: list of namespace numbers to fetch contribs from 1113 :type namespaces: list of int 1114 :param total: Maximum number of pages to retrieve in total 1115 :param protocol: Protocol to search for, likely http or https, http by 1116 default. Full list shown on Special:LinkSearch wikipage 1117 """ 1118 if url is not None: 1119 found_protocol, _, url = url.rpartition('://') 1120 1121 # If url is * we make it None in order to search for every page 1122 # with any URL. 1123 if url == '*': 1124 url = None 1125 1126 if found_protocol: 1127 if protocol and protocol != found_protocol: 1128 raise ValueError('Protocol was specified, but a different ' 1129 'one was found in searched url') 1130 protocol = found_protocol 1131 1132 if not protocol: 1133 protocol = 'http' 1134 1135 return self._generator(api.PageGenerator, type_arg='exturlusage', 1136 geuquery=url, geuprotocol=protocol, 1137 namespaces=namespaces, 1138 total=total, g_content=content) 1139 1140 @deprecated_args(step=True) 1141 def imageusage(self, image, namespaces=None, filterredir=None, 1142 total=None, content=False): 1143 """Iterate Pages that contain links to the given FilePage. 1144 1145 :see: https://www.mediawiki.org/wiki/API:Imageusage 1146 1147 :param image: the image to search for (FilePage need not exist on 1148 the wiki) 1149 :type image: pywikibot.FilePage 1150 :param namespaces: If present, only iterate pages in these namespaces 1151 :type namespaces: iterable of str or Namespace key, 1152 or a single instance of those types. May be a '|' separated 1153 list of namespace identifiers. 1154 :param filterredir: if True, only yield redirects; if False (and not 1155 None), only yield non-redirects (default: yield both) 1156 :param content: if True, load the current content of each iterated page 1157 (default False) 1158 :raises KeyError: a namespace identifier was not resolved 1159 :raises TypeError: a namespace identifier has an inappropriate 1160 type such as NoneType or bool 1161 """ 1162 iuargs = {'giutitle': image.title(with_section=False)} 1163 if filterredir is not None: 1164 iuargs['giufilterredir'] = ('redirects' if filterredir else 1165 'nonredirects') 1166 return self._generator(api.PageGenerator, type_arg='imageusage', 1167 namespaces=namespaces, 1168 total=total, g_content=content, **iuargs) 1169 1170 @deprecated_args(step=True) 1171 def logevents(self, logtype: Optional[str] = None, 1172 user: Optional[str] = None, page=None, 1173 namespace=None, start=None, end=None, 1174 reverse: bool = False, tag: Optional[str] = None, 1175 total: Optional[int] = None): 1176 """Iterate all log entries. 1177 1178 :see: https://www.mediawiki.org/wiki/API:Logevents 1179 1180 :note: logevents with logtype='block' only logs user blocks whereas 1181 site.blocks iterates all blocks including IP ranges. 1182 1183 :param logtype: only iterate entries of this type 1184 (see mediawiki api documentation for available types) 1185 :param user: only iterate entries that match this user name 1186 :param page: only iterate entries affecting this page 1187 :type page: pywikibot.Page or str 1188 :param namespace: namespace(s) to retrieve logevents from 1189 :type namespace: int or Namespace or an iterable of them 1190 :note: due to an API limitation, if namespace param contains multiple 1191 namespaces, log entries from all namespaces will be fetched from 1192 the API and will be filtered later during iteration. 1193 :param start: only iterate entries from and after this Timestamp 1194 :type start: Timestamp or ISO date string 1195 :param end: only iterate entries up to and through this Timestamp 1196 :type end: Timestamp or ISO date string 1197 :param reverse: if True, iterate oldest entries first (default: newest) 1198 :param tag: only iterate entries tagged with this tag 1199 :param total: maximum number of events to iterate 1200 :rtype: iterable 1201 1202 :raises KeyError: the namespace identifier was not resolved 1203 :raises TypeError: the namespace identifier has an inappropriate 1204 type such as bool, or an iterable with more than one namespace 1205 """ 1206 if start and end: 1207 self.assert_valid_iter_params('logevents', start, end, reverse) 1208 1209 legen = self._generator(api.LogEntryListGenerator, type_arg=logtype, 1210 total=total) 1211 if logtype is not None: 1212 legen.request['letype'] = logtype 1213 if user is not None: 1214 legen.request['leuser'] = user 1215 if page is not None: 1216 legen.request['letitle'] = page 1217 if start is not None: 1218 legen.request['lestart'] = start 1219 if end is not None: 1220 legen.request['leend'] = end 1221 if reverse: 1222 legen.request['ledir'] = 'newer' 1223 if namespace is not None: 1224 legen.set_namespace(namespace) 1225 if tag: 1226 legen.request['letag'] = tag 1227 1228 return legen 1229 1230 @deprecated_args(includeredirects='redirect', namespace='namespaces', 1231 number='total', rcend='end', rclimit='total', 1232 rcnamespace='namespaces', rcstart='start', 1233 rctype='changetype', showAnon='anon', showBot='bot', 1234 showMinor='minor', showPatrolled='patrolled', 1235 showRedirects='redirect', topOnly='top_only') 1236 def recentchanges(self, *, 1237 start=None, 1238 end=None, 1239 reverse: bool = False, 1240 namespaces=None, 1241 changetype: Optional[str] = None, 1242 minor: Optional[bool] = None, 1243 bot: Optional[bool] = None, 1244 anon: Optional[bool] = None, 1245 redirect: Optional[bool] = None, 1246 patrolled: Optional[bool] = None, 1247 top_only: bool = False, 1248 total: Optional[int] = None, 1249 user: Union[str, List[str], None] = None, 1250 excludeuser: Union[str, List[str], None] = None, 1251 tag: Optional[str] = None): 1252 """Iterate recent changes. 1253 1254 :see: https://www.mediawiki.org/wiki/API:RecentChanges 1255 1256 :param start: Timestamp to start listing from 1257 :type start: pywikibot.Timestamp 1258 :param end: Timestamp to end listing at 1259 :type end: pywikibot.Timestamp 1260 :param reverse: if True, start with oldest changes (default: newest) 1261 :param namespaces: only iterate pages in these namespaces 1262 :type namespaces: iterable of str or Namespace key, 1263 or a single instance of those types. May be a '|' separated 1264 list of namespace identifiers. 1265 :param changetype: only iterate changes of this type ("edit" for 1266 edits to existing pages, "new" for new pages, "log" for log 1267 entries) 1268 :param minor: if True, only list minor edits; if False, only list 1269 non-minor edits; if None, list all 1270 :param bot: if True, only list bot edits; if False, only list 1271 non-bot edits; if None, list all 1272 :param anon: if True, only list anon edits; if False, only list 1273 non-anon edits; if None, list all 1274 :param redirect: if True, only list edits to redirect pages; if 1275 False, only list edits to non-redirect pages; if None, list all 1276 :param patrolled: if True, only list patrolled edits; if False, 1277 only list non-patrolled edits; if None, list all 1278 :param top_only: if True, only list changes that are the latest 1279 revision (default False) 1280 :param user: if not None, only list edits by this user or users 1281 :param excludeuser: if not None, exclude edits by this user or users 1282 :param tag: a recent changes tag 1283 :raises KeyError: a namespace identifier was not resolved 1284 :raises TypeError: a namespace identifier has an inappropriate 1285 type such as NoneType or bool 1286 """ 1287 if start and end: 1288 self.assert_valid_iter_params('recentchanges', start, end, reverse) 1289 1290 rcgen = self._generator(api.ListGenerator, type_arg='recentchanges', 1291 rcprop='user|comment|timestamp|title|ids' 1292 '|sizes|redirect|loginfo|flags|tags', 1293 namespaces=namespaces, 1294 total=total, rctoponly=top_only) 1295 if start is not None: 1296 rcgen.request['rcstart'] = start 1297 if end is not None: 1298 rcgen.request['rcend'] = end 1299 if reverse: 1300 rcgen.request['rcdir'] = 'newer' 1301 if changetype: 1302 rcgen.request['rctype'] = changetype 1303 filters = {'minor': minor, 1304 'bot': bot, 1305 'anon': anon, 1306 'redirect': redirect, 1307 } 1308 if patrolled is not None and ( 1309 self.has_right('patrol') or self.has_right('patrolmarks')): 1310 rcgen.request['rcprop'] += ['patrolled'] 1311 filters['patrolled'] = patrolled 1312 rcgen.request['rcshow'] = api.OptionSet(self, 'recentchanges', 'show', 1313 filters) 1314 1315 if user: 1316 rcgen.request['rcuser'] = user 1317 1318 if excludeuser: 1319 rcgen.request['rcexcludeuser'] = excludeuser 1320 rcgen.request['rctag'] = tag 1321 return rcgen 1322 1323 @deprecated_args(number='total', step=True, key='searchstring', 1324 getredirects=True, get_redirects=True) 1325 def search(self, searchstring: str, *, 1326 namespaces=None, 1327 where: str = 'text', 1328 total: Optional[int] = None, 1329 content: bool = False): 1330 """Iterate Pages that contain the searchstring. 1331 1332 Note that this may include non-existing Pages if the wiki's database 1333 table contains outdated entries. 1334 1335 :see: https://www.mediawiki.org/wiki/API:Search 1336 1337 :param searchstring: the text to search for 1338 :param where: Where to search; value must be "text", "title" or 1339 "nearmatch" (many wikis do not support title or nearmatch search) 1340 :param namespaces: search only in these namespaces (defaults to all) 1341 :type namespaces: iterable of str or Namespace key, 1342 or a single instance of those types. May be a '|' separated 1343 list of namespace identifiers. 1344 :param content: if True, load the current content of each iterated page 1345 (default False) 1346 :raises KeyError: a namespace identifier was not resolved 1347 :raises TypeError: a namespace identifier has an inappropriate 1348 type such as NoneType or bool 1349 """ 1350 where_types = ['nearmatch', 'text', 'title', 'titles'] 1351 if not searchstring: 1352 raise Error('search: searchstring cannot be empty') 1353 if where not in where_types: 1354 raise Error("search: unrecognized 'where' value: {}".format(where)) 1355 if where in ('title', 'titles'): 1356 if where == 'titles': 1357 issue_deprecation_warning("where='titles'", "where='title'", 1358 since='20160224') 1359 where = 'title' 1360 1361 if self.has_extension('CirrusSearch') and \ 1362 isinstance(self.family, pywikibot.family.WikimediaFamily): 1363 # 'title' search was disabled, use intitle instead 1364 searchstring = 'intitle:' + searchstring 1365 issue_deprecation_warning( 1366 "where='{}'".format(where), 1367 "searchstring='{}'".format(searchstring), 1368 since='20160224') 1369 1370 where = None # default 1371 1372 if not namespaces and namespaces != 0: 1373 namespaces = [ns_id for ns_id in self.namespaces if ns_id >= 0] 1374 srgen = self._generator(api.PageGenerator, type_arg='search', 1375 gsrsearch=searchstring, gsrwhat=where, 1376 namespaces=namespaces, 1377 total=total, g_content=content) 1378 return srgen 1379 1380 @deprecated_args(step=True, showMinor='minor') 1381 def usercontribs(self, user=None, userprefix=None, start=None, end=None, 1382 reverse=False, namespaces=None, minor=None, 1383 total: Optional[int] = None, top_only=False): 1384 """Iterate contributions by a particular user. 1385 1386 Iterated values are in the same format as recentchanges. 1387 1388 :see: https://www.mediawiki.org/wiki/API:Usercontribs 1389 1390 :param user: Iterate contributions by this user (name or IP) 1391 :param userprefix: Iterate contributions by all users whose names 1392 or IPs start with this substring 1393 :param start: Iterate contributions starting at this Timestamp 1394 :param end: Iterate contributions ending at this Timestamp 1395 :param reverse: Iterate oldest contributions first (default: newest) 1396 :param namespaces: only iterate pages in these namespaces 1397 :type namespaces: iterable of str or Namespace key, 1398 or a single instance of those types. May be a '|' separated 1399 list of namespace identifiers. 1400 :param minor: if True, iterate only minor edits; if False and 1401 not None, iterate only non-minor edits (default: iterate both) 1402 :param total: limit result to this number of pages 1403 :param top_only: if True, iterate only edits which are the latest 1404 revision (default: False) 1405 :raises pywikibot.exceptions.Error: either user or userprefix must be 1406 non-empty 1407 :raises KeyError: a namespace identifier was not resolved 1408 :raises TypeError: a namespace identifier has an inappropriate 1409 type such as NoneType or bool 1410 """ 1411 if not (user or userprefix): 1412 raise Error( 1413 'usercontribs: either user or userprefix must be non-empty') 1414 1415 if start and end: 1416 self.assert_valid_iter_params('usercontribs', start, end, reverse) 1417 1418 ucgen = self._generator(api.ListGenerator, type_arg='usercontribs', 1419 ucprop='ids|title|timestamp|comment|flags', 1420 namespaces=namespaces, 1421 total=total, uctoponly=top_only) 1422 if user: 1423 ucgen.request['ucuser'] = user 1424 if userprefix: 1425 ucgen.request['ucuserprefix'] = userprefix 1426 if start is not None: 1427 ucgen.request['ucstart'] = str(start) 1428 if end is not None: 1429 ucgen.request['ucend'] = str(end) 1430 if reverse: 1431 ucgen.request['ucdir'] = 'newer' 1432 option_set = api.OptionSet(self, 'usercontribs', 'show') 1433 option_set['minor'] = minor 1434 ucgen.request['ucshow'] = option_set 1435 return ucgen 1436 1437 @deprecated_args(step=True, showMinor='minor', showAnon='anon', 1438 showBot='bot') 1439 def watchlist_revs(self, start=None, end=None, reverse=False, 1440 namespaces=None, minor=None, bot=None, 1441 anon=None, total=None): 1442 """Iterate revisions to pages on the bot user's watchlist. 1443 1444 Iterated values will be in same format as recentchanges. 1445 1446 :see: https://www.mediawiki.org/wiki/API:Watchlist 1447 1448 :param start: Iterate revisions starting at this Timestamp 1449 :param end: Iterate revisions ending at this Timestamp 1450 :param reverse: Iterate oldest revisions first (default: newest) 1451 :param namespaces: only iterate pages in these namespaces 1452 :type namespaces: iterable of str or Namespace key, 1453 or a single instance of those types. May be a '|' separated 1454 list of namespace identifiers. 1455 :param minor: if True, only list minor edits; if False (and not 1456 None), only list non-minor edits 1457 :param bot: if True, only list bot edits; if False (and not 1458 None), only list non-bot edits 1459 :param anon: if True, only list anon edits; if False (and not 1460 None), only list non-anon edits 1461 :raises KeyError: a namespace identifier was not resolved 1462 :raises TypeError: a namespace identifier has an inappropriate 1463 type such as NoneType or bool 1464 """ 1465 if start and end: 1466 self.assert_valid_iter_params( 1467 'watchlist_revs', start, end, reverse) 1468 1469 wlgen = self._generator( 1470 api.ListGenerator, type_arg='watchlist', 1471 wlprop='user|comment|timestamp|title|ids|flags', 1472 wlallrev='', namespaces=namespaces, total=total) 1473 # TODO: allow users to ask for "patrol" as well? 1474 if start is not None: 1475 wlgen.request['wlstart'] = start 1476 if end is not None: 1477 wlgen.request['wlend'] = end 1478 if reverse: 1479 wlgen.request['wldir'] = 'newer' 1480 filters = {'minor': minor, 'bot': bot, 'anon': anon} 1481 wlgen.request['wlshow'] = api.OptionSet(self, 'watchlist', 'show', 1482 filters) 1483 return wlgen 1484 1485 def _check_view_deleted(self, msg_prefix: str, prop: List[str]) -> None: 1486 """Check if the user can view deleted comments and content. 1487 1488 :param msg_prefix: The calling method name 1489 :param prop: Requested props to check 1490 :raises UserRightsError: user cannot view a requested prop 1491 """ 1492 err = '{}: User:{} not authorized to view '.format(msg_prefix, 1493 self.user()) 1494 if not self.has_right('deletedhistory'): 1495 if self.mw_version < '1.34': 1496 raise UserRightsError(err + 'deleted revisions.') 1497 if 'comment' in prop or 'parsedcomment' in prop: 1498 raise UserRightsError(err + 'comments of deleted revisions.') 1499 if ('content' in prop and not (self.has_right('deletedtext') 1500 or self.has_right('undelete'))): 1501 raise UserRightsError(err + 'deleted content.') 1502 1503 @deprecated_args(step=True, get_text='content', page='titles', 1504 limit='total') 1505 def deletedrevs(self, titles=None, start=None, end=None, 1506 reverse: bool = False, 1507 content=False, total=None, **kwargs): 1508 """Iterate deleted revisions. 1509 1510 Each value returned by the iterator will be a dict containing the 1511 'title' and 'ns' keys for a particular Page and a 'revisions' key 1512 whose value is a list of revisions in the same format as 1513 recentchanges plus a 'content' element with key '*' if requested 1514 when 'content' parameter is set. For older wikis a 'token' key is 1515 also given with the content request. 1516 1517 :see: https://www.mediawiki.org/wiki/API:Deletedrevisions 1518 1519 :param titles: The page titles to check for deleted revisions 1520 :type titles: str (multiple titles delimited with '|') 1521 or pywikibot.Page or typing.Iterable[pywikibot.Page] 1522 or typing.Iterable[str] 1523 :keyword revids: Get revisions by their ID 1524 1525 :note: either titles or revids must be set but not both 1526 1527 :param start: Iterate revisions starting at this Timestamp 1528 :param end: Iterate revisions ending at this Timestamp 1529 :param reverse: Iterate oldest revisions first (default: newest) 1530 :param content: If True, retrieve the content of each revision 1531 :param total: number of revisions to retrieve 1532 :keyword user: List revisions by this user 1533 :keyword excludeuser: Exclude revisions by this user 1534 :keyword tag: Only list revision tagged with this tag 1535 :keyword prop: Which properties to get. Defaults are ids, user, 1536 comment, flags and timestamp 1537 """ 1538 def handle_props(props): 1539 """Translate deletedrev props to deletedrevisions props.""" 1540 if isinstance(props, str): 1541 props = props.split('|') 1542 if self.mw_version >= '1.25': 1543 return props 1544 1545 old_props = [] 1546 for item in props: 1547 if item == 'ids': 1548 old_props += ['revid', 'parentid'] 1549 elif item == 'flags': 1550 old_props.append('minor') 1551 elif item != 'timestamp': 1552 old_props.append(item) 1553 if item == 'content' and self.mw_version < '1.24': 1554 old_props.append('token') 1555 return old_props 1556 1557 # set default properties 1558 prop = kwargs.pop('prop', 1559 ['ids', 'user', 'comment', 'flags', 'timestamp']) 1560 if content: 1561 prop.append('content') 1562 1563 if start and end: 1564 self.assert_valid_iter_params('deletedrevs', start, end, reverse) 1565 1566 self._check_view_deleted('deletedrevs', prop) 1567 1568 revids = kwargs.pop('revids', None) 1569 if not (bool(titles) ^ (revids is not None)): 1570 raise Error('deletedrevs: either "titles" or "revids" parameter ' 1571 'must be given.') 1572 if revids and self.mw_version < '1.25': 1573 raise NotImplementedError( 1574 'deletedrevs: "revid" is not implemented with MediaWiki {}' 1575 .format(self.mw_version)) 1576 1577 if self.mw_version >= '1.25': 1578 pre = 'drv' 1579 type_arg = 'deletedrevisions' 1580 generator = api.PropertyGenerator 1581 else: 1582 pre = 'dr' 1583 type_arg = 'deletedrevs' 1584 generator = api.ListGenerator 1585 1586 gen = self._generator(generator, type_arg=type_arg, 1587 titles=titles, revids=revids, 1588 total=total) 1589 1590 gen.request[pre + 'start'] = start 1591 gen.request[pre + 'end'] = end 1592 gen.request[pre + 'prop'] = handle_props(prop) 1593 1594 # handle other parameters like user 1595 for k, v in kwargs.items(): 1596 gen.request[pre + k] = v 1597 1598 if reverse: 1599 gen.request[pre + 'dir'] = 'newer' 1600 1601 if self.mw_version < '1.25': 1602 yield from gen 1603 1604 else: 1605 # The dict result is different for both generators 1606 for data in gen: 1607 with suppress(KeyError): 1608 data['revisions'] = data.pop('deletedrevisions') 1609 yield data 1610 1611 @need_version('1.25') 1612 def alldeletedrevisions( 1613 self, 1614 *, 1615 namespaces=None, 1616 reverse: bool = False, 1617 content: bool = False, 1618 total: Optional[int] = None, 1619 **kwargs 1620 ) -> typing.Iterable[Dict[str, Any]]: 1621 """ 1622 Iterate all deleted revisions. 1623 1624 :see: https://www.mediawiki.org/wiki/API:Alldeletedrevisions 1625 1626 :param namespaces: Only iterate pages in these namespaces 1627 :type namespaces: iterable of str or Namespace key, 1628 or a single instance of those types. May be a '|' separated 1629 list of namespace identifiers. 1630 :param reverse: Iterate oldest revisions first (default: newest) 1631 :param content: If True, retrieve the content of each revision 1632 :param total: Number of revisions to retrieve 1633 :keyword from: Start listing at this title 1634 :keyword to: Stop listing at this title 1635 :keyword prefix: Search for all page titles that begin with this value 1636 :keyword excludeuser: Exclude revisions by this user 1637 :keyword tag: Only list revisions tagged with this tag 1638 :keyword user: List revisions by this user 1639 :keyword start: Iterate revisions starting at this Timestamp 1640 :keyword end: Iterate revisions ending at this Timestamp 1641 :keyword prop: Which properties to get. Defaults are ids, timestamp, 1642 flags, user, and comment (if you have the right to view). 1643 :type prop: List[str] 1644 """ 1645 if 'start' in kwargs and 'end' in kwargs: 1646 self.assert_valid_iter_params('alldeletedrevisions', 1647 kwargs['start'], 1648 kwargs['end'], 1649 reverse) 1650 prop = kwargs.pop('prop', []) 1651 parameters = {'adr' + k: v for k, v in kwargs.items()} 1652 if not prop: 1653 prop = ['ids', 'timestamp', 'flags', 'user'] 1654 if self.has_right('deletedhistory'): 1655 prop.append('comment') 1656 if content: 1657 prop.append('content') 1658 self._check_view_deleted('alldeletedrevisions', prop) 1659 parameters['adrprop'] = prop 1660 if reverse: 1661 parameters['adrdir'] = 'newer' 1662 yield from self._generator(api.ListGenerator, 1663 type_arg='alldeletedrevisions', 1664 namespaces=namespaces, 1665 total=total, 1666 parameters=parameters) 1667 1668 def users(self, usernames): 1669 """Iterate info about a list of users by name or IP. 1670 1671 :see: https://www.mediawiki.org/wiki/API:Users 1672 1673 :param usernames: a list of user names 1674 :type usernames: list, or other iterable, of str 1675 """ 1676 usprop = ['blockinfo', 'gender', 'groups', 'editcount', 'registration', 1677 'rights', 'emailable'] 1678 usgen = api.ListGenerator( 1679 'users', site=self, parameters={ 1680 'ususers': usernames, 'usprop': usprop}) 1681 return usgen 1682 1683 @deprecated_args(step=True) 1684 def randompages(self, total=None, namespaces=None, 1685 redirects=False, content=False): 1686 """Iterate a number of random pages. 1687 1688 :see: https://www.mediawiki.org/wiki/API:Random 1689 1690 Pages are listed in a fixed sequence, only the starting point is 1691 random. 1692 1693 :param total: the maximum number of pages to iterate 1694 :param namespaces: only iterate pages in these namespaces. 1695 :type namespaces: iterable of str or Namespace key, 1696 or a single instance of those types. May be a '|' separated 1697 list of namespace identifiers. 1698 :param redirects: if True, include only redirect pages in results, 1699 False does not include redirects and None (MW 1.26+) include both 1700 types. (default: False) 1701 :type redirects: bool or None 1702 :param content: if True, load the current content of each iterated page 1703 (default False) 1704 :raises KeyError: a namespace identifier was not resolved 1705 :raises TypeError: a namespace identifier has an inappropriate 1706 type such as NoneType or bool 1707 :raises AssertError: unsupported redirects parameter 1708 """ 1709 mapping = {False: None, True: 'redirects', None: 'all'} 1710 assert redirects in mapping 1711 redirects = mapping[redirects] 1712 params = {} 1713 if redirects is not None: 1714 if self.mw_version < '1.26': 1715 if redirects == 'all': 1716 warn("parameter redirects=None to retrieve 'all' random" 1717 'page types is not supported by mw version {}. ' 1718 'Using default.'.format(self.mw_version), 1719 UserWarning) 1720 params['grnredirect'] = redirects == 'redirects' 1721 else: 1722 params['grnfilterredir'] = redirects 1723 return self._generator(api.PageGenerator, type_arg='random', 1724 namespaces=namespaces, total=total, 1725 g_content=content, **params) 1726 1727 # TODO: implement undelete 1728 1729 _patrol_errors = { 1730 'nosuchrcid': 'There is no change with rcid {rcid}', 1731 'nosuchrevid': 'There is no change with revid {revid}', 1732 'patroldisabled': 'Patrolling is disabled on {site} wiki', 1733 'noautopatrol': 'User {user} has no permission to patrol its own ' 1734 'changes, "autopatrol" is needed', 1735 'notpatrollable': 1736 "The revision {revid} can't be patrolled as it's too old." 1737 } 1738 1739 @need_right('patrol') 1740 @deprecated_args(token=True) 1741 def patrol(self, rcid=None, revid=None, revision=None): 1742 """Return a generator of patrolled pages. 1743 1744 :see: https://www.mediawiki.org/wiki/API:Patrol 1745 1746 Pages to be patrolled are identified by rcid, revid or revision. 1747 At least one of the parameters is mandatory. 1748 See https://www.mediawiki.org/wiki/API:Patrol. 1749 1750 :param rcid: an int/string/iterable/iterator providing rcid of pages 1751 to be patrolled. 1752 :type rcid: iterable/iterator which returns a number or string which 1753 contains only digits; it also supports a string (as above) or int 1754 :param revid: an int/string/iterable/iterator providing revid of pages 1755 to be patrolled. 1756 :type revid: iterable/iterator which returns a number or string which 1757 contains only digits; it also supports a string (as above) or int. 1758 :param revision: an Revision/iterable/iterator providing Revision 1759 object of pages to be patrolled. 1760 :type revision: iterable/iterator which returns a Revision object; it 1761 also supports a single Revision. 1762 :rtype: iterator of dict with 'rcid', 'ns' and 'title' 1763 of the patrolled page. 1764 1765 """ 1766 # If patrol is not enabled, attr will be set the first time a 1767 # request is done. 1768 if hasattr(self, '_patroldisabled'): 1769 if self._patroldisabled: 1770 return 1771 1772 if all(_ is None for _ in [rcid, revid, revision]): 1773 raise Error('No rcid, revid or revision provided.') 1774 1775 if isinstance(rcid, (int, str)): 1776 rcid = {rcid} 1777 if isinstance(revid, (int, str)): 1778 revid = {revid} 1779 if isinstance(revision, pywikibot.page.Revision): 1780 revision = {revision} 1781 1782 # Handle param=None. 1783 rcid = rcid or set() 1784 revid = revid or set() 1785 revision = revision or set() 1786 1787 combined_revid = set(revid) | {r.revid for r in revision} 1788 1789 gen = itertools.chain( 1790 zip_longest(rcid, [], fillvalue='rcid'), 1791 zip_longest(combined_revid, [], fillvalue='revid')) 1792 1793 token = self.tokens['patrol'] 1794 1795 for idvalue, idtype in gen: 1796 req = self._request(parameters={'action': 'patrol', 1797 'token': token, 1798 idtype: idvalue}) 1799 1800 try: 1801 result = req.submit() 1802 except APIError as err: 1803 # patrol is disabled, store in attr to avoid other requests 1804 if err.code == 'patroldisabled': 1805 self._patroldisabled = True 1806 return 1807 1808 errdata = { 1809 'site': self, 1810 'user': self.user(), 1811 } 1812 errdata[idtype] = idvalue 1813 if err.code in self._patrol_errors: 1814 raise Error(self._patrol_errors[err.code] 1815 .format_map(errdata)) 1816 pywikibot.debug("protect: Unexpected error code '{}' received." 1817 .format(err.code), 1818 _logger) 1819 raise 1820 1821 yield result['patrol'] 1822 1823 @deprecated_args(number='total', repeat=True, namespace='namespaces', 1824 rcshow=True, rc_show=True, get_redirect=True, step=True, 1825 showBot='bot', showRedirects='redirect', 1826 showPatrolled='patrolled') 1827 def newpages(self, user=None, returndict=False, 1828 start=None, end=None, reverse=False, bot=False, 1829 redirect=False, excludeuser=None, 1830 patrolled=None, namespaces=None, total=None): 1831 """Yield new articles (as Page objects) from recent changes. 1832 1833 Starts with the newest article and fetches the number of articles 1834 specified in the first argument. 1835 1836 The objects yielded are dependent on parameter returndict. 1837 When true, it yields a tuple composed of a Page object and a dict of 1838 attributes. 1839 When false, it yields a tuple composed of the Page object, 1840 timestamp (str), length (int), an empty string, username or IP 1841 address (str), comment (str). 1842 1843 :param namespaces: only iterate pages in these namespaces 1844 :type namespaces: iterable of str or Namespace key, 1845 or a single instance of those types. May be a '|' separated 1846 list of namespace identifiers. 1847 :raises KeyError: a namespace identifier was not resolved 1848 :raises TypeError: a namespace identifier has an inappropriate 1849 type such as NoneType or bool 1850 """ 1851 # TODO: update docstring 1852 1853 # N.B. API still provides no way to access Special:Newpages content 1854 # directly, so we get new pages indirectly through 'recentchanges' 1855 1856 gen = self.recentchanges( 1857 start=start, end=end, reverse=reverse, 1858 namespaces=namespaces, changetype='new', user=user, 1859 excludeuser=excludeuser, bot=bot, 1860 redirect=redirect, patrolled=patrolled, 1861 total=total 1862 ) 1863 for pageitem in gen: 1864 newpage = pywikibot.Page(self, pageitem['title']) 1865 if returndict: 1866 yield (newpage, pageitem) 1867 else: 1868 yield (newpage, pageitem['timestamp'], pageitem['newlen'], 1869 '', pageitem['user'], pageitem['comment']) 1870 1871 @deprecated('APISite.logevents(logtype="upload")', since='20170619') 1872 @deprecated_args(lestart='start', leend='end', leuser='user', letitle=True, 1873 repeat=True, number='total', step=True) 1874 def newfiles(self, user=None, start=None, end=None, reverse=False, 1875 total=None): 1876 """Yield information about newly uploaded files. 1877 1878 DEPRECATED: Use logevents(logtype='upload') instead. 1879 1880 Yields a tuple of FilePage, Timestamp, user(str), comment(str). 1881 1882 N.B. the API does not provide direct access to Special:Newimages, so 1883 this is derived from the "upload" log events instead. 1884 """ 1885 for event in self.logevents(logtype='upload', user=user, 1886 start=start, end=end, reverse=reverse, 1887 total=total): 1888 filepage = event.page() 1889 date = event.timestamp() 1890 user = event.user() 1891 comment = event.comment() or '' 1892 yield (filepage, date, user, comment) 1893 1894 def querypage(self, special_page, total=True): 1895 """Yield Page objects retrieved from Special:{special_page}. 1896 1897 :see: https://www.mediawiki.org/wiki/API:Querypage 1898 1899 Generic function for all special pages supported by the site MW API. 1900 1901 :param special_page: Special page to query 1902 :param total: number of pages to return 1903 :raise AssertionError: special_page is not supported in SpecialPages. 1904 """ 1905 param = self._paraminfo.parameter('query+querypage', 'page') 1906 assert special_page in param['type'], ( 1907 '{} not in {}'.format(special_page, param['type'])) 1908 1909 return self._generator(api.PageGenerator, 1910 type_arg='querypage', gqppage=special_page, 1911 total=total) 1912 1913 @deprecated_args(number='total', step=True, repeat=True) 1914 def longpages(self, total=None): 1915 """Yield Pages and lengths from Special:Longpages. 1916 1917 Yields a tuple of Page object, length(int). 1918 1919 :param total: number of pages to return 1920 """ 1921 lpgen = self._generator(api.ListGenerator, 1922 type_arg='querypage', qppage='Longpages', 1923 total=total) 1924 for pageitem in lpgen: 1925 yield (pywikibot.Page(self, pageitem['title']), 1926 int(pageitem['value'])) 1927 1928 @deprecated_args(number='total', step=True, repeat=True) 1929 def shortpages(self, total=None): 1930 """Yield Pages and lengths from Special:Shortpages. 1931 1932 Yields a tuple of Page object, length(int). 1933 1934 :param total: number of pages to return 1935 """ 1936 spgen = self._generator(api.ListGenerator, 1937 type_arg='querypage', qppage='Shortpages', 1938 total=total) 1939 for pageitem in spgen: 1940 yield (pywikibot.Page(self, pageitem['title']), 1941 int(pageitem['value'])) 1942 1943 @deprecated_args(number='total', step=True, repeat=True) 1944 def deadendpages(self, total=None): 1945 """Yield Page objects retrieved from Special:Deadendpages. 1946 1947 :param total: number of pages to return 1948 """ 1949 return self.querypage('Deadendpages', total) 1950 1951 @deprecated_args(number='total', step=True, repeat=True) 1952 def ancientpages(self, total=None): 1953 """Yield Pages, datestamps from Special:Ancientpages. 1954 1955 :param total: number of pages to return 1956 """ 1957 apgen = self._generator(api.ListGenerator, 1958 type_arg='querypage', qppage='Ancientpages', 1959 total=total) 1960 for pageitem in apgen: 1961 yield (pywikibot.Page(self, pageitem['title']), 1962 pywikibot.Timestamp.fromISOformat(pageitem['timestamp'])) 1963 1964 @deprecated_args(number='total', step=True, repeat=True) 1965 def lonelypages(self, total=None): 1966 """Yield Pages retrieved from Special:Lonelypages. 1967 1968 :param total: number of pages to return 1969 """ 1970 return self.querypage('Lonelypages', total) 1971 1972 @deprecated_args(number='total', step=True, repeat=True) 1973 def unwatchedpages(self, total=None): 1974 """Yield Pages from Special:Unwatchedpages (requires Admin privileges). 1975 1976 :param total: number of pages to return 1977 """ 1978 return self.querypage('Unwatchedpages', total) 1979 1980 @deprecated_args(step=True) 1981 def wantedpages(self, total=None): 1982 """Yield Pages from Special:Wantedpages. 1983 1984 :param total: number of pages to return 1985 """ 1986 return self.querypage('Wantedpages', total) 1987 1988 def wantedfiles(self, total=None): 1989 """Yield Pages from Special:Wantedfiles. 1990 1991 :param total: number of pages to return 1992 """ 1993 return self.querypage('Wantedfiles', total) 1994 1995 def wantedtemplates(self, total=None): 1996 """Yield Pages from Special:Wantedtemplates. 1997 1998 :param total: number of pages to return 1999 """ 2000 return self.querypage('Wantedtemplates', total) 2001 2002 @deprecated_args(number='total', step=True, repeat=True) 2003 def wantedcategories(self, total=None): 2004 """Yield Pages from Special:Wantedcategories. 2005 2006 :param total: number of pages to return 2007 """ 2008 return self.querypage('Wantedcategories', total) 2009 2010 @deprecated_args(number='total', step=True, repeat=True) 2011 def uncategorizedcategories(self, total=None): 2012 """Yield Categories from Special:Uncategorizedcategories. 2013 2014 :param total: number of pages to return 2015 """ 2016 return self.querypage('Uncategorizedcategories', total) 2017 2018 @deprecated_args(number='total', step=True, repeat=True) 2019 def uncategorizedimages(self, total=None): 2020 """Yield FilePages from Special:Uncategorizedimages. 2021 2022 :param total: number of pages to return 2023 """ 2024 return self.querypage('Uncategorizedimages', total) 2025 2026 # synonym 2027 uncategorizedfiles = uncategorizedimages 2028 2029 @deprecated_args(number='total', step=True, repeat=True) 2030 def uncategorizedpages(self, total=None): 2031 """Yield Pages from Special:Uncategorizedpages. 2032 2033 :param total: number of pages to return 2034 """ 2035 return self.querypage('Uncategorizedpages', total) 2036 2037 @deprecated_args(number='total', step=True, repeat=True) 2038 def uncategorizedtemplates(self, total=None): 2039 """Yield Pages from Special:Uncategorizedtemplates. 2040 2041 :param total: number of pages to return 2042 """ 2043 return self.querypage('Uncategorizedtemplates', total) 2044 2045 @deprecated_args(number='total', step=True, repeat=True) 2046 def unusedcategories(self, total=None): 2047 """Yield Category objects from Special:Unusedcategories. 2048 2049 :param total: number of pages to return 2050 """ 2051 return self.querypage('Unusedcategories', total) 2052 2053 @deprecated_args(extension=True, number='total', step=True, repeat=True) 2054 def unusedfiles(self, total=None): 2055 """Yield FilePage objects from Special:Unusedimages. 2056 2057 :param total: number of pages to return 2058 """ 2059 return self.querypage('Unusedimages', total) 2060 2061 @deprecated_args(number='total', step=True, repeat=True) 2062 def withoutinterwiki(self, total=None): 2063 """Yield Pages without language links from Special:Withoutinterwiki. 2064 2065 :param total: number of pages to return 2066 """ 2067 return self.querypage('Withoutinterwiki', total) 2068 2069 @deprecated_args(step=True) 2070 def broken_redirects(self, total=None): 2071 """Yield Pages with broken redirects from Special:BrokenRedirects. 2072 2073 :param total: number of pages to return 2074 """ 2075 return self.querypage('BrokenRedirects', total) 2076 2077 @deprecated_args(step=True) 2078 def double_redirects(self, total=None): 2079 """Yield Pages with double redirects from Special:DoubleRedirects. 2080 2081 :param total: number of pages to return 2082 """ 2083 return self.querypage('DoubleRedirects', total) 2084 2085 @deprecated_args(step=True) 2086 def redirectpages(self, total=None): 2087 """Yield redirect pages from Special:ListRedirects. 2088 2089 :param total: number of pages to return 2090 """ 2091 return self.querypage('Listredirects', total) 2092 2093 @deprecated_args(lvl='level') 2094 def protectedpages(self, namespace=0, type='edit', level=False, 2095 total=None): 2096 """ 2097 Return protected pages depending on protection level and type. 2098 2099 For protection types which aren't 'create' it uses 2100 :py:obj:`APISite.allpages`, while it uses for 'create' the 2101 'query+protectedtitles' module. 2102 2103 :see: https://www.mediawiki.org/wiki/API:Protectedtitles 2104 2105 :param namespace: The searched namespace. 2106 :type namespace: int or Namespace or str 2107 :param type: The protection type to search for (default 'edit'). 2108 :type type: str 2109 :param level: The protection level (like 'autoconfirmed'). If False it 2110 shows all protection levels. 2111 :type level: str or False 2112 :return: The pages which are protected. 2113 :rtype: typing.Iterable[pywikibot.Page] 2114 """ 2115 namespaces = self.namespaces.resolve(namespace) 2116 # always assert that, so we are be sure that type could be 'create' 2117 assert 'create' in self.protection_types(), \ 2118 "'create' should be a valid protection type." 2119 if type == 'create': 2120 return self._generator( 2121 api.PageGenerator, type_arg='protectedtitles', 2122 namespaces=namespaces, gptlevel=level, total=total) 2123 return self.allpages(namespace=namespaces[0], protect_level=level, 2124 protect_type=type, total=total) 2125 2126 def pages_with_property(self, propname: str, *, 2127 total: Optional[int] = None): 2128 """Yield Page objects from Special:PagesWithProp. 2129 2130 :see: https://www.mediawiki.org/wiki/API:Pageswithprop 2131 2132 :param propname: must be a valid property. 2133 :param total: number of pages to return 2134 :return: return a generator of Page objects 2135 :rtype: iterator 2136 """ 2137 if propname not in self.get_property_names(): 2138 raise NotImplementedError( 2139 '"{}" is not a valid page property'.format(propname)) 2140 return self._generator(api.PageGenerator, type_arg='pageswithprop', 2141 gpwppropname=propname, total=total) 2142 2143 @deprecated_args(step=True, sysop=True) 2144 def watched_pages(self, force=False, total=None): 2145 """ 2146 Return watchlist. 2147 2148 :see: https://www.mediawiki.org/wiki/API:Watchlistraw 2149 2150 :param force: Reload watchlist 2151 :type force: bool 2152 :param total: if not None, limit the generator to yielding this many 2153 items in total 2154 :type total: int 2155 :return: list of pages in watchlist 2156 :rtype: list of pywikibot.Page objects 2157 """ 2158 expiry = None if force else pywikibot.config.API_config_expiry 2159 gen = api.PageGenerator(site=self, generator='watchlistraw', 2160 expiry=expiry) 2161 gen.set_maximum_items(total) 2162 return gen 2163