blob: 1458e2168314ef961b744688904c589db6fd950a [file] [log] [blame]
#!/usr/bin/env python3
"""This script can be used to delete and undelete pages en masse.
Of course, you will need an admin account on the relevant wiki.
These command line parameters can be used to specify which pages to work on:
&params;
Furthermore, the following command line parameters are supported:
-always Don't prompt to delete pages, just do it.
-summary:XYZ Set the summary message text for the edit to XYZ.
-undelete Actually undelete pages instead of deleting. Obviously
makes sense only with -page and -file.
-isorphan Alert if there are pages that link to page to be deleted
(check 'What links here'). By default it is active and
only the summary per namespace is be given. If given as
``-isorphan:n``, n pages per namespace will be shown. If
given as ``-isorphan:0``, only the summary per namespace
will be shown. If given as ``-isorphan:n``, with n < 0,
the option is disabled. This option is disregarded if
``-always`` is set.
-orphansonly: Specified namespaces. Separate multiple namespace numbers
or names with commas. Examples:
.. code:: shell
-orphansonly:0,2,4
-orphansonly:Help,MediaWiki
Note that Main ns can be indicated either with a 0 or a ',':
.. code:: shell
-orphansonly:0,1
-orphansonly:,Talk
Usage:
python pwb.py delete [-category categoryName]
Examples
--------
Delete everything in the category "To delete" without prompting:
python pwb.py delete -cat:"To delete" -always
"""
#
# (C) Pywikibot team, 2013-2024
#
# Distributed under the terms of the MIT license.
#
from __future__ import annotations
import collections
import pywikibot
from pywikibot import i18n, pagegenerators
from pywikibot.backports import DefaultDict
from pywikibot.bot import CurrentPageBot
from pywikibot.page import Page
from pywikibot.site import Namespace
from pywikibot.tools.itertools import islice_with_ellipsis
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {'&params;': pagegenerators.parameterHelp} # noqa: N816
RefTable = DefaultDict[Namespace, Page]
class PageWithRefs(Page):
"""A subclass of Page with convenience methods for reference checking.
Supports the same interface as Page, with some added methods.
"""
def __init__(self, source, title: str = '', ns=0) -> None:
"""Initializer."""
super().__init__(source, title, ns)
_cache_attrs = list(super()._cache_attrs)
_cache_attrs = tuple(_cache_attrs + ['_ref_table'])
def get_ref_table(self, *args, **kwargs) -> RefTable:
"""Build mapping table with pages which links the current page."""
ref_table = collections.defaultdict(list)
for page in self.getReferences(*args, **kwargs):
ref_table[page.namespace()].append(page)
return ref_table
@property
def ref_table(self) -> RefTable:
"""Build link reference table lazily.
This property gives a default table without any parameter set for
getReferences(), whereas self.get_ref_table() is able to accept
parameters.
"""
if not hasattr(self, '_ref_table'):
self._ref_table = self.get_ref_table()
return self._ref_table
def namespaces_with_ref_to_page(self, namespaces=None) -> set[Namespace]:
"""Check if current page has links from pages in namespaces.
If namespaces is None, all namespaces are checked.
Returns a set with namespaces where a ref to page is present.
:param namespaces: Namespace to check
:type namespaces: iterable of Namespace objects
"""
if namespaces is None:
namespaces = self.site.namespaces()
return set(namespaces) & set(self.ref_table)
class DeletionRobot(CurrentPageBot):
"""This robot allows deletion of pages en masse."""
update_options = {
'undelete': False,
'isorphan': 0,
'orphansonly': [],
}
def __init__(self, summary: str, **kwargs) -> None:
"""Initializer.
:param summary: the reason for the (un)deletion
"""
super().__init__(**kwargs)
self.summary = summary
# Upcast pages to PageWithRefs()
self.generator = (PageWithRefs(p) for p in self.generator)
def display_references(self) -> None:
"""Display pages that link to the current page, sorted per namespace.
Number of pages to display per namespace is provided by:
- self.opt.isorphan
"""
refs = self.current_page.ref_table
if not refs:
return
total = sum(len(v) for v in refs.values())
if total > 1:
pywikibot.warning(
f'There are {total} pages that link to {self.current_page}.')
else:
pywikibot.warning(
f'There is a page that links to {self.current_page}.')
show_n_pages = self.opt.isorphan
width = len(max((ns.canonical_prefix() for ns in refs), key=len))
for ns in sorted(refs):
n_pages_in_ns = len(refs[ns])
plural = '' if n_pages_in_ns == 1 else 's'
ns_name = ns.canonical_prefix() if ns != ns.MAIN else 'Main:'
ns_id = f'[{ns.id}]'
pywikibot.info(
' {0!s:<{width}} {1:>6} {2:>10} page{pl}'.format(
ns_name, ns_id, n_pages_in_ns, width=width, pl=plural))
if show_n_pages: # do not show marker if 0 pages are requested.
for page in islice_with_ellipsis(refs[ns], show_n_pages):
pywikibot.info(f' {page.title()!s}')
def skip_page(self, page) -> bool:
"""Skip the page under some conditions."""
if self.opt.undelete and page.exists():
pywikibot.info(f'Skipping: {page} already exists.')
return True
if not self.opt.undelete and not page.exists():
pywikibot.info(f'Skipping: {page} does not exist.')
return True
return super().skip_page(page)
def treat_page(self) -> None:
"""Process one page from the generator."""
if self.opt.undelete:
self.current_page.undelete(self.summary)
self.counter['undelete'] += 1
else:
if (self.opt.isorphan is not False
and not self.opt.always):
self.display_references()
if self.opt.orphansonly:
namespaces = self.opt.orphansonly
ns_with_ref = self.current_page.namespaces_with_ref_to_page(
namespaces)
ns_with_ref = sorted(ns_with_ref)
if ns_with_ref:
ns_names = ', '.join(str(ns.id) for ns in ns_with_ref)
pywikibot.info(f'Skipping: {self.current_page} is not '
f'orphan in ns: {ns_names}.')
return # Not an orphan, do not delete.
if self.current_page.site.user() is None:
self.current_page.site.login()
res = self.current_page.delete(self.summary,
not self.opt.always,
self.opt.always,
automatic_quit=True)
if res > 0:
self.counter['delete'] += 1
elif res < 0:
self.counter['marked-for-deletion'] += 1
else:
self.counter['no-action'] += 1
def main(*args: str) -> None:
"""Process command line arguments and invoke bot.
If args is an empty list, sys.argv is used.
:param args: command line arguments
"""
page_name = ''
summary = None
options = {}
pg_args = []
# read command line parameters
local_args = pywikibot.handle_args(args)
gen_factory = pagegenerators.GeneratorFactory()
mysite = pywikibot.Site()
for arg in local_args:
opt, _, value = arg.partition(':')
if opt in ('-always', '-undelete'):
options[opt[1:]] = True
elif opt == '-summary':
summary = value or pywikibot.input(
'Enter a reason for the deletion:')
elif opt == '-isorphan':
value = int(value or 0)
options[opt[1:]] = value if value >= 0 else False
elif opt == '-orphansonly':
if value:
namespaces = mysite.namespaces.resolve(value.split(','))
else:
namespaces = mysite.namespaces
options[opt[1:]] = namespaces
else:
pg_args.append(arg)
un = 'un' if 'undelete' in options else ''
for arg in pg_args:
*_, page_name = arg.partition(':')
if gen_factory.handle_arg(arg) and not summary:
if arg.startswith('-file'):
summary = i18n.twtranslate(mysite, un + 'delete-from-file')
elif page_name:
if arg.startswith(('-cat', '-subcats')):
summary = i18n.twtranslate(mysite, 'delete-from-category',
{'page': page_name})
elif arg.startswith('-links'):
summary = i18n.twtranslate(mysite,
un + 'delete-linked-pages',
{'page': page_name})
elif arg.startswith('-ref'):
summary = i18n.twtranslate(
mysite, 'delete-referring-pages', {'page': page_name})
elif arg.startswith('-imageused'):
summary = i18n.twtranslate(mysite, un + 'delete-images',
{'page': page_name})
# We are just deleting pages, so we have no need of using a preloading
# page generator to actually get the text of those pages.
generator = gen_factory.getCombinedGenerator()
if not pywikibot.bot.suggest_help(missing_generator=not generator):
if summary is None:
summary = pywikibot.input(f'Enter a reason for the {un}deletion:')
bot = DeletionRobot(summary, generator=generator, **options)
bot.run()
if __name__ == '__main__':
main()