Pereiti prie turinio

Naudotojas:VP-bot/wiktwiki.py

Puslapis iš Vikipedijos, laisvosios enciklopedijos.
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
With this tool you can add the template {{commonscat}} to categories.
The tool works by following the interwiki links. If the template is present on
a page in another language, the bot will use it.

You could probably use it on articles as well, but this isn't tested.

This bot uses pagegenerators to get a list of pages. For example to go through all categories:
commonscat.py -start:Category:!

Commonscat bot:

Take a page. Follow the interwiki's and look for the commonscat template
*Found zero templates. Done.
*Found one template. Add this template
*Found more templates. Ask the user <- still have to implement this

TODO:
*Update interwiki's at commons
*Collect all possibilities also if local wiki already has link.
*Better support for other templates (translations) / redundant templates.
*Check mode, only check pages which already have the template
*More efficient like interwiki.py
*Possibility to update other languages in the same run

"""

#
# (C) Multichill, 2008
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: commonscat.py 6342 2009-02-12 15:51:12Z multichill $'

import wikipedia, config, pagegenerators, add_text, re

# Name of the link template to use, keyed by site language code; read by
# getTemplate().
# '_default' is presumably meant as the name for languages that have no
# entry of their own -- TODO confirm against getTemplate().
commonscatTemplates = {
    '_default': u'vikipedija',
    'lt' : u'vikipedija',
}

# Templates that make the bot skip a page, keyed by site language code; read
# by skipPage(). Each value is an iterable whose items are either a template
# name or a (template name, parameter text) tuple.
ignoreTemplates = {
}

def getTemplate (lang = None):
    '''
    Return the name of the link template for a language.

    lang -- language code of the site being edited (may be None).

    Returns the commonscatTemplates entry for lang. When there is none, the
    '_default' entry is returned, with u'vikipedija' as a last resort if the
    table has no '_default' entry either.
    '''
    # dict.get() replaces the has_key() lookup (removed in Python 3) and lets
    # the fallback come from the table instead of a duplicated literal.
    fallback = commonscatTemplates.get('_default', u'vikipedija')
    return commonscatTemplates.get(lang, fallback)

def skipPage(page):
    '''
    Tell whether the page uses a template from the ignore list.

    page -- the page to inspect.

    The entries of ignoreTemplates[<site language code>] are either plain
    template names, which match when the template is used on the page, or
    (template name, text) tuples, which match when the template is used and
    text occurs in its first parameter.

    Returns True when the page should be skipped, False otherwise.
    '''
    ignored = ignoreTemplates.get(page.site().language())
    if not ignored:
        # Nothing is configured for this language, so there is no need to
        # fetch the templates of the page at all.
        return False

    templatesInThePage = page.templates()
    templatesWithParams = page.templatesWithParams()
    for template in ignored:
        if not isinstance(template, tuple):
            if template in templatesInThePage:
                return True
            continue
        for (inPageTemplate, params) in templatesWithParams:
            # A template used without parameters cannot match on parameter
            # text; checking params first avoids an IndexError on params[0].
            if inPageTemplate == template[0] and params and template[1] in params[0]:
                return True
    return False

def updateInterwiki (wikipediaPage = None, commonsPage = None):
    '''
    Replace the language links of commonsPage with those of wikipediaPage.

    wikipediaPage -- page whose interwiki links (plus the page itself) are
                     used as the new set of language links.
    commonsPage   -- page whose text is rewritten and saved.

    The existing language links are replaced wholesale; nothing is merged or
    de-duplicated against what is already on commonsPage. The page is only
    saved when the resulting text differs from the current text.

    This function is not called anywhere in this file.
    '''
    sourcePages = wikipediaPage.interwiki()
    sourcePages.append(wikipediaPage)

    # One page per site; a later page for the same site replaces an earlier one.
    linksBySite = dict([(linkedPage.site(), linkedPage) for linkedPage in sourcePages])

    currentText = commonsPage.get()
    # The commons site object does not work with interwiki links, so the nl
    # site is passed instead.
    updatedText = wikipedia.replaceLanguageLinks(currentText, linksBySite, wikipedia.getSite(u'nl'))
    editSummary = u''.join([
        u'Updating interwiki\'s from [[',
        wikipediaPage.site().language(),
        u':',
        wikipediaPage.title(),
        u']]',
    ])

    if updatedText == currentText:
        return

    # Original author's note: the comparison above does not seem to work
    # reliably because the new text has some trailing whitespace.
    wikipedia.showDiff(currentText, updatedText)
    commonsPage.put(newtext=updatedText, comment=editSummary)


def _safeOutput (text):
    '''
    Print text through wikipedia.output without letting a failure there
    abort the run; a fixed fallback message is printed instead.
    '''
    try:
        wikipedia.output(text)
    except Exception:
        # Exception rather than a bare except, so that KeyboardInterrupt and
        # SystemExit still stop the bot.
        wikipedia.output(u'Except on Output')


def addWiktWiki (page = None, summary = None, always = False):
    '''
    Add the link template to a page, or verify/repair an existing one.

    page    -- the page to work on.
    summary -- accepted for interface compatibility; not used, the edit
               comments are built here.
    always  -- passed through unchanged in the return value.

    If the template is already on the page, its target is checked with
    checkWiktLink(): a good target is left alone, a target that resolves to
    another title is replaced by that title, and a bad target is replaced by
    whatever findWiktLink() comes up with (if anything).

    Otherwise, unless skipPage() says to skip, the template is inserted
    directly before the first "== {{ltv}} ==" heading, provided
    findWiktLink() found a target. Pages without such a heading are left
    untouched.

    Returns the tuple (True, always) in every case.
    '''
    templateName = getTemplate(page.site().language())
    title = page.title()
    _safeOutput(u'Working on ' + title)

    if templateName in page.templates():
        _safeOutput(u'Vikipedija template is already on ' + title)
        currentWikt = getWiktLink(page)
        checkedWikt = checkWiktLink(currentWikt)
        if currentWikt == checkedWikt:
            # The current link is good.
            _safeOutput(u'Vikipedija link at ' + title + u' to ' + currentWikt + u' is ok')
        elif checkedWikt != u'':
            # The link resolves to a different title; point at that one.
            changeWikt(page, currentWikt, checkedWikt)
        else:
            # The current link is wrong; try to find a replacement. When none
            # is found the existing template is left as it is.
            wiktLink = findWiktLink(page)
            if wiktLink != u'':
                changeWikt(page, currentWikt, wiktLink)
        return (True, always)

    if skipPage(page):
        _safeOutput(u'Found a template in the skip list. Skipping ' + title)
        return (True, always)

    wiktLink = findWiktLink(page)
    if wiktLink == u'':
        return (True, always)

    oldtext = page.get()
    insertion = u'{{' + templateName + u'|' + wiktLink + u'}}\n== {{ltv}} =='
    # A function is used as replacement so that backslashes in the title are
    # inserted literally instead of being read as group references, and
    # count=1 keeps the template from being added once per matching heading.
    newtext = re.sub(u'(?i)==\\s*\\{\\{ltv\\}\\}\\s*==',
                     lambda match: insertion, oldtext, count=1)
    if newtext == oldtext:
        # No heading to anchor on: saving would only produce an empty edit.
        _safeOutput(u'No {{ltv}} heading found on ' + title + u', nothing added')
        return (True, always)

    comment = u'Adding Vikipedija link to [[:w:' + wiktLink + u'|' + wiktLink + u']]'
    wikipedia.showDiff(oldtext, newtext)
    page.put(newtext, comment)
    return (True, always)

def changeWikt (page = None, old = u'', new = u''):
    '''
    Point every use of the link template on a page at a new target.

    page -- the page to edit.
    old  -- the previous target; only used in the edit comment.
    new  -- the target to put into the template.

    Each occurrence of the template, with or without parameters, is replaced
    by {{<template>|<new>}}. The page is saved only when its text actually
    changes. Returns None.
    '''
    templateName = getTemplate(page.site().language())
    oldtext = page.get()

    # The name is escaped and must be followed by '|' or '}}' (optionally
    # after whitespace), so that templates whose name merely starts with the
    # same letters are not rewritten.
    pattern = (u'(?i)\\{\\{\\s*' + re.escape(templateName) +
               u'\\s*(?:\\|[^}]*)?\\}\\}')
    replacement = u'{{' + templateName + u'|' + new + u'}}'
    # A function is used as replacement so that backslashes in the new
    # title are inserted literally instead of being read as group references.
    newtext = re.sub(pattern, lambda match: replacement, oldtext)
    if newtext == oldtext:
        # Nothing to change; do not save an empty edit.
        return

    comment = u'Changing Vikipedija link from [[:w:' + old + u'|' + old + u']] to [[:w:' + new + u'|' + new + u']]'
    wikipedia.showDiff(oldtext, newtext)
    page.put(newtext, comment)

def findWiktLink (page=None):
    '''
    Look for a target title matching the title of the page.

    page -- the page a target is wanted for.

    The candidates are tried in this order with checkWiktLink(): the page
    title without namespace, the same title with its first letter in upper
    case, and the capitalised title followed by u' (reikšmės)'.

    Returns the first non-empty result of checkWiktLink(), or u'' when no
    candidate is accepted.
    '''
    pageName = page.titleWithoutNamespace()
    # Slicing instead of pageName[0] so that an empty title cannot raise.
    capitalized = pageName[:1].upper() + pageName[1:]

    candidates = [pageName]
    if capitalized != pageName:
        # Only look the capitalised form up when it really is a different
        # title; otherwise the same lookup would be done twice.
        candidates.append(capitalized)
    candidates.append(capitalized + u' (reikšmės)')

    for candidate in candidates:
        checkedWikt = checkWiktLink(candidate)
        if checkedWikt != u'':
            return checkedWikt
    return u''
    

def getWiktLink (wikiPage=None):
    '''
    Read the target currently given to the link template on a page.

    wikiPage -- the page to inspect.

    Returns the first parameter of the first use of the template that has
    parameters, the page title without namespace when the template is only
    used without parameters, and u'' when the template is not on the page.
    '''
    templateName = getTemplate(wikiPage.site().language())
    if templateName not in wikiPage.templates():
        return u''

    for (usedTemplate, params) in wikiPage.templatesWithParams():
        if usedTemplate == templateName and len(params) > 0:
            return params[0]

    # The template is on the page, but never with a parameter.
    return wikiPage.titleWithoutNamespace()

def checkWiktLink (name = ""):
    '''
    Resolve a title on the "lt" wikipedia site to a usable target title.

    name -- the title to check.

    Redirects are followed. Returns the title (without namespace) of the
    page that is finally reached, or u'' when that page does not exist, is
    a disambiguation page, or the redirects form a loop.

    Errors raised by the wikipedia module (the original author mentions
    BadTitle, NoPage and IsRedirectPage) are not caught here.
    '''
    site = wikipedia.getSite("lt", "wikipedia")
    visited = set()
    title = name
    # Iterating with a set of visited titles instead of recursing keeps a
    # circular redirect chain from recursing without end.
    while title not in visited:
        visited.add(title)
        wiktPage = wikipedia.Page(site, title)
        if not wiktPage.exists():
            return u''
        if wiktPage.isRedirectPage():
            title = wiktPage.getRedirectTarget().titleWithoutNamespace()
            continue
        if wiktPage.isDisambig():
            return u''
        return wiktPage.titleWithoutNamespace()
    # A title came up a second time: the redirects loop.
    return u''

def main():
    '''
    Read the command line, build a page generator and process every page.

    Arguments handled here:
      -summary[:text]  edit summary; asked for interactively without text
      -page[:title]    work on a single page; asked for interactively
                       without title
      -checkcurrent    work on the pages in namespace 14 that transclude the
                       link template
      -always          sets the always flag handed to addWiktWiki()
    Every other argument is handed to the pagegenerators factory.

    Raises add_text.NoEnoughData when no generator results from the
    arguments.
    '''
    summary = u'Vikižodynas -> Vikipedija'
    generator = None
    checkcurrent = False
    always = False
    ns = [14]
    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            if arg == '-summary':
                summary = wikipedia.input(u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-page'):
            site = wikipedia.getSite()
            if arg == '-page':
                pageTitle = wikipedia.input(u'What page do you want to use?')
            else:
                pageTitle = arg[6:]
            generator = [wikipedia.Page(site, pageTitle)]
        elif arg.startswith('-checkcurrent'):
            checkcurrent = True
            templatePage = wikipedia.Page(
                wikipedia.getSite(),
                u'Template:' + getTemplate(wikipedia.getSite().language()))
            transcluding = pagegenerators.ReferringPageGenerator(
                templatePage, onlyTemplateInclusion=True)
            generator = pagegenerators.NamespaceFilterPageGenerator(transcluding, ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)

    # Fall back to the generators collected by the factory.
    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData('You have to specify the generator you want to use for the script!')

    for page in pagegenerators.PreloadingGenerator(generator):
        # The always flag returned for one page is fed into the next call.
        (status, always) = addWiktWiki(page, summary, always)

# Run the bot only when the file is executed as a script. The finally clause
# makes sure wikipedia.stopme() is called even when main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()