Naudotojas:VP-bot/wiktwiki.py
Išvaizda
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
With this tool you can add the template {{vikipedija}} to pages. The template
links a page to the Lithuanian Wikipedia article of the same name.

The script is derived from commonscat.py. For every page it works on it looks
for an existing, non-disambiguation article on lt.wikipedia (following
redirects) and then either inserts the template in front of the
"== {{ltv}} ==" heading or corrects the target of a template that is already
on the page.

This bot uses pagegenerators to get a list of pages. For example to go through
all pages:
wiktwiki.py -start:!

Extra command line options:
-summary[:text]   edit summary (parsed, but currently not used for the edits)
-page[:title]     work on a single page
-checkcurrent     work on pages that already transclude the template
-always           passed through to the worker unchanged

TODO (inherited from commonscat.py):
*Update interwiki's at commons
*Collect all possibilities also if local wiki already has link.
*Better support for other templates (translations) / redundant templates.
*Check mode, only check pages which already have the template
*More efficient like interwiki.py
*Possibility to update other languages in the same run
"""
#
# (C) Multichill, 2008
#
# Distributed under the terms of the MIT license.
#
__version__ = '$Id: commonscat.py 6342 2009-02-12 15:51:12Z multichill $'

import re

import wikipedia
import config
import pagegenerators
import add_text

# Template name per language code; '_default' is the fallback.
commonscatTemplates = {
    '_default': u'vikipedija',
    'lt': u'vikipedija',
}

# Per language code: templates that make the bot skip a page. An entry is
# either a template name or a (template name, text expected in the first
# parameter) tuple.
ignoreTemplates = {
}

# Heading in front of which a new template is inserted.
_LTV_HEADER_RE = re.compile(r'(?i)==\s*\{\{ltv\}\}\s*==')


def _safeOutput(message):
    '''
    Print a message through wikipedia.output on a best-effort basis.

    A failure while printing (the original code guarded every output call,
    presumably against console encoding errors) must not abort the run.
    '''
    try:
        wikipedia.output(message)
    except Exception:
        wikipedia.output(u'Except on Output')


def getTemplate(lang=None):
    '''
    Get the template name in a language.

    Expects the language code, returns the translation. Unknown languages get
    the '_default' entry.
    '''
    return commonscatTemplates.get(lang, commonscatTemplates['_default'])


def skipPage(page):
    '''
    Do we want to skip this page?

    Returns True when the page carries one of the templates listed in
    ignoreTemplates for the language of the page, False otherwise.
    '''
    lang = page.site().language()
    if lang not in ignoreTemplates:
        return False

    templatesInThePage = page.templates()
    templatesWithParams = page.templatesWithParams()
    for template in ignoreTemplates[lang]:
        if not isinstance(template, tuple):
            # Plain name: the presence of the template is enough.
            if template in templatesInThePage:
                return True
        else:
            # (name, text): the first parameter has to contain the text.
            # A template used without parameters can never match.
            for (inPageTemplate, param) in templatesWithParams:
                if (inPageTemplate == template[0] and param
                        and template[1] in param[0]):
                    return True
    return False


def updateInterwiki(wikipediaPage=None, commonsPage=None):
    '''
    Update the interwiki's at commons from a wikipedia page.

    The bot just replaces the interwiki links at the commons page with the
    interwiki's from the wikipedia page. This should probably be more
    intelligent. We could use add all the interwiki's and remove duplicates.
    Or only remove language links if multiple language links to the same
    language exist.

    This function is disabled for the moment (nothing in this script calls it)
    until it is clear what the best way is to update the interwiki's.
    '''
    interwikilist = wikipediaPage.interwiki()
    interwikilist.append(wikipediaPage)
    interwikis = {}
    for interwikiPage in interwikilist:
        interwikis[interwikiPage.site()] = interwikiPage

    oldtext = commonsPage.get()
    # The commonssite object doesnt work with interwiki's
    newtext = wikipedia.replaceLanguageLinks(oldtext, interwikis,
                                             wikipedia.getSite(u'nl'))
    comment = (u'Updating interwiki\'s from [['
               + wikipediaPage.site().language() + u':'
               + wikipediaPage.title() + u']]')

    if newtext != oldtext:
        # This doesnt seem to work. Newtext has some trailing whitespace
        wikipedia.showDiff(oldtext, newtext)
        commonsPage.put(newtext=newtext, comment=comment)


def addWiktWiki(page=None, summary=None, always=False):
    '''
    Take a page and make sure it links to the matching Wikipedia article.

    * The template is already on the page: check its target. A good target is
      left alone, a redirected target is replaced by the redirect target and a
      broken target is replaced by whatever findWiktLink comes up with.
    * The page is on the skip list: do nothing.
    * Otherwise: look for an article and, when one is found, insert the
      template in front of the "== {{ltv}} ==" heading.

    summary is accepted for interface compatibility; the edit comments are
    built by this function itself. Returns the tuple (True, always).
    '''
    _safeOutput(u'Working on ' + page.title())
    templateName = getTemplate(page.site().language())

    if templateName in page.templates():
        _safeOutput(u'Vikipedija template is already on ' + page.title())
        currentWikt = getWiktLink(page)
        checkedWikt = checkWiktLink(currentWikt)
        if checkedWikt != u'' and currentWikt == checkedWikt:
            # The current link is good. An empty check result means "no
            # usable article" and must not count as a match for an empty
            # template parameter.
            _safeOutput(u'Vikipedija link at ' + page.title() + u' to '
                        + currentWikt + u' is ok')
        elif checkedWikt != u'':
            # We have a new link (redirect target), replace the old one
            changeWikt(page, currentWikt, checkedWikt)
        else:
            # The link is wrong, try to find a better one. When nothing is
            # found the stale template is left in place.
            wiktLink = findWiktLink(page)
            if wiktLink != u'':
                changeWikt(page, currentWikt, wiktLink)
    elif skipPage(page):
        _safeOutput("Found a template in the skip list. Skipping "
                    + page.title())
    else:
        wiktLink = findWiktLink(page)
        if wiktLink != u'':
            oldtext = page.get()
            replacement = (u'{{' + templateName + u'|' + wiktLink
                           + u'}}\n== {{ltv}} ==')
            # A function replacement keeps backslashes in the title literal;
            # a replacement string would interpret them as escapes.
            newtext = _LTV_HEADER_RE.sub(lambda match: replacement, oldtext)
            if newtext != oldtext:
                comment = (u'Adding Vikipedija link to [[:w:' + wiktLink
                           + u'|' + wiktLink + u']]')
                wikipedia.showDiff(oldtext, newtext)
                page.put(newtext, comment)
            else:
                # No "== {{ltv}} ==" heading to anchor on: saving would only
                # be a null edit.
                _safeOutput(u'No {{ltv}} heading found on ' + page.title())

    return (True, always)


def changeWikt(page=None, old=u'', new=u''):
    '''
    Point every use of the template on the page at a new target.

    old is only used for the edit comment. Nothing is saved when the
    substitution leaves the text unchanged.
    '''
    templateName = getTemplate(page.site().language())
    pattern = r'(?i)\{\{' + re.escape(templateName) + r'\|?[^}]*\}\}'
    replacement = u'{{' + templateName + u'|' + new + u'}}'

    oldtext = page.get()
    # Function replacement: see addWiktWiki, titles may contain backslashes.
    newtext = re.sub(pattern, lambda match: replacement, oldtext)
    if newtext == oldtext:
        return

    comment = (u'Changing Vikipedija link from [[:w:' + old + u'|' + old
               + u']] to [[:w:' + new + u'|' + new + u']]')
    wikipedia.showDiff(oldtext, newtext)
    page.put(newtext, comment)


def findWiktLink(page=None):
    '''
    Find a Wikipedia article for a page.

    Tries, in this order, the page title as it is, the title with its first
    letter in upper case and that capitalised title followed by
    u' (reikšmės)'. Returns the first title accepted by checkWiktLink, or u''
    when none of them is.
    '''
    pageName = page.titleWithoutNamespace()
    if not pageName:
        return u''

    capitalised = pageName[0].upper() + pageName[1:]
    # NOTE(review): checkWiktLink rejects disambiguation pages; confirm that
    # the u' (reikšmės)' candidate can ever be accepted.
    candidates = (pageName, capitalised, capitalised + u' (reikšmės)')
    for candidate in candidates:
        checkedWikt = checkWiktLink(candidate)
        if checkedWikt != u'':
            return checkedWikt
    return u''


def getWiktLink(wikiPage=None):
    '''
    Get the target of the template on a page.

    Returns the first parameter of the first use of the template that has
    parameters, the title of the page itself when the template is only used
    without parameters, and u'' when the template is not on the page.
    '''
    templateName = getTemplate(wikiPage.site().language())
    if templateName not in wikiPage.templates():
        return u''

    # Go through all the templates at the page
    for template in wikiPage.templatesWithParams():
        # We found the template and it has the parameter set.
        if template[0] == templateName and len(template[1]) > 0:
            return template[1][0]
    # The template is on the page, but without parameters.
    return wikiPage.titleWithoutNamespace()


def checkWiktLink(name=""):
    '''
    Check that a title is a usable article on the Lithuanian Wikipedia.

    Redirects are followed. Returns the title (without namespace) of the
    article that was finally reached, or u'' when the name is empty, the page
    does not exist, the page is a disambiguation page or the redirects form a
    loop.

    Creating the page object can raise wikipedia.BadTitle; this is not caught
    here.
    '''
    if not name:
        return u''

    site = wikipedia.getSite("lt", "wikipedia")
    visited = set()
    while name not in visited:
        visited.add(name)
        wiktPage = wikipedia.Page(site, "" + name)
        if not wiktPage.exists():
            return u''
        if wiktPage.isRedirectPage():
            name = wiktPage.getRedirectTarget().titleWithoutNamespace()
            continue
        if wiktPage.isDisambig():
            return u''
        return wiktPage.titleWithoutNamespace()
    # The same title came up twice: the redirects go round in a circle.
    return u''


def main():
    '''
    Parse the command line arguments and get a pagegenerator to work on.
    Iterate through all the pages.
    '''
    summary = u'Vikižodynas -> Vikipedija'
    generator = None
    always = False
    # Namespace filter for -checkcurrent (14, inherited from commonscat.py).
    ns = [14]

    # Load a lot of default generators
    genFactory = pagegenerators.GeneratorFactory()

    for arg in wikipedia.handleArgs():
        if arg.startswith('-summary'):
            if len(arg) == 8:
                summary = wikipedia.input(
                    u'What summary do you want to use?')
            else:
                summary = arg[9:]
        elif arg.startswith('-page'):
            if len(arg) == 5:
                generator = [wikipedia.Page(
                    wikipedia.getSite(),
                    wikipedia.input(u'What page do you want to use?'))]
            else:
                generator = [wikipedia.Page(wikipedia.getSite(), arg[6:])]
        elif arg.startswith('-checkcurrent'):
            templatePage = wikipedia.Page(
                wikipedia.getSite(),
                u'Template:' + getTemplate(wikipedia.getSite().language()))
            generator = pagegenerators.NamespaceFilterPageGenerator(
                pagegenerators.ReferringPageGenerator(
                    templatePage, onlyTemplateInclusion=True),
                ns)
        elif arg == '-always':
            always = True
        else:
            genFactory.handleArg(arg)

    if not generator:
        generator = genFactory.getCombinedGenerator()
    if not generator:
        raise add_text.NoEnoughData(
            'You have to specify the generator you want to use for the '
            'script!')

    pregenerator = pagegenerators.PreloadingGenerator(generator)

    for page in pregenerator:
        (_status, always) = addWiktWiki(page, summary, always)


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()