Naudotojas:Vpovilaitis/SarasasVertimai.py
Puslapis iš Vikižodyno, laisvojo žodyno.
#!/usr/bin/python
# -*- coding: utf-8 -*-
"""
Generate dictionary entries from the translation tables of wiki pages.

For every {{trans-top|...}} ... {{trans-bottom}} block found on a processed
page, the bot reads the translation templates inside it.  Each template that
carries a "pst=" parameter yields an entry section which is written to the
page named after the translated word (the page is created, or the section is
added to the existing text).  Words whose page already contains the matching
template are skipped and listed in the file "Praleisti zodiai.txt".

The following parameters are supported:

&params;

-debug            If given, doesn't do any real changes, but only shows
                  what would have been changed.

All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
"""
__version__ = '$Id: basic.py 7015 2009-07-03 20:28:49Z alexsh $'

import os
import codecs
import re
import time
from datetime import datetime, timedelta

import wikipedia
import config
import editarticle
import pagegenerators

# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
    '&params;': pagegenerators.parameterHelp
}

# Log of words that were skipped because their page already has the entry.
SKIPPED_WORDS_FILE = "Praleisti zodiai.txt"

# One whole translation table; group "kdal" is the trans-top argument.
TRANS_BLOCK_RE = re.compile(
    u'\\{\\{trans-top\\|(?P<kdal>[^}]*?)\\}\\}.*?\\{\\{trans-bottom\\}\\}',
    re.S)

# One translation template: {{t+|klb|zod|parms...}} or {{tø|klb|zod|parms...}}.
TRANS_ITEM_RE = re.compile(
    u'\\{\\{t(?:\\+|ø)\\|(?P<klb>[^|]*?)\\|(?P<zod>[^|]*?)'
    u'(?:\\|(?P<parms>[^}]*?))?\\}\\}',
    re.M)

# A level-2 heading of the form "== {{XXv}} ==" on a line of its own.
LANG_HEADER_RE = re.compile(u'^== \\{\\{.*?v\\}\\} ==$', re.M)


class SarasasVertimai:
    """Bot that turns translation-table items into entry pages."""

    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'ar': u'روبوت: تغيير ...',
        'cs': u'Robot změnil ...',
        'de': u'Bot: Ändere ...',
        'en': u'Robot: Changing ...',
        'fr': u'Robot: Changé ...',
        'ja':u'ロボットによる:編集',
        'ksh': u'Bot: Ännern ...',
        'nds': u'Bot: Änderung ...',
        'nl': u'Bot: wijziging ...',
        'pt': u'Bot: alterando...',
        'sv': u'Bot: Ändrar ...',
        'zh': u'機器人:編輯.....',
        'lt': u'Bot: Vertimų generavimas',
    }

    # Short code -> heading text used in the "=== ''...'' ===" line of an
    # entry.  The code is also the suffix of the entry template name
    # (language code + this code).
    HEADING_BY_CODE = {
        u'bdv': u'Būdvardis',
        u'bdn': u'Būdinys',
        u'dkt': u'Daiktavardis',
        u'dll': u'Dalelytė',
        u'dlv': u'Dalyvis',
        u'gln': u'Galūnė',
        u'įvrd': u'Įvardis',
        u'jst': u'Jaustukas',
        u'jng': u'Jungtukas',
        u'pdlv': u'Padalyvis',
        u'prl': u'Prielinksnis',
        u'prs': u'Priesaga',
        u'prd': u'Priešdėlis',
        u'prv': u'Prieveiksmis',
        u'psdlv': u'Pusdalyvis',
        u'rad': u'Raidė',
        u'sant': u'Santrumpa',
        u'siek': u'Siekinys',
        u'smb': u'Simbolis',
        u'skm': u'Skaitmuo',
        u'skt': u'Skaitvardis',
        u'škn': u'Šaknis',
        u'jung': u'Žodžių junginys',
        u'vks': u'Veiksmažodis',
    }

    # Argument of {{trans-top|...}} -> short code (key of HEADING_BY_CODE).
    # Translation tables whose argument is not listed here are ignored.
    CODE_BY_TRANS_TOP = {
        u'būdvardžiai': u'bdv',
        u'būdiniai': u'bdn',
        u'daiktavardžiai': u'dkt',
        u'dalelytės': u'dll',
        u'dalyviai': u'dlv',
        u'galūnės': u'gln',
        u'įvardžiai': u'įvrd',
        u'jaustukai': u'jst',
        u'jungtukai': u'jng',
        u'padalyviai': u'pdlv',
        u'prielinksniai': u'prl',
        u'priesagos': u'prs',
        u'priešdėliai': u'prd',
        u'prieveiksmiai': u'prv',
        u'pusdalyviai': u'psdlv',
        u'raidės': u'rad',
        u'santrumpos': u'sant',
        u'siekiniai': u'siek',
        u'simboliai': u'smb',
        u'skaitmenys': u'skm',
        u'skaitvardžiai': u'skt',
        u'šaknys': u'škn',
        u'žodžių junginiai': u'jung',
        u'veiksmažodžiai': u'vks',
    }

    # "name=value" parameters of a translation template that are consumed by
    # the bot.  'xs' and 'alt' are recognised only so that they are not
    # emitted as extra templates after the headword.
    NAMED_PARAMS = ('sc', 'tr', 'kt', 'xs', 'pst', 'alt')

    def __init__(self, generator, debug):
        """
        Constructor. Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * debug     - If True, doesn't do any real changes, but only shows
                          what would have been changed.
        """
        self.generator = generator
        self.debug = debug

    def run(self):
        """Set the edit summary and process every page of the generator."""
        # Set the edit summary message
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """
        Load the given page and process every translation table on it.

        Missing pages and redirects are reported and skipped.  Every item of
        a recognised translation table is handed to _treat_item(), which may
        create or modify the page of the translated word.
        """
        try:
            # Load the page
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping."
                             % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping."
                             % page.aslink())
            return

        # Date suffix ("|YYYY|MM|DD") for the {{bot-entry}} template.
        siandien = datetime.today().strftime(u"|%Y|%m|%d")
        # "All" answered at the prompt applies to the rest of this page only.
        acceptall = False

        # NOTE(review): mode "w" truncates the log for every treated page, so
        # after a multi-page run only the last page's skips remain -- confirm
        # whether appending was intended.
        skipped = codecs.open(SKIPPED_WORDS_FILE, "w", "utf-8")
        try:
            for block in TRANS_BLOCK_RE.finditer(text):
                kdal = self.CODE_BY_TRANS_TOP.get(block.group('kdal'))
                if kdal is None:
                    # Not a table this bot knows how to handle.
                    continue
                for item in TRANS_ITEM_RE.finditer(block.group(0)):
                    acceptall = self._treat_item(item, kdal, siandien,
                                                 skipped, acceptall)
        finally:
            # The original never closed the handle.
            skipped.close()

    def _treat_item(self, item, kdal, siandien, skipped, acceptall):
        """
        Handle one translation template match.

        Parameters:
            * item      - match object of TRANS_ITEM_RE.
            * kdal      - short code of the enclosing translation table.
            * siandien  - date suffix for the {{bot-entry}} template.
            * skipped   - open file object receiving skipped words.
            * acceptall - True if the user already chose "All".

        Returns the (possibly updated) acceptall flag.
        """
        klb = item.group('klb')
        zod = item.group('zod')
        named, extras, plural = self._parse_params(item.group('parms'),
                                                   klb + kdal)
        pst = named['pst']
        if pst == '':
            # Without "pst=" there is nothing to put on the definition lines.
            return acceptall

        lang_section, entry = self._build_entry(klb, kdal, zod, named,
                                                extras, plural, siandien)

        mysite = wikipedia.getSite()
        pagew = wikipedia.Page(mysite, zod)
        old = ''
        if pagew.exists():
            old = pagew.get(get_redirect=True)
            new = self._merge_entry(old, mysite, klb, kdal,
                                    lang_section, entry)
            if new is None:
                self._output_title(pagew)
                wikipedia.output(
                    u'Skipping „%s -> %s (%s)“ because of exists'
                    % (pagew.title(), pst, kdal))
                skipped.write(u'# [[%s]] -> %s (%s)\r\n'
                              % (pagew.title(), pst, kdal))
                return acceptall
        else:
            new = lang_section + entry

        # only save if something was changed
        if new != old:
            acceptall = self._review_and_save(pagew, old, new, acceptall)
        return acceptall

    def _parse_params(self, parms, template):
        """
        Split the trailing parameters of a translation template.

        Parameters:
            * parms    - the "a|b=c|..." tail of the template, or None.
            * template - language code + short code (e.g. 'dedkt').

        Returns a tuple (named, extras, plural):
            * named  - dict with a value ('' if absent) for each NAMED_PARAMS.
            * extras - remaining non-empty parameters, in order.
            * plural - True if a bare 'd' parameter was given for one of the
                       templates that take a plural form ('dedkt', 'svdkt').
        """
        named = dict.fromkeys(self.NAMED_PARAMS, '')
        extras = []
        plural = False
        for parm in (parms or '').split('|'):
            key, sep, value = parm.partition('=')
            if sep and key in named:
                named[key] = value
            elif parm == 'd' and template in ('dedkt', 'svdkt'):
                # The original set the 'dedkt' flag in the 'svdkt' branch, so
                # the plural 'svdkt' form could never be produced.
                plural = True
            elif parm:
                extras.append(parm)
        return named, extras, plural

    def _build_entry(self, klb, kdal, zod, named, extras, plural, siandien):
        """
        Build the wikitext for one word.

        Returns a tuple (lang_section, entry): lang_section is the
        "== {{<klb>v}} ==" heading plus the optional transliteration section;
        entry is everything from the "=== ''...'' ===" heading down to the
        closing "----" line.
        """
        template = klb + kdal

        headword = u"[[" + zod + u"]]"
        if named['sc'] != '':
            headword = u"{{" + named['sc'] + u"|" + headword + u"}}"
        headword = (u"'''" + headword + u"'''" +
                    u''.join([u' {{' + parm + u'}}' for parm in extras]))

        pronunciation = u''
        if named['tr'] != '':
            pronunciation = (u'=== Tarimas ===\n' +
                             u'\n' +
                             u"*'''Transliteracija:'''\n" +
                             u'** ' + named['tr'] + u'\n' +
                             u'\n')

        form = u''
        if template == 'dedkt':
            if plural:
                form = (u'|vnsvard=-|vnskilm=-|vnsnaud=-|vnsgal=-|dgsvard=' +
                        zod + u'|dgskilm=-|dgsnaud=-|dgsgal=-')
            else:
                form = (u'|vnsvard=' + zod +
                        u'|vnskilm=-|vnsnaud=-|vnsgal=-'
                        u'|dgsvard=-|dgskilm=-|dgsnaud=-|dgsgal=-')
        elif template == 'svdkt':
            if plural:
                form = u'|-|-|' + zod + u'|-|-|-|-|-'
            else:
                form = u'|' + zod + u'|-|-|-|-|-|-|-'

        kat = u''
        if named['kt'] != '':
            kat = u'|kt=' + named['kt']

        # "pst=" holds ';'-separated items; ':' inside an item becomes a
        # template parameter separator.
        definitions = u''.join([
            u'# {{t+|lt|' + vrt.strip().replace(u':', u'|') + kat +
            u'|from=' + klb + u'}}\n'
            for vrt in named['pst'].split(u';')])

        lang_section = u'== {{' + klb + u'v}} ==\n' + pronunciation
        entry = (u"=== ''" + self.HEADING_BY_CODE[kdal] + u"'' ===\n" +
                 u'{{' + template + form + u'}}\n' +
                 headword + u'\n' +
                 u'\n' +
                 definitions +
                 u'\n' +
                 u'==== Etimologija ====\n' +
                 u'{{etimologija-stub|' + klb + u'}}\n' +
                 u'\n' +
                 u'{{bot-entry|' + klb + u'|' + zod + siandien + u'|' +
                 config.usernames['wiktionary']['lt'] + u'}}\n' +
                 u'\n' +
                 u'<br clear=all>\n' +
                 u'----\n')
        return lang_section, entry

    def _merge_entry(self, old, site, klb, kdal, lang_section, entry):
        """
        Insert the new entry into the text of an existing page.

        Returns the new page text, or None if the page already contains a
        line starting with the {{<klb><kdal>...}} template.
        """
        interwiki = wikipedia.interwikiFormat(wikipedia.getLanguageLinks(old))
        cat = wikipedia.categoryFormat(wikipedia.getCategoryLinks(old, site))
        new = wikipedia.removeLanguageLinks(old)
        new = wikipedia.removeCategoryLinks(new, site)

        # klb/kdal come from page text, so escape them before use in a regex.
        own_header = re.compile(
            u'^== \\{\\{' + re.escape(klb) + u'v\\}\\} ==$',
            re.M).search(new)
        if own_header is None:
            # No section for this language yet: append heading and entry.
            new += u'\n' + lang_section + entry
        else:
            existing = re.compile(
                u'^\\{\\{' + re.escape(klb + kdal) + u'(?:\\||\\})', re.M)
            if existing.search(new) is not None:
                return None
            # Find where this language's section ends.  The original compiled
            # this pattern but then searched with the own-language pattern
            # again, and its splice dropped the matched heading line.
            next_header = LANG_HEADER_RE.search(new, own_header.end())
            if next_header is None:
                new += u'\n' + entry
            else:
                new = (new[:next_header.start()] + u'\n' + entry +
                       new[next_header.start():])

        # Categories and interwiki links go back at the very end.
        if cat != '':
            new += u'\n' + cat
        if interwiki != '':
            new += u'\n' + interwiki
        return new

    def _output_title(self, pagew):
        """Print the page title highlighted in purple (best effort)."""
        try:
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
                             % pagew.title())
        except Exception:
            wikipedia.output(u'\03{lightred}<<<Error>>>\03{default}')

    def _review_and_save(self, pagew, old, new, acceptall):
        """
        Show the change, ask for confirmation and save the page.

        In debug mode the change is shown and nothing else happens.
        Returns the (possibly updated) acceptall flag.
        """
        while True:
            # Show the title of the page we're working on.
            self._output_title(pagew)
            # show what was changed
            try:
                wikipedia.showDiff(old, new)
            except Exception:
                wikipedia.output(u'\03{lightred}<<<Error>>>\03{default}')

            if self.debug:
                # Documented -debug behaviour: show only, never save.
                return acceptall

            if acceptall:
                # The original wrote "choice == 'y'" here (a no-op comparison).
                choice = 'y'
            else:
                choice = wikipedia.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'Edit', 'All'],
                    ['y', 'N', 'e', 'a'], 'N')

            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(new)
                # if user didn't press Cancel
                if as_edited and as_edited != new:
                    new = as_edited
                # Show the (possibly edited) text again and ask once more.
                continue
            if choice == 'a':
                acceptall = True
                choice = 'y'
            if choice == 'y':
                try:
                    # Save the page
                    pagew.put(new)
                except wikipedia.LockedPage:
                    wikipedia.output(u"Page %s is locked; skipping."
                                     % pagew.aslink())
                except wikipedia.EditConflict:
                    wikipedia.output(
                        u'Skipping %s because of edit conflict'
                        % (pagew.title()))
                except wikipedia.SpamfilterError as error:
                    wikipedia.output(
                        u'Cannot change %s because of spam blacklist entry %s'
                        % (pagew.title(), error.url))
            return acceptall


def main(*args):
    """Parse the command line, build the page generator and run the bot."""
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    genFactory = pagegenerators.GeneratorFactory()
    # The generator gives the pages that should be worked upon.
    gen = None
    # This temporary array is used to read the page title if one single
    # page to work on is specified by the arguments.
    pageTitleParts = []
    # If debug is True, doesn't do any real changes, but only show
    # what would have been changed.
    debug = False

    # Parse command line arguments
    for arg in wikipedia.handleArgs(*args):
        if arg.startswith("-debug"):
            debug = True
        else:
            # check if a standard argument like
            # -start:XYZ or -ref:Asdf was given.
            if not genFactory.handleArg(arg):
                pageTitleParts.append(arg)

    if pageTitleParts != []:
        # We will only work on a single page.
        pageTitle = ' '.join(pageTitleParts)
        page = wikipedia.Page(wikipedia.getSite(), pageTitle)
        gen = iter([page])

    if not gen:
        gen = genFactory.getCombinedGenerator()
    if gen:
        # The preloading generator is responsible for downloading multiple
        # pages from the wiki simultaneously.
        gen = pagegenerators.PreloadingGenerator(gen)
        bot = SarasasVertimai(gen, debug)
        bot.run()
    else:
        wikipedia.showHelp()


if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()