Naudotojas:Vpovilaitis/SarasasVertimai.py

Puslapis iš Vikižodyno, laisvojo žodyno.
#!/usr/bin/python
# -*- coding: utf-8  -*-
"""
This is not a complete bot; rather, it is a template from which simple
bots can be made. You can rename it to mybot.py, then edit it in
whatever way you want.

The following parameters are supported:

&params;

    -debug         If given, doesn't do any real changes, but only shows
                   what would have been changed.

All other parameters will be regarded as part of the title of a single page,
and the bot will only work on that single page.
"""
__version__ = '$Id: basic.py 7015 2009-07-03 20:28:49Z alexsh $'
import os, codecs, re, time
from datetime import datetime, timedelta
import wikipedia, config
import editarticle
import pagegenerators
import config

# Placeholder substitutions for the help text printed when the script is
# run with -help: '&params;' in the module docstring stands for the
# shared page-generator parameter help.
docuReplacements = {'&params;': pagegenerators.parameterHelp}

class SarasasVertimai:
    """Create dictionary entries from the translation tables of a page.

    For every ``{{trans-top|<part of speech>}} ... {{trans-bottom}}`` table
    found on a page, each ``{{t+|lang|word|...}}`` / ``{{tø|...}}`` item
    that carries a ``pst=`` parameter (its Lithuanian translations) is
    turned into an entry section for the page titled ``word``. Items whose
    entry already exists are skipped and listed in a log file.
    """

    # Edit summary message that should be used.
    # NOTE: Put a good description here, and add translations, if possible!
    msg = {
        'ar': u'روبوت: تغيير ...',
        'cs': u'Robot změnil ...',
        'de': u'Bot: Ändere ...',
        'en': u'Robot: Changing ...',
        'fr': u'Robot: Changé ...',
        'ja':u'ロボットによる:編集',
        'ksh': u'Bot: Ännern ...',
        'nds': u'Bot: Änderung ...',
        'nl': u'Bot: wijziging ...',
        'pt': u'Bot: alterando...',
        'sv': u'Bot: Ändrar ...',
        'zh': u'機器人:編輯.....',
        'lt': u'Bot: Vertimų generavimas',
    }

    # Part-of-speech code -> heading text used in the generated section.
    kdalis = {
        u'bdv': u'Būdvardis',
        u'bdn': u'Būdinys',
        u'dkt': u'Daiktavardis',
        u'dll': u'Dalelytė',
        u'dlv': u'Dalyvis',
        u'gln': u'Galūnė',
        u'įvrd': u'Įvardis',
        u'jst': u'Jaustukas',
        u'jng': u'Jungtukas',
        u'pdlv': u'Padalyvis',
        u'prl': u'Prielinksnis',
        u'prs': u'Priesaga',
        u'prd': u'Priešdėlis',
        u'prv': u'Prieveiksmis',
        u'psdlv': u'Pusdalyvis',
        u'rad': u'Raidė',
        u'sant': u'Santrumpa',
        u'siek': u'Siekinys',
        u'smb': u'Simbolis',
        u'skm': u'Skaitmuo',
        u'skt': u'Skaitvardis',
        u'škn': u'Šaknis',
        u'jung': u'Žodžių junginys',
        u'vks': u'Veiksmažodis',
    }

    # Label given to {{trans-top|...}} -> part-of-speech code.
    kkodas = {
        u'būdvardžiai': u'bdv',
        u'būdiniai': u'bdn',
        u'daiktavardžiai': u'dkt',
        u'dalelytės': u'dll',
        u'dalyviai': u'dlv',
        u'galūnės': u'gln',
        u'įvardžiai': u'įvrd',
        u'jaustukai': u'jst',
        u'jungtukai': u'jng',
        u'padalyviai': u'pdlv',
        u'prielinksniai': u'prl',
        u'priesagos': u'prs',
        u'priešdėliai': u'prd',
        u'prieveiksmiai': u'prv',
        u'pusdalyviai': u'psdlv',
        u'raidės': u'rad',
        u'santrumpos': u'sant',
        u'siekiniai': u'siek',
        u'simboliai': u'smb',
        u'skaitmenys': u'skm',
        u'skaitvardžiai': u'skt',
        u'šaknys': u'škn',
        u'žodžių junginiai': u'jung',
        u'veiksmažodžiai': u'vks',
    }

    # Name of the file that receives the list of skipped (existing) words.
    _skipped_log = "Praleisti zodiai.txt"

    # One whole translation table, capturing the trans-top label.
    _trans_block_re = re.compile(
        u'\\{\\{trans-top\\|(?P<kdal>[^}]*?)\\}\\}.*?\\{\\{trans-bottom\\}\\}',
        re.S)
    # One {{t+|lang|word|params...}} or {{tø|lang|word|params...}} item.
    _trans_item_re = re.compile(
        u'\\{\\{t(?:\\+|ø)\\|(?P<klb>[^|]*?)\\|(?P<zod>[^|]*?)'
        u'(?:\\|(?P<parms>[^}]*?))?\\}\\}',
        re.M)
    # Any language header line of the form "== {{xxv}} ==".
    _any_header_re = re.compile(u'^== \\{\\{.*?v\\}\\} ==$', re.M)

    def __init__(self, generator, debug):
        """
        Constructor. Parameters:
            * generator - The page generator that determines on which pages
                          to work on.
            * debug     - If True, doesn't do any real changes, but only shows
                          what would have been changed.
        """
        self.generator = generator
        self.debug = debug

    def run(self):
        """Set the edit summary and process every page of the generator."""
        wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), self.msg))
        for page in self.generator:
            self.treat(page)

    def treat(self, page):
        """
        Load the given list page and create/extend the entry page of every
        translated word found in its translation tables.

        Missing pages and redirects are reported and skipped.
        """
        try:
            # Load the page
            text = page.get()
        except wikipedia.NoPage:
            wikipedia.output(u"Page %s does not exist; skipping." % page.aslink())
            return
        except wikipedia.IsRedirectPage:
            wikipedia.output(u"Page %s is a redirect; skipping." % page.aslink())
            return

        # NOTE(review): mode "w" truncates the log on every treated page, as
        # the original did; only the last page's skipped words survive a run.
        log = codecs.open(os.path.join('', self._skipped_log), "w", "utf-8")
        try:
            self._process_tables(text, log)
        finally:
            # The original never closed the file, so buffered lines could
            # be lost and the handle leaked.
            log.close()

    def _process_tables(self, text, log):
        """Walk all translation tables in ``text``; ``log`` gets skipped words."""
        stamp = datetime.today().strftime(u"|%Y|%m|%d")
        acceptall = False
        for block in self._trans_block_re.finditer(text):
            kdal = self.kkodas.get(block.group('kdal'))
            if kdal is None:
                # Table labelled with an unknown part of speech.
                continue
            for item in self._trans_item_re.finditer(block.group(0)):
                klb = item.group('klb')
                zod = item.group('zod')
                info = self._parse_params(item.group('parms'), klb + kdal)
                if info['pst'] == '':
                    # No Lithuanian translations given: nothing to generate.
                    continue
                txta, txt = self._build_entry(
                    klb, kdal, zod, info, stamp,
                    config.usernames['wiktionary']['lt'])

                mysite = wikipedia.getSite()
                pagew = wikipedia.Page(mysite, zod)
                old = ''
                if pagew.exists():
                    old = pagew.get(get_redirect=True)
                    # Interwiki and category links are taken off, the entry
                    # is merged in, and they are put back at the very end.
                    interwiki = wikipedia.interwikiFormat(wikipedia.getLanguageLinks(old))
                    cat = wikipedia.categoryFormat(wikipedia.getCategoryLinks(old, mysite))
                    body = wikipedia.removeLanguageLinks(old)
                    body = wikipedia.removeCategoryLinks(body, mysite)
                    new = self._merge_entry(body, klb, kdal, txta, txt)
                    if new is None:
                        self._report_existing(pagew, info['pst'], kdal, log)
                        continue
                    if cat != '':
                        new += u'\n' + cat
                    if interwiki != '':
                        new += u'\n' + interwiki
                else:
                    new = txta + txt

                # only save if something was changed
                if new != old:
                    acceptall = self._review_and_save(pagew, old, new, acceptall)

    @staticmethod
    def _parse_params(parms, code):
        """Split the trailing ``a|b=c|...`` parameters of a translation item.

        ``code`` is language code + part-of-speech code (e.g. ``dedkt``).
        Returns a dict with keys ``sc``, ``tr``, ``kt``, ``pst`` (strings,
        empty when absent), ``plural`` (True when a bare ``d`` was given for
        a German or Swedish noun) and ``extra`` (all other non-empty
        parameters, in order). ``xs=`` and ``alt=`` are recognised and
        dropped.
        """
        info = {'sc': '', 'tr': '', 'kt': '', 'pst': '',
                'plural': False, 'extra': []}
        if parms is None:
            return info
        for parm in parms.split('|'):
            if parm.startswith("sc="):
                info['sc'] = parm[3:]
            elif parm.startswith("tr="):
                info['tr'] = parm[3:]
            elif parm.startswith("kt="):
                info['kt'] = parm[3:]
            elif parm.startswith("xs="):
                pass
            elif parm.startswith("pst="):
                info['pst'] = parm[4:]
            elif parm.startswith("alt="):
                pass
            elif parm == 'd' and code in (u'dedkt', u'svdkt'):
                # The original set the German flag for Swedish too, so the
                # Swedish plural form could never be produced.
                info['plural'] = True
            elif parm == '':
                continue
            else:
                info['extra'].append(parm)
        return info

    @staticmethod
    def _inflection_args(code, zod, plural):
        """Return the extra template arguments for German/Swedish nouns."""
        if code == u'dedkt':
            if plural:
                return '|vnsvard=-|vnskilm=-|vnsnaud=-|vnsgal=-|dgsvard=' + zod + '|dgskilm=-|dgsnaud=-|dgsgal=-'
            return '|vnsvard=' + zod + '|vnskilm=-|vnsnaud=-|vnsgal=-|dgsvard=-|dgskilm=-|dgsnaud=-|dgsgal=-'
        if code == u'svdkt':
            if plural:
                return '|-|-|' + zod + '|-|-|-|-|-'
            return '|' + zod + '|-|-|-|-|-|-|-'
        return ''

    @classmethod
    def _build_entry(cls, klb, kdal, zod, info, stamp, username):
        """Build the wikitext of a new entry.

        Returns ``(txta, txt)``: ``txta`` is the language header (plus the
        transliteration section when ``tr`` was given), ``txt`` is the
        part-of-speech section with one ``{{t+|lt|...}}`` line per
        ``;``-separated translation in ``info['pst']``.
        """
        markers = ''.join(' {{' + parm + '}}' for parm in info['extra'])
        if info['sc'] == '':
            headword = u"'''[[" + zod + u"]]'''" + markers
        else:
            headword = u"'''{{" + info['sc'] + u"|[[" + zod + u"]]}}'''" + markers

        pronunciation = ''
        if info['tr'] != '':
            pronunciation = (u'=== Tarimas ===\n'
                             u'\n'
                             u"*'''Transliteracija:'''\n"
                             u'** ' + info['tr'] + u'\n'
                             u'\n')

        kat = ''
        if info['kt'] != '':
            kat = '|kt=' + info['kt']

        form = cls._inflection_args(klb + kdal, zod, info['plural'])
        txta = u'== {{' + klb + u'v}} ==\n' + pronunciation
        txt = (u"=== ''" + cls.kdalis[kdal] + u"'' ===\n" +
               u'{{' + klb + kdal + form + u'}}\n' +
               headword + u'\n' +
               u'\n')
        for vrt in info['pst'].split(';'):
            # Inside pst= a ':' stands for the '|' template separator.
            vrtt = vrt.strip().replace(':', u'|')
            txt += u'# {{t+|lt|' + vrtt + kat + u'|from=' + klb + u'}}\n'
        txt += (u'\n' +
                u'==== Etimologija ====\n' +
                u'{{etimologija-stub|' + klb + u'}}\n' +
                u'\n' +
                u'{{bot-entry|' + klb + u'|' + zod + stamp + u'|' + username + u'}}\n' +
                u'\n' +
                u'<br clear=all>\n' +
                u'----\n')
        return txta, txt

    @staticmethod
    def _merge_entry(body, klb, kdal, txta, txt):
        """Merge a generated entry into the text of an existing page.

        Returns the new text, or None when the page already holds a
        ``{{<klb><kdal>...`` template at the start of a line.

        * No section for the language yet: header and entry are appended.
        * Language section present: the entry is inserted in front of the
          next language header, or appended when there is none.
        """
        # klb comes straight from page text, so it is escaped before use.
        lang_header = re.compile(
            u'^== \\{\\{' + re.escape(klb) + u'v\\}\\} ==$', re.M)
        found = lang_header.search(body)
        if found is None:
            return body + u'\n' + txta + txt

        entry_tpl = re.compile(
            u'^\\{\\{' + re.escape(klb + kdal) + u'(?:\\||\\})', re.M)
        if entry_tpl.search(body) is not None:
            return None

        # The original searched with the language's own header pattern here
        # and cut at match.end(), which would have dropped the next header.
        following = SarasasVertimai._any_header_re.search(body, found.end())
        if following is None:
            return body + u'\n' + txt
        cut = following.start()
        return body[:cut] + u'\n' + txt + body[cut:]

    def _report_existing(self, pagew, pst, kdal, log):
        """Tell the user an entry already exists and record it in ``log``."""
        try:
            wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % pagew.title())
        except Exception:
            wikipedia.output(u'\03{lightred}<<<Error>>>\03{default}')
        wikipedia.output(u'Skipping „%s -> %s (%s)“ because of exists' % (pagew.title(), pst, kdal))
        log.write(u'# [[%s]] -> %s (%s)\r\n' % (pagew.title(), pst, kdal))

    def _review_and_save(self, pagew, old, new, acceptall):
        """Show the diff for ``pagew`` and save ``new`` if it is accepted.

        In debug mode the diff is shown and nothing is asked or saved.
        Returns the (possibly updated) ``acceptall`` flag, which becomes
        True once the user answers 'All'.
        """
        while True:
            # Show the title of the page we're working on.
            # Highlight the title in purple.
            try:
                wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % pagew.title())
            except Exception:
                wikipedia.output(u'\03{lightred}<<<Error>>>\03{default}')
            # show what was changed
            try:
                wikipedia.showDiff(old, new)
            except Exception:
                wikipedia.output(u'\03{lightred}<<<Error>>>\03{default}')

            if self.debug:
                return acceptall

            if acceptall:
                # Was the no-op comparison `choice == 'y'` in the original.
                choice = 'y'
            else:
                choice = wikipedia.inputChoice(u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', 'All'], ['y', 'N', 'e', 'a'], 'N')

            if choice == 'e':
                editor = editarticle.TextEditor()
                as_edited = editor.edit(new)
                # if user didn't press Cancel
                if as_edited and as_edited != new:
                    new = as_edited
                # Show the diff of the edited text and ask again.
                continue
            if choice == 'a':
                acceptall = True
                choice = 'y'
            if choice == 'y':
                try:
                    # Save the page
                    pagew.put(new)
                except wikipedia.LockedPage:
                    wikipedia.output(u"Page %s is locked; skipping." % pagew.aslink())
                except wikipedia.EditConflict:
                    wikipedia.output(u'Skipping %s because of edit conflict' % (pagew.title()))
                except wikipedia.SpamfilterError as error:
                    wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (pagew.title(), error.url))
            return acceptall

def main(*args):
    """Parse the command line, build the page source and run the bot.

    ``-debug`` switches the bot to debug mode. Arguments understood by the
    generator factory select the pages to work on; anything else is joined
    with spaces into the title of one single page. Without any page source
    the help text is shown instead.
    """
    # The factory handles the page-selection arguments shared with other
    # scripts.
    factory = pagegenerators.GeneratorFactory()
    title_words = []
    dry_run = False

    for arg in wikipedia.handleArgs(*args):
        if arg.startswith("-debug"):
            dry_run = True
        elif not factory.handleArg(arg):
            # Not a standard argument such as -start:XYZ or -ref:Asdf,
            # so it is part of a page title.
            title_words.append(arg)

    pages = None
    if title_words:
        # We will only work on a single page.
        single = wikipedia.Page(wikipedia.getSite(), ' '.join(title_words))
        pages = iter([single])
    if not pages:
        pages = factory.getCombinedGenerator()

    if not pages:
        wikipedia.showHelp()
        return

    # The preloading generator is responsible for downloading multiple
    # pages from the wiki simultaneously.
    preloaded = pagegenerators.PreloadingGenerator(pages)
    worker = SarasasVertimai(preloaded, dry_run)
    worker.run()

# Script entry point: run the bot only when executed directly.
if __name__ == "__main__":
    try:
        main()
    finally:
        # Runs whether main() returns or raises.
        # NOTE(review): presumably releases the framework's shared
        # resources -- confirm against wikipedia.stopme().
        wikipedia.stopme()