Naudotojas:Vpovilaitis/replacewords.py
Puslapis iš Vikižodyno, laisvojo žodyno.
# -*- coding: utf-8 -*- """ This bot will make direct text replacements. It will retrieve information on which pages might need changes either from an XML dump or a text file, or only change a single page. These command line parameters can be used to specify which pages to work on: ¶ms; -xml Retrieve information from a local XML dump (pages-articles or pages-meta-current, see http://download.wikimedia.org). Argument can also be given as "-xml:filename". -page Only edit a specific page. Argument can also be given as "-page:pagetitle". You can give this parameter multiple times to edit multiple pages. Furthermore, the following command line parameters are supported: -regex Make replacements using regular expressions. If this argument isn't given, the bot will make simple text replacements. -nocase Use case insensitive regular expressions. -dotall Make the dot match any character at all, including a newline. Without this flag, '.' will match anything except a newline. -multiline '^' and '$' will now match begin and end of each line. -xmlstart (Only works with -xml) Skip all articles in the XML dump before the one specified (may also be given as -xmlstart:Article). -addcat:cat_name Adds "cat_name" category to every altered page. -excepttitle:XYZ Skip pages with titles that contain XYZ. If the -regex argument is given, XYZ will be regarded as a regular expression. -requiretitle:XYZ Only do pages with titles that contain XYZ. If the -regex argument is given, XYZ will be regarded as a regular expression. -excepttext:XYZ Skip pages which contain the text XYZ. If the -regex argument is given, XYZ will be regarded as a regular expression. -exceptinside:XYZ Skip occurences of the to-be-replaced text which lie within XYZ. If the -regex argument is given, XYZ will be regarded as a regular expression. -exceptinsidetag:XYZ Skip occurences of the to-be-replaced text which lie within an XYZ tag. -summary:XYZ Set the summary message text for the edit to XYZ, bypassing the predefined message texts with original and replacements inserted. -sleep:123 If you use -fix you can check multiple regex at the same time in every page. This can lead to a great waste of CPU because the bot will check every regex without waiting using all the resources. This will slow it down between a regex and another in order not to waste too much CPU. -fix:XYZ Perform one of the predefined replacements tasks, which are given in the dictionary 'fixes' defined inside the file fixes.py. The -regex and -nocase argument and given replacements will be ignored if you use -fix. Currently available predefined fixes are: &fixes-help; -always Don't prompt you for each replacement -recursive Recurse replacement as long as possible. Be careful, this might lead to an infinite loop. -allowoverlap When occurences of the pattern overlap, replace all of them. Be careful, this might lead to an infinite loop. other: First argument is the old text, second argument is the new text. If the -regex argument is given, the first argument will be regarded as a regular expression, and the second argument might contain expressions like \\1 or \g<name>. Examples: If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the new syntax, e.g. {{Stub}}, download an XML dump file (pages-articles) from http://download.wikimedia.org, then use this command: python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}" If you have a dump called foobar.xml and want to fix typos in articles, e.g. Errror -> Error, use this: python replace.py -xml:foobar.xml "Errror" "Error" -namespace:0 If you have a page called 'John Doe' and want to fix the format of ISBNs, use: python replace.py -page:John_Doe -fix:isbn This command will change 'referer' to 'referrer', but not in pages which talk about HTTP, where the typo has become part of the standard: python replace.py referer referrer -file:typos.txt -excepttext:HTTP """ # # (C) Daniel Herding & the Pywikipediabot Team, 2004-2008 # # Distributed under the terms of the MIT license. # from __future__ import generators import os, codecs, re, time from string import Template import wikipedia, pagegenerators import editarticle import webbrowser # Imports predefined replacements tasks from fixeswords.py import fixeswords #, nesusije # This is required for the text that is shown when you run this script # with the parameter -help. docuReplacements = { '¶ms;': pagegenerators.parameterHelp, '&fixes-help;': fixeswords.help, } __version__='$Id: replace.py 6412 2009-02-22 16:13:01Z nicdumz $' # Summary messages in different languages # NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes' # below.`v msg = { 'ar': u'%s روبوت : استبدال تلقائي للنص', 'ca': u'Robot: Reemplaçament automàtic de text %s', 'cs': u'Robot automaticky nahradil text: %s', 'de': u'Bot: Automatisierte Textersetzung %s', 'el': u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s', 'en': u'Robot: Automated text replacement %s', 'es': u'Robot: Reemplazo automático de texto %s', 'fa': u'ربات: تغییر خودکار متن %s', 'fr': u'Bot : Remplacement de texte automatisé %s', 'he': u'בוט: החלפת טקסט אוטומטית %s', 'hu': u'Robot: Automatikus szövegcsere %s', 'ia': u'Robot: Reimplaciamento automatic de texto %s', 'id': u'Bot: Penggantian teks otomatis %s', 'is': u'Vélmenni: breyti texta %s', 'it': u'Bot: Sostituzione automatica %s', 'ja': u'ロボットによる: 文字置き換え %s', 'ka': u'რობოტი: ტექსტის ავტომატური შეცვლა %s', 'kk': u'Бот: Мәтінді өздікті алмастырды: %s', 'ksh': u'Bot: hät outomatesch Täx jetuusch: %s', 'lt': u'robotas: Automatinis teksto keitimas %s', 'nds': u'Bot: Text automaatsch utwesselt: %s', 'nds-nl': u'Bot: autematisch tekse vervungen %s', 'nl': u'Bot: automatisch tekst vervangen %s', 'nn': u'robot: automatisk teksterstatning: %s', 'no': u'robot: automatisk teksterstatning: %s', 'pl': u'Robot automatycznie zamienia tekst %s', 'pt': u'Bot: Mudança automática %s', 'ru': u'Робот: Автоматизированная замена текста %s', 'sr': u'Бот: Аутоматска замена текста %s', 'sv': u'Bot: Automatisk textersättning: %s', 'zh': u'機器人:執行文字代換作業 %s', } class XmlDumpReplacePageGenerator: """ Iterator that will yield Pages that might contain text to replace. These pages will be retrieved from a local XML dump file. Arguments: * xmlFilename - The dump's path, either absolute or relative * xmlStart - Skip all articles in the dump before this one * replacements - A list of 2-tuples of original text (as a compiled regular expression) and replacement text (as a string). * exceptions - A dictionary which defines when to ignore an occurence. See docu of the ReplaceRobot constructor below. """ def __init__(self, xmlFilename, xmlStart, replacements, exceptions): self.xmlFilename = xmlFilename self.replacements = replacements self.exceptions = exceptions self.xmlStart = xmlStart self.skipping = bool(xmlStart) self.excsInside = [] if 'inside-tags' in self.exceptions: self.excsInside += self.exceptions['inside-tags'] if 'inside' in self.exceptions: self.excsInside += self.exceptions['inside'] import xmlreader self.site = wikipedia.getSite() dump = xmlreader.XmlDump(self.xmlFilename) self.parser = dump.parse() def __iter__(self): try: for entry in self.parser: if self.skipping: if entry.title != self.xmlStart: continue self.skipping = False if not self.isTitleExcepted(entry.title) \ and not self.isTextExcepted(entry.text): new_text = entry.text for old, new in self.replacements: new_text = wikipedia.replaceExcept(new_text, old, new, self.excsInside, self.site) if new_text != entry.text: yield wikipedia.Page(self.site, entry.title) except KeyboardInterrupt: try: if not self.skipping: wikipedia.output( u'To resume, use "-xmlstart:%s" on the command line.' % entry.title) except NameError: pass def isTitleExcepted(self, title): if 'title' in self.exceptions: for exc in self.exceptions['title']: if exc.search(title): return True if 'require-title' in self.exceptions: for req in self.exceptions['require-title']: if not req.search(title): # if not all requirements are met: return True return False def isTextExcepted(self, text): if 'text-contains' in self.exceptions: for exc in self.exceptions['text-contains']: if exc.search(text): return True if 'require-text' in self.exceptions: ret = True for exc in self.exceptions['require-text']: if exc.search(text): return False return ret return False class PageCreateReader: def __init__(self, words): self.words = words def run(self): #wikipedia.output('Beginning \'%s\'...' % self.filesinfo) for page, contents, wordsk, anton in self.words: yield page, contents, wordsk, anton class PageNesusijeReader: def __init__(self, words): self.words = words def run(self): #wikipedia.output('Beginning \'%s\'...' % self.filesinfo) for page, topage in self.words: try: wikipedia.output('Beginning >>> \03{lightpurple}%s\03{default} <<<>>> \03{lightred}%s\03{default} <<<...' % (page, topage)) except: wikipedia.output(u'Except on Output') yield page, topage class PageNesusijeRobot: """ Responsible for writing pages to the wiki, with the titles and contents given by a PageFromFileReader. """ msg = { 'lt': u'Automated creating of articles', } # The following messages are added to topic when the page already exists msg_top = { 'lt': u'append on top', } msg_bottom = { 'lt': u'append on bottom', } msg_force = { 'lt': u'existing text overwritten', } append = '' def __getattr__(self, append): return self.append def __init__(self, reader = False, force = False, append = False, summary = 'Susiejama', minor = False, autosummary = False, debug = False, acceptall=False, acceptallnew=False, quit=False): self.reader = reader self.force = force self.append = append self.summary = summary self.minor = minor self.autosummary = autosummary self.debug = debug self.acceptall = acceptall self.acceptallnew = acceptallnew self.quit = quit def run(self): for word, toword in self.reader.run(): self.put(word, toword) if self.quit: return def put(self, word, toword): mysite = wikipedia.getSite() page = wikipedia.Page(mysite, word) # Show the title of the page we're working on. # Highlight the title in purple. try: wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title()) except: wikipedia.output(u'Except on Output') if self.summary: comment = self.summary else: comment = wikipedia.translate(mysite, self.msg) if page.exists(): original_text = page.get() isnew = True new_text = original_text needchange = True #wikipedia.output(old) temp = re.compile(ur'\{\{see\|(?P<parms>[^\}]*)\}\}', re.MULTILINE) while temp.search(new_text) is not None and needchange: for m in temp.finditer(new_text): text = m.group() isnew = False parms = m.group('parms').strip() oldsee = parms.split('|') see = '{{see' for wd in oldsee: if wd.strip() == toword: needchange = False new_text = original_text break else: see += '|' + wd.strip() if not needchange: break see += '|' + toword + '}}' new_text = new_text.replace(text, see) break if isnew: new_text = '{{see|' + toword + u'}}\n' + original_text while True: if new_text == original_text: try: wikipedia.output('No changes were necessary in >>> \03{lightpurple}%s\03{default} <<< %s >>>' % (page.aslink(), new_text[0:50])) except: wikipedia.output(u'Except on Output') break # Show the title of the page we're working on. # Highlight the title in purple. #wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" # % page.title()) try: wikipedia.showDiff(original_text, new_text) except: wikipedia.output(u'Except on Output') if self.acceptall: break choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"], ['y', 'N', 'e', 'b', 'a', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'b': webbrowser.open("http://%s%s" % ( page.site().hostname(), page.site().nice_get_address(page.title()) )) wikipedia.input("Press Enter when finished in browser.") original_text = page.get(get_redirect=True, force=True) new_text = original_text continue if choice == 'q': return if choice == 'a': self.acceptall = True if choice == 'y': page.put_async(new_text) # choice must be 'N' break if self.acceptall and new_text != original_text: try: page.put(new_text, comment = comment, minorEdit = self.minor) except wikipedia.LockedPage: wikipedia.output(u"Page %s is locked; skipping." % title) except wikipedia.EditConflict: wikipedia.output(u'Skipping %s because of edit conflict' % title) except wikipedia.SpamfilterError, error: wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url)) prot = None class PageCreateRobot: """ Responsible for writing pages to the wiki, with the titles and contents given by a PageFromFileReader. """ msg = { 'lt': u'Automated creating of articles', } # The following messages are added to topic when the page already exists msg_top = { 'lt': u'append on top', } msg_bottom = { 'lt': u'append on bottom', } msg_force = { 'lt': u'existing text overwritten', } append = '' def __getattr__(self, append): return self.append def __init__(self, reader, force, append, summary, minor = False, autosummary = False, debug = False, acceptall=False, acceptallnew=False, quit=False, acceptallnewnotauto=False, prot=None, test=True, fromword = None): self.reader = reader self.force = force self.append = append self.summary = summary self.minor = minor self.autosummary = autosummary self.debug = debug self.acceptall = acceptall self.acceptallnew = acceptallnew self.quit = quit self.acceptallnewnotauto=acceptallnewnotauto self.prot = prot self.test = test self.fromword = fromword self.skip = False if self.fromword is not None: self.skip = True def run(self): for word, contents, wordsk, anton in self.reader.run(): self.put(word, contents['title'], contents['contents'], contents['auto'], contents['reddir'], contents['nenaud']) if self.quit: return None return self.fromword def put(self, word, title, contents, auto, reddir, nenaud): if self.fromword is not None: if word == self.fromword: self.skip = False self.fromword = None if self.skip: return wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % word) #wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<\n%s" % (word,contents)) if self.test: self.prot.write('# [[%s]]\r\n' % word) return mysite = wikipedia.getSite() page = wikipedia.Page(mysite, word) # Show the title of the page we're working on. # Highlight the title in purple. # wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title()) if self.summary: comment = self.summary else: comment = wikipedia.translate(mysite, self.msg) comment_top = comment + " - " + wikipedia.translate(mysite, self.msg_top) comment_bottom = comment + " - " + wikipedia.translate(mysite, self.msg_bottom) comment_force = comment + " *** " + wikipedia.translate(mysite, self.msg_force) + " ***" rezfile = u'Zodziai/' if page.exists(): old = page.get(get_redirect=True) try: wikipedia.output(old) except: wikipedia.output(u'Except on Output') if page.isRedirectPage(): _fnf = os.path.join('', rezfile+word+'.txt') wprot = codecs.open(_fnf, "a+", "utf-8") wprot.write('%s' % title + '\n' + contents) return ## if self.test: ## self.prot.write('# [[%s]] - egzistavo, reikia patikrinti\r\n' % word) ## return interwiki = wikipedia.interwikiFormat(wikipedia.getLanguageLinks(old)) #wikipedia.output('Interwiki: %s\n' % interwiki) cat = wikipedia.categoryFormat(wikipedia.getCategoryLinks(old,mysite)) #wikipedia.output('Kategorijos: %s\n' % cat) txt = wikipedia.removeLanguageLinks(old) txt = wikipedia.removeCategoryLinks(txt,mysite) #wikipedia.output('text: %s\n' % txt) #wikipedia.output('text: %s\n' % (txt + '\n' + contents + '\n' + cat + '\n' + interwiki)) if self.append == None: self.append = wikipedia.inputChoice( u'Page <<%s>> already exists. What do?\n' % word, ['Top', 'Bottom', 'Change', 'Skip'], ['t', 'b', 'c', 's'], 's') if self.append == "t": wikipedia.output(u"Page %s already exists, appending on top!" % word) contents = title + '\n' + contents + '\n' + txt + '\n' if self.acceptallnewnotauto or self.force: #contents += '{{patikrinti}}\n' pass contents += cat + '\n' + interwiki comment = comment_top elif self.append == "b": wikipedia.output(u"Page %s already exists, appending on bottom!" % word) contents = txt + '\n' + contents + '\n' if self.acceptallnewnotauto or self.force: #contents += '{{patikrinti}}\n' pass contents += cat + '\n' + interwiki comment = comment_bottom elif self.append == "c": wikipedia.output(u"Page %s already exists, ***overwriting!" % word) contents = title + '\n' + contents + '\n' if self.acceptallnewnotauto or self.force: contents += '{{patikrinti}}\n' pass contents += cat + '\n' + interwiki comment = comment_force else: contents = old if not self.acceptall or not self.force or not auto: choice = None if not auto: if self.acceptallnewnotauto and not self.force: choice = 'y' elif self.force: choice = 'y' else: choice = 'e' else: if self.force: choice = 'y' else: choice = wikipedia.inputChoice( u'\nDo you want to accept these changes?\n', ['Yes', 'No', 'Edit', 'Browser', 'All', 'AllOld', 'Skip', 'Change', "Quit"], ['y', 'N', 'e', 'b', 'a', 'ao', 's', 'c', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(contents) # if user didn't press Cancel if as_edited and as_edited != contents: contents = as_edited elif choice == 'b': webbrowser.open("http://%s%s" % ( page.site().hostname(), page.site().nice_get_address(page.title()) )) wikipedia.input("Press Enter when finished in browser.") old = page.get(get_redirect=True, force=True) contents = old elif choice == 'a': self.acceptall = True elif choice == 'ao': self.acceptall = True elif choice == 's': self.append = 's' wikipedia.output(u"Page %s already exists, not adding!" % word) return elif choice == 'c': self.append = None elif choice == 'q': wikipedia.output(u"Page %s already exists, not adding!" % word) self.quit = True contents = old return elif choice != 'y': wikipedia.output(u"Page %s already exists, not adding!" % word) contents = old return if choice == 'c': choice = wikipedia.inputChoice( u'\nDo you want to accept these changes?\n', ['Yes', 'No', 'Edit', 'Browser', 'All', 'AllOld', 'Skip', "Quit"], ['y', 'N', 'e', 'b', 'a', 'ao', 's', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(contents) # if user didn't press Cancel if as_edited and as_edited != contents: contents = as_edited elif choice == 'a': self.acceptall = True elif choice == 'ao': self.acceptall = True elif choice == 'b': webbrowser.open("http://%s%s" % ( page.site().hostname(), page.site().nice_get_address(page.title()) )) wikipedia.input("Press Enter when finished in browser.") old = page.get(get_redirect=True, force=True) contents = old elif choice == 's': wikipedia.output(u"Not adding!") self.append = 's' return elif choice == 'q': wikipedia.output(u"Not adding!") self.quit = True return elif choice != 'y': wikipedia.output(u"Not adding!") return if old == contents: wikipedia.output(u"Page %s already exists, not adding!" % word) return else: ## if self.test: auto, reddir, nenaud ## return if nenaud and auto: _fnf = os.path.join('', rezfile+word+'.txt') wprot = codecs.open(_fnf, "a+", "utf-8") wprot.write('%s' % title + '\n' + contents) contents = reddir else: contents = title + '\n' + contents if not self.acceptallnew or not auto: choice = None if not auto: if self.acceptallnewnotauto: contents = contents #+ '{{patikrinti}}\n' choice = 'y' else: choice = 'e' else: choice = wikipedia.inputChoice( u'\nDo you want to accept these changes?\n', ['Yes', 'No', 'Edit', 'All', 'AllNew', 'Skip', 'Change', "Quit"], ['y', 'N', 'e', 'a', 'an', 's', 'c', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(contents) # if user didn't press Cancel if as_edited and as_edited != contents: contents = as_edited elif choice == 'a': self.acceptallnew = True elif choice == 'an': self.acceptallnew = True elif choice == 's': wikipedia.output(u"Not adding!") self.append = 's' return elif choice == 'c': self.append = None elif choice == 'q': wikipedia.output(u"Not adding!") self.quit = True return elif choice != 'y': wikipedia.output(u"Not adding!") return if choice == 'c': choice = wikipedia.inputChoice( u'\nDo you want to accept these changes?\n', ['Yes', 'No', 'Edit', 'All', 'AllNew', 'Skip', "Quit"], ['y', 'N', 'e', 'a', 'an', 's', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(contents) # if user didn't press Cancel if as_edited and as_edited != contents: contents = as_edited elif choice == 'a': self.acceptallnew = True elif choice == 'an': self.acceptallnew = True elif choice == 's': wikipedia.output(u"Not adding!") self.append = 's' return elif choice == 'q': wikipedia.output(u"Not adding!") self.quit = True return elif choice != 'y': wikipedia.output(u"Not adding!") return if self.autosummary: comment = '' wikipedia.setAction('') # Remove trailing newlines (cause troubles when creating redirects) contents = re.sub('^[\r\n]*','', contents) if self.debug: wikipedia.output("*** Debug mode ***\n" + \ "\03{lightpurple}word\03{default}: " + word + "\n" + \ "\03{lightpurple}contents\03{default}:\n" + contents + "\n" \ "\03{lightpurple}comment\03{default}: " + comment + "\n") return try: page.put(contents, comment = comment, minorEdit = self.minor) except wikipedia.LockedPage: wikipedia.output(u"Page %s is locked; skipping." % title) except wikipedia.EditConflict: wikipedia.output(u'Skipping %s because of edit conflict' % title) except wikipedia.SpamfilterError, error: wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url)) class ReplaceRobot: """ A bot that can do text replacements. """ def __init__(self, generator, replacements, exceptions={}, acceptall=False, allowoverlap=False, recursive=False, addedCat=None, sleep=None): """ Arguments: * generator - A generator that yields Page objects. * replacements - A list of 2-tuples of original text (as a compiled regular expression) and replacement text (as a string). * exceptions - A dictionary which defines when not to change an occurence. See below. * acceptall - If True, the user won't be prompted before changes are made. * allowoverlap - If True, when matches overlap, all of them are replaced. * addedCat - If set to a value, add this category to every page touched. Structure of the exceptions dictionary: This dictionary can have these keys: title A list of regular expressions. All pages with titles that are matched by one of these regular expressions are skipped. text-contains A list of regular expressions. All pages with text that contains a part which is matched by one of these regular expressions are skipped. inside A list of regular expressions. All occurences are skipped which lie within a text region which is matched by one of these regular expressions. inside-tags A list of strings. These strings must be keys from the exceptionRegexes dictionary in wikipedia.replaceExcept(). """ self.generator = generator self.replacements = replacements self.exceptions = exceptions self.acceptall = acceptall self.allowoverlap = allowoverlap self.recursive = recursive if addedCat: site = wikipedia.getSite() cat_ns = site.category_namespaces()[0] self.addedCat = wikipedia.Page(site, cat_ns + ':' + addedCat) self.sleep = sleep #wikipedia.output(u'fix gen = %s' % self.replacements) def isTitleExcepted(self, title): """ Iff one of the exceptions applies for the given title, returns True. """ if 'title' in self.exceptions: for exc in self.exceptions['title']: if exc.search(title): return True if 'require-title' in self.exceptions: for req in self.exceptions['require-title']: if not req.search(title): return True return False def isTextExcepted(self, original_text): """ Iff one of the exceptions applies for the given page contents, returns True. """ if 'text-contains' in self.exceptions: for exc in self.exceptions['text-contains']: if exc.search(original_text): return True if 'require-title' in self.exceptions: for req in self.exceptions['require-title']: if not req.search(title): # if not all requirements are met: return True if 'require-text' in self.exceptions: ret = True for exc in self.exceptions['require-text']: if exc.search(original_text): return False return ret return False def doReplacements(self, original_text): """ Returns the text which is generated by applying all replacements to the given text. """ new_text = original_text exceptions = [] if 'inside-tags' in self.exceptions: exceptions += self.exceptions['inside-tags'] if 'inside' in self.exceptions: exceptions += self.exceptions['inside'] for old, new in self.replacements: if self.sleep != None: time.sleep(self.sleep) new_text = wikipedia.replaceExcept(new_text, old, new, exceptions, allowoverlap=self.allowoverlap) return new_text def run(self): """ Starts the robot. """ # Run the generator which will yield Pages which might need to be # changed. #wikipedia.output(u'fix gen = %s' % self.generator) #return for page in self.generator: wikipedia.output( u'File %s .' % page.aslink()) if self.isTitleExcepted(page.title()): wikipedia.output( u'Skipping %s because the title is on the exceptions list.' % page.aslink()) continue try: # Load the page's text from the wiki original_text = page.get(get_redirect=True) if not page.canBeEdited(): wikipedia.output(u"You can't edit page %s" % page.aslink()) continue except wikipedia.NoPage: wikipedia.output(u'Page %s not found' % page.aslink()) continue new_text = original_text while True: if self.isTextExcepted(new_text): wikipedia.output( u'Skipping %s because it contains text that is on the exceptions list.' % page.aslink()) break new_text = self.doReplacements(new_text) if new_text == original_text: wikipedia.output('No changes were necessary in %s' % page.aslink()) break if self.recursive: newest_text = self.doReplacements(new_text) while newest_text!=new_text: new_text = newest_text newest_text = self.doReplacements(new_text) if hasattr(self, "addedCat"): cats = page.categories(nofollow_redirects=True) if self.addedCat not in cats: cats.append(self.addedCat) new_text = wikipedia.replaceCategoryLinks(new_text, cats) # Show the title of the page we're working on. # Highlight the title in purple. wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<" % page.title()) wikipedia.showDiff(original_text, new_text) if self.acceptall: break choice = wikipedia.inputChoice( u'Do you want to accept these changes?', ['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"], ['y', 'N', 'e', 'b', 'a', 'q'], 'N') if choice == 'e': editor = editarticle.TextEditor() as_edited = editor.edit(original_text) # if user didn't press Cancel if as_edited and as_edited != new_text: new_text = as_edited continue if choice == 'b': webbrowser.open("http://%s%s" % ( page.site().hostname(), page.site().nice_get_address(page.title()) )) wikipedia.input("Press Enter when finished in browser.") original_text = page.get(get_redirect=True, force=True) new_text = original_text continue if choice == 'q': return if choice == 'a': self.acceptall = True if choice == 'y': page.put_async(new_text) # choice must be 'N' break if self.acceptall and new_text != original_text: try: page.put(new_text) except wikipedia.EditConflict: wikipedia.output(u'Skipping %s because of edit conflict' % (page.title(),)) except wikipedia.SpamfilterError, e: wikipedia.output( u'Cannot change %s because of blacklist entry %s' % (page.title(), e.url)) except wikipedia.PageNotSaved, error: wikipedia.output(u'Error putting page: %s' % (error.args,)) except wikipedia.LockedPage: wikipedia.output(u'Skipping %s (locked page)' % (page.title(),)) def prepareRegexForMySQL(pattern): pattern = pattern.replace('\s', '[:space:]') pattern = pattern.replace('\d', '[:digit:]') pattern = pattern.replace('\w', '[:alnum:]') pattern = pattern.replace("'", "\\" + "'") #pattern = pattern.replace('\\', '\\\\') #for char in ['[', ']', "'"]: # pattern = pattern.replace(char, '\%s' % char) return pattern def noskiem(word): return word.replace('~', '') def skiem(word): return word.replace('~~', '-').replace('~','') def cikleWords (wordsList, force, append, acceptallnew=False, acceptallnewnotauto=False, acceptallgroups=False, prot=None, test=True, fromword = None, zodziai={}): acceptallgroup = acceptallgroups for word in wordsList: #wikipedia.output(u'word <<< %s >>>.' % word) wordform = fixeswords.wordforms[word['dalis']][word['form']] eti = u'' if word['fs0'] != u'': t1 = Template(wordform[word['fs0']]) eti = t1.substitute(s1 = noskiem(word['s1']), s2 = noskiem(word['s2']), s3 = noskiem(word['s3']), s4 = noskiem(word['s4']), s5 = noskiem(word['s5']), fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ## if bot.append == 's': ## bot.append = None subwordsList = [] for word2 in wordform['wordsList']: #wikipedia.output(u'word2 <<< %s >>>.' % word2) #wikipedia.output(u'word <<< %s >>>.' % word) reiksmesimas = '' reiksmesejas = '' reiksmeseja = '' reiksmesojas = '' reiksmesoja = '' if u'reiksmesimas' in word: reiksmesimas = word[u'reiksmesimas'] if u'reiksmesejas' in word: reiksmesejas = word[u'reiksmesejas'] if u'reiksmeseja' in word: reiksmeseja = word[u'reiksmeseja'] if u'reiksmesojas' in word: reiksmesojas = word[u'reiksmesojas'] if u'reiksmesoja' in word: reiksmesoja = word[u'reiksmesoja'] lg1 = Template(word2['g1']) lg1 = lg1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls1 = Template(word2['s1']) ls1 = ls1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls2 = Template(word2['s2']) ls2 = ls2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls3 = Template(word2['s3']) ls3 = ls3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls4 = Template(word2['s4']) ls4 = ls4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls5 = Template(word2['s5']) ls5 = ls5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) lfs0 = Template(word2['fs0']) lfs0 = noskiem(lfs0.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs1 = Template(word2['fs1']) lfs1 = noskiem(lfs1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs2 = Template(word2['fs2']) lfs2 = noskiem(lfs2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs3 = Template(word2['fs3']) lfs3 = noskiem(lfs3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs4 = Template(word2['fs4']) lfs4 = noskiem(lfs4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs5 = Template(word2['fs5']) lfs5 = noskiem(lfs5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) ltipas = Template(word2['tipas']) ltipas = noskiem(ltipas.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lreiksmes = Template(word2['reiksmes']) lreiksmes = noskiem(lreiksmes.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'], reiksmesimas = reiksmesimas, reiksmesejas = reiksmesejas, reiksmeseja = reiksmeseja, reiksmesojas = reiksmesojas, reiksmesoja = reiksmesoja)) lpozymis = Template(word2['pozymis']) lpozymis = noskiem(lpozymis.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'], pozymis = word['pozymis'], x = u'${x}')) sins = word2['sinonimai'] lsinonimai = [] for sin in sins: lsinonimai.append(sin) isrs = word2['israiskos'] lisraiskos = [] for isr in isrs: lisraiskos.append(isr) newword = {'dalis': word2['dalis'], 'form': word2['form'], 'g1': lg1, 's1': ls1, 's2': ls2, 's3': ls3, 's4': ls4, 's5': ls5, 'fs0': lfs0, 'fs1': lfs1, 'fs2': lfs2, 'pozymis': lpozymis, 'israiskos': lisraiskos, 'sinonimai': lsinonimai, 'reiksmes': lreiksmes, 'fs3': lfs3, 'fs4': eti, 'fs5': lfs5, 'tipas': ltipas, } if u'List' in word2: newword['List'] = word2['List'] if '[[neig.]] [[neig.]] ' not in ltipas: subwordsList.append(newword) #wikipedia.output(u'subwordsList <<< %s >>>.' % subwordsList) #return if u'List' in word: for word2 in word['List']: #wikipedia.output(u'word2 <<< %s >>>.' % word2) #wikipedia.output(u'word <<< %s >>>.' % word) reiksmesimas = '' reiksmesejas = '' reiksmeseja = '' reiksmesojas = '' reiksmesoja = '' if u'reiksmesimas' in word2: reiksmesimas = word2[u'reiksmesimas'] if u'reiksmesejas' in word2: reiksmesejas = word2[u'reiksmesejas'] if u'reiksmeseja' in word2: reiksmeseja = word2[u'reiksmeseja'] if u'reiksmesojas' in word2: reiksmesojas = word2[u'reiksmesojas'] if u'reiksmesoja' in word2: reiksmesoja = word2[u'reiksmesoja'] lg1 = Template(word2['g1']) lg1 = lg1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls1 = Template(word2['s1']) ls1 = ls1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls2 = Template(word2['s2']) ls2 = ls2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls3 = Template(word2['s3']) ls3 = ls3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls4 = Template(word2['s4']) ls4 = ls4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) ls5 = Template(word2['s5']) ls5 = ls5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) lfs0 = Template(word2['fs0']) lfs0 = noskiem(lfs0.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs1 = Template(word2['fs1']) lfs1 = noskiem(lfs1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs2 = Template(word2['fs2']) lfs2 = noskiem(lfs2.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs3 = Template(word2['fs3']) lfs3 = noskiem(lfs3.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs4 = Template(word2['fs4']) lfs4 = noskiem(lfs4.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lfs5 = Template(word2['fs5']) lfs5 = noskiem(lfs5.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) ltipas = Template(word2['tipas']) ltipas = noskiem(ltipas.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'])) lreiksmes = Template(word2['reiksmes']) lreiksmes = noskiem(lreiksmes.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'], reiksmesimas = reiksmesimas, reiksmesejas = reiksmesejas, reiksmeseja = reiksmeseja, reiksmesojas = reiksmesojas, reiksmesoja = reiksmesoja)) lpozymis = Template(word2['pozymis']) lpozymis = noskiem(lpozymis.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'], pozymis = word['pozymis'], x = u'${x}')) sins = word2['sinonimai'] lsinonimai = [] for sin in sins: lsinonimai.append(sin) isrs = word2['israiskos'] lisraiskos = [] for isr in isrs: lisraiskos.append(isr) newword = {'dalis': word2['dalis'], 'form': word2['form'], 'g1': lg1, 's1': ls1, 's2': ls2, 's3': ls3, 's4': ls4, 's5': ls5, 'fs0': lfs0, 'fs1': lfs1, 'fs2': lfs2, 'pozymis': lpozymis, 'israiskos': lisraiskos, 'sinonimai': lsinonimai, 'reiksmes': lreiksmes, 'fs3': lfs3, 'fs4': eti, 'fs5': lfs5, 'tipas': ltipas, } if u'List' in word2: newword['List'] = word2['List'] if '[[neig.]] [[neig.]] ' not in ltipas: subwordsList.append(newword) wordsll = [] for wrd in subwordsList: wrdform = fixeswords.wordforms[wrd['dalis']][wrd['form']] wrdref = Template(wrdform['ref']) wrdref = wrdref.substitute(s1 = noskiem(wrd['s1']), s2 = noskiem(wrd['s2']), s3 = noskiem(wrd['s3']), s4 = noskiem(wrd['s4']), s5 = noskiem(wrd['s5']), fs0 = wrd['fs0'], fs1 = wrd['fs1'], fs2 = wrd['fs2'], fs3 = wrd['fs3'], fs4 = wrd['fs4'], fs5 = wrd['fs5'], tipas = wrd['tipas'], form = wrd['form']) if wrdref != u'': wordsllt = u'* '+wrdref if wrd[u'tipas'] != u'': wordsllt += u' ('+wrd[u'tipas']+u')' pozymis = Template(wrd['pozymis']) pozymis = pozymis.substitute(x = u'{{x}}') wordsllt += pozymis wordsll.append(wordsllt) wordsll.sort() for wrd in wordsll: wikipedia.output(u' <<<\03{lightpurple}%s\03{default}>>> ' % wrd) Words = [] for wordtemp, form in wordform['forms']: t1 = Template(wordtemp) t1 = t1.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) skiemt1 = skiem(t1) noskiemt1 = noskiem(t1) ps1 = noskiem(word['s1']) pg1 = noskiem(word['g1']) tf = Template(form) tf = tf.substitute(s1 = word['s1'], s2 = word['s2'], s3 = word['s3'], s4 = word['s4'], s5 = word['s5'], fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form']) t2 = Template(wordform[tf]['text']) sinonimai = u'' if len(word['sinonimai']) > 0: sinonimai = u'==== Sinonimai ====\n\n{{sin-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n' for wordii,sin in enumerate(word['sinonimai']): sinonimai += u'* {{t+|lt|'+sin+u'}}\n' if wordii < len(word['sinonimai'])/2.0 <= wordii+1: sinonimai += u'{{sin-mid}}\n' sinonimai += u'{{sin-bottom}}\n\n' antonimai = u'' antonimas = u'' if noskiemt1.find('ne') == 0 and ('[[ne-]]' in word['fs4'] or '[[ne-]]' in word['fs5']): antonimas = noskiemt1[2:] antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n' antonimai += u'* {{t+|lt|'+noskiemt1[2:]+u'}}\n' antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n' else: if word['fs5'].find(u'{{sangrž.}}') == 0: antonimas = 'nesi'+noskiemt1 if antonimas.endswith('si'): antonimas = antonimas[:-2] if antonimas.endswith('s'): antonimas = antonimas[:-1] antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n' antonimai += u'* {{t+|lt|'+antonimas+u'}}\n' antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n' else: antonimas = 'ne'+noskiemt1 antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n' antonimai += u'* {{t+|lt|ne'+noskiemt1+u'}}\n' antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n' isvestiniai = u'' if len(wordsll) > 0: isvestiniai = u'==== Išvestiniai žodžiai ====\n\n{{rel-top|kalba=lt|vardas='+noskiemt1+u'|tipas=Išvestiniai žodžiai}}\n' for wordii,wrd in enumerate(wordsll): isvestiniai += wrd+u'\n' if wordii < len(wordsll)/2.0 <= wordii+1: isvestiniai = isvestiniai + u'{{rel-mid}}\n' isvestiniai += u'{{rel-bottom}}\n\n' israiskos = u'' if len(word['israiskos']) > 0: israiskos = u'==== Išraiškos arba posakiai ====\n\n{{rel-top|kalba=lt|vardas='+noskiemt1+u'|tipas=Išraiškos arba posakiai}}\n' for wordii,isr in enumerate(word['israiskos']): israiskos += u'* {{t+|lt|'+isr+u'}}\n' if wordii < len(word['israiskos'])/2.0 <= wordii+1: israiskos += u'{{rel-mid}}\n' israiskos += u'{{rel-bottom}}\n\n' vertimai = u'' sabl = re.compile(u'^\#+\s+(?P<tipas>.*?)(?:\:|\.)$', re.M) inmatch = 0 match = sabl.search(word['reiksmes'], inmatch) while match != None: inmatch = match.end() vtipas = match.group('tipas') if vtipas != None: sabl1 = re.compile(u'\[\[(?P<n1>[^\[\]\|]*?)\|(?P<n2>[^\[\]\|]*?)\]\]', re.M) match1 = sabl1.search(vtipas, 0) while match1 != None: n1 = match1.group('n1') n2 = match1.group('n2') vtipas = vtipas[:match1.start()] + n2 + vtipas[match1.end():] match1 = sabl1.search(vtipas, 0) sabl1 = re.compile(u'\[\[(?P<n1>[^\[\]\|]*?)\]\]', re.M) match1 = sabl1.search(vtipas, 0) while match1 != None: n1 = match1.group('n1') vtipas = vtipas[:match1.start()] + n1 + vtipas[match1.end():] match1 = sabl1.search(vtipas, 0) vertimai += u'{{trans-top|kalba=lt|vardas=' + noskiemt1 + u'|tipas='+ vtipas + u'}}\n' + \ u'{{trans-mid}}\n' + \ u'{{trans-bottom}}\n' match = sabl.search(word['reiksmes'], inmatch) if noskiemt1 not in zodziai: zodziai[noskiemt1] = {} if u'pagrf' not in zodziai[noskiemt1]: zodziai[noskiemt1][u'pagrf'] = not wordform[tf]['auto'] elif not zodziai[noskiemt1][u'pagrf']: zodziai[noskiemt1][u'pagrf'] = not wordform[tf]['auto'] pozymis = Template(word['pozymis']) if zodziai[noskiemt1][u'pagrf']: pozymis = pozymis.substitute(x = u'{{x|lt|p}}') else: pozymis = pozymis.substitute(x = u'{{x|lt|}}') reddir = Template(u'#REDIRECT [[${reddir}]]') reddir = reddir.substitute(reddir = ps1+pg1) sabl2 = re.compile(u'\{\{x\|lt\|p?\}\}', re.M) match2 = sabl2.search(pozymis, 0) nenaud = False if match2 != None: nenaud = True t2 = t2.substitute(s1 = noskiem(word['s1']), s2 = noskiem(word['s2']), s3 = noskiem(word['s3']), s4 = noskiem(word['s4']), s5 = noskiem(word['s5']), fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'], reiksmes = word['reiksmes'], pozymis = pozymis, sinonimai = sinonimai, antonimai = antonimai, isvestiniai = isvestiniai, israiskos = israiskos, vertimai = vertimai, word = noskiemt1, words = skiemt1) sabl3 = re.compile(u'\[\[\]\]\, \[\[\]\]\, \[\[\]\]', re.M) match3 = sabl3.search(t2, 0) t3 = Template(wordform[tf]['title']) if match3 != None: t2 = t2[:match3.start()] + t2[match3.end():] t3 = t3.substitute(s1 = noskiem(word['s1']), s2 = noskiem(word['s2']), s3 = noskiem(word['s3']), s4 = noskiem(word['s4']), s5 = noskiem(word['s5']), fs0 = word['fs0'], fs1 = word['fs1'], fs2 = word['fs2'], fs3 = word['fs3'], fs4 = word['fs4'], fs5 = word['fs5'], tipas = word['tipas'], form = word['form'], word = noskiemt1, words = skiemt1) Words.append((noskiemt1, {'title':t3, 'contents':t2, 'auto':not zodziai[noskiemt1][u'pagrf'], 'reddir':reddir, 'nenaud':nenaud}, skiemt1, antonimas)) preloadingGen = PageCreateReader(Words) bot = PageCreateRobot(preloadingGen, force, append, wordform['msg'], acceptallnew=acceptallnew, acceptallnewnotauto=acceptallnewnotauto, prot=prot, test=test, fromword = fromword) wikipedia.output(u'Bus formuojami: ') for wordl, descr, wordlsk, anton in Words: wikipedia.output(u' <<<\03{lightpurple}%s\03{default}>>> <<<\03{lightpurple}%s\03{default}>>> <<<\03{lightpurple}%s\03{default}>>> ' % (wordl, wordlsk, anton)) if acceptallgroup: fromword = bot.run() else: choice = wikipedia.inputChoice( u'Ar formuoti?', ['Create', 'CreateAll', 'No'], ['c', 'ca', 'N'], 'N') if choice == 'c': fromword = bot.run() elif choice == 'ca': acceptallgroup = True fromword = bot.run() (acceptallgroup, fromword, zodziai) = cikleWords (subwordsList, force, bot.append, acceptallnew=acceptallnew, acceptallnewnotauto=acceptallnewnotauto, acceptallgroups=acceptallgroup, prot=prot, test=test, fromword = fromword, zodziai=zodziai) return (acceptallgroup, fromword, zodziai) def main(*args): add_cat = None gen = None # summary message summary_commandline = None # Array which will collect commandline parameters. # First element is original text, second element is replacement text. commandline_replacements = [] # A list of 2-tuples of original text and replacement text. replacements = [] # Don't edit pages which contain certain texts. exceptions = { 'title': [], 'text-contains': [], 'require-text': [], 'inside': [], 'inside-tags': [], 'require-title': [], # using a seperate requirements dict needs some } # major refactoring of code. # Should the elements of 'replacements' and 'exceptions' be interpreted # as regular expressions? regex = False # Predefined fixes from dictionary 'fixes' (see above). fix = None force = False append = 'b' summary = None minor = False autosummary = False debug = False # the dump's path, either absolute or relative, which will be used # if -xml flag is present xmlFilename = None useSql = False PageTitles = [] # will become True when the user presses a ('yes to all') or uses the # -always flag. acceptall = False # Will become True if the user inputs the commandline parameter -nocase caseInsensitive = False # Will become True if the user inputs the commandline parameter -dotall dotall = False # Will become True if the user inputs the commandline parameter -multiline multiline = False # Do all hits when they overlap allowoverlap = False # Do not recurse replacement recursive = False # This factory is responsible for processing command line arguments # that are also used by other scripts and that determine on which pages # to work on. genFactory = pagegenerators.GeneratorFactory() # Load default summary message. # BUG WARNING: This is probably incompatible with the -lang parameter. wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg)) # Between a regex and another (using -fix) sleep some time (not to waste # too much CPU sleep = None #Set the regular expression flags flags = re.UNICODE if caseInsensitive: flags = flags | re.IGNORECASE if dotall: flags = flags | re.DOTALL if multiline: flags = flags | re.MULTILINE # Read commandline parameters. for arg in wikipedia.handleArgs(*args): if arg == '-regex': regex = True elif arg.startswith('-xmlstart'): if len(arg) == 9: xmlStart = wikipedia.input( u'Please enter the dumped article to start with:') else: xmlStart = arg[10:] elif arg.startswith('-xml'): if len(arg) == 4: xmlFilename = wikipedia.input( u'Please enter the XML dump\'s filename:') else: xmlFilename = arg[5:] elif arg =='-sql': useSql = True elif arg.startswith('-page'): if len(arg) == 5: PageTitles.append(wikipedia.input( u'Which page do you want to change?')) else: PageTitles.append(arg[6:]) elif arg.startswith('-excepttitle:'): exceptions['title'].append(arg[13:]) elif arg.startswith('-requiretitle:'): exceptions['require-title'].append(arg[14:]) elif arg.startswith('-excepttext:'): exceptions['text-contains'].append(arg[12:]) elif arg.startswith('-exceptinside:'): exceptions['inside'].append(arg[14:]) elif arg.startswith('-exceptinsidetag:'): exceptions['inside-tags'].append(arg[17:]) elif arg == "-appendtop": append = "t" elif arg == "-appendbottom": append = "b" elif arg == "-force": force=True elif arg == '-minor': minor = True elif arg.startswith("-summary:"): summary = arg[9:] elif arg == '-autosummary': autosummary = True elif arg.startswith('-fix:'): fix = arg[5:] elif arg.startswith('-sleep:'): sleep = float(arg[7:]) elif arg == '-always': acceptall = True elif arg == '-recursive': recursive = True elif arg == '-nocase': caseInsensitive = True elif arg == '-dotall': dotall = True elif arg == '-multiline': multiline = True elif arg.startswith('-addcat:'): add_cat = arg[len('addcat:'):] elif arg.startswith('-summary:'): wikipedia.setAction(arg[len('-summary:'):]) summary_commandline = True elif arg.startswith('-allowoverlap'): allowoverlap = True else: if not genFactory.handleArg(arg): commandline_replacements.append(arg) if len(commandline_replacements) % 2: raise wikipedia.Error, 'require even number of replacements.' elif len(commandline_replacements) == 2 and fix == None: replacements.append((commandline_replacements[0], commandline_replacements[1])) if summary_commandline == None: wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg ) % (' (-' + commandline_replacements[0] + ' +' + commandline_replacements[1] + ')')) elif len(commandline_replacements) > 1: if fix == None: for i in xrange (0, len(commandline_replacements), 2): replacements.append((commandline_replacements[i], commandline_replacements[i + 1])) if summary_commandline == None: pairs = [( commandline_replacements[i], commandline_replacements[i + 1] ) for i in range(0, len(commandline_replacements), 2)] replacementsDescription = '(%s)' % ', '.join( [('-' + pair[0] + ' +' + pair[1]) for pair in pairs]) wikipedia.setAction( wikipedia.translate(wikipedia.getSite(), msg ) % replacementsDescription) else: raise wikipedia.Error( 'Specifying -fix with replacements is undefined') elif fix == None: old = wikipedia.input(u'Please enter the text that should be replaced:') new = wikipedia.input(u'Please enter the new text:') change = '(-' + old + ' +' + new replacements.append((old, new)) while True: old = wikipedia.input( u'Please enter another text that should be replaced, or press Enter to start:') if old == '': change = change + ')' break new = wikipedia.input(u'Please enter the new text:') change = change + ' & -' + old + ' +' + new replacements.append((old, new)) if not summary_commandline == True: default_summary_message = wikipedia.translate(wikipedia.getSite(), msg) % change wikipedia.output(u'The summary message will default to: %s' % default_summary_message) summary_message = wikipedia.input( u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:') if summary_message == '': summary_message = default_summary_message wikipedia.setAction(summary_message) elif fix not in ('word', 'nesusije') : # Perform one of the predefined actions. try: fix = fixeswords.fixes[fix] except KeyError: wikipedia.output(u'Available predefined fixes are: %s' % fixeswords.fixes.keys()) return if 'regex' in fix: regex = fix['regex'] if 'msg' in fix: wikipedia.setAction( wikipedia.translate(wikipedia.getSite(), fix['msg'])) if 'exceptions' in fix: exceptions = fix['exceptions'] if 'nocase' in fix: caseInsensitive = fix['nocase'] replacements = fix['replacements'] #wikipedia.output(u'fix get = %s' % fix) # Pre-compile all regular expressions here to save time later for i in range(len(replacements)): old, new = replacements[i] if not regex: old = re.escape(old) oldR = re.compile(old, flags) replacements[i] = oldR, new for exceptionCategory in ['title', 'require-title', 'text-contains', 'inside', 'require-text']: if exceptionCategory in exceptions: patterns = exceptions[exceptionCategory] if not regex: patterns = [re.escape(pattern) for pattern in patterns] patterns = [re.compile(pattern, flags) for pattern in patterns] exceptions[exceptionCategory] = patterns #wikipedia.output(u'fix rep = %s' % replacements) #wikipedia.output(u'fix page = %s' % PageTitles) #return if xmlFilename: try: xmlStart except NameError: xmlStart = None gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart, replacements, exceptions) elif useSql: whereClause = 'WHERE (%s)' % ' OR '.join( ["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern) for (old, new) in replacements]) if exceptions: exceptClause = 'AND NOT (%s)' % ' OR '.join( ["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern) for exc in exceptions]) else: exceptClause = '' query = u""" SELECT page_namespace, page_title FROM page JOIN text ON (page_id = old_id) %s %s LIMIT 200""" % (whereClause, exceptClause) gen = pagegenerators.MySQLPageGenerator(query) elif PageTitles: pages = [wikipedia.Page(wikipedia.getSite(), PageTitle) for PageTitle in PageTitles] gen = iter(pages) if fix not in ('word', 'nesusije'): gen = genFactory.getCombinedGenerator(gen) if fix not in ('word', 'nesusije'): if not gen: # syntax error, show help text from the top of this file wikipedia.showHelp('replace') return if xmlFilename: # XML parsing can be quite slow, so use smaller batches and # longer lookahead. preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=20, lookahead=100) elif fix not in ('word', 'nesusije'): preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60) _fnf = os.path.join('', 'protokolas.txt') prot = codecs.open(_fnf, "w+", "utf-8") if fix == 'word': fromword = None zodziai = {} (acceptallgroup, fromword, zodziai) = cikleWords(fixeswords.wordsList, force, append, acceptallnew=True, acceptallnewnotauto=True, acceptallgroups=False, prot=prot, test=True, fromword=fromword, zodziai=zodziai) _fnf = os.path.join('', 'protokolas2.txt') prot = codecs.open(_fnf, "w+", "utf-8") wikipedia.output(u'<<<\03{lightpurple} Prasideda straipsnių kūrimo etapas. \03{default}>>>') (acceptallgroup, fromword, zodziai) = cikleWords(fixeswords.wordsList, force, append, acceptallnew=True, acceptallnewnotauto=True, acceptallgroups=False, prot=prot, test=False, fromword=fromword, zodziai=zodziai) elif fix == 'nesusije': wikipedia.output(u'fix = %s' % fix) preloadingGen = PageNesusijeReader(nesusije.wordsList) bot = PageNesusijeRobot(preloadingGen) bot.run() else: bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep) #wikipedia.output(u'fix gen = %s' % preloadingGen) #return bot.run() if __name__ == "__main__": try: main() finally: wikipedia.stopme()