# -*- coding: utf-8 -*-
"""
This bot will make direct text replacements. It will retrieve information on
which pages might need changes either from an XML dump or a text file, or only
change a single page.
These command line parameters can be used to specify which pages to work on:
¶ms;
-xml Retrieve information from a local XML dump (pages-articles
or pages-meta-current, see http://download.wikimedia.org).
Argument can also be given as "-xml:filename".
-page Only edit a specific page.
Argument can also be given as "-page:pagetitle". You can
give this parameter multiple times to edit multiple pages.
Furthermore, the following command line parameters are supported:
-regex Make replacements using regular expressions. If this argument
isn't given, the bot will make simple text replacements.
-nocase Use case insensitive regular expressions.
-dotall Make the dot match any character at all, including a newline.
Without this flag, '.' will match anything except a newline.
-multiline '^' and '$' will now match begin and end of each line.
-xmlstart (Only works with -xml) Skip all articles in the XML dump
before the one specified (may also be given as
-xmlstart:Article).
-addcat:cat_name Adds "cat_name" category to every altered page.
-excepttitle:XYZ Skip pages with titles that contain XYZ. If the -regex
argument is given, XYZ will be regarded as a regular
expression.
-requiretitle:XYZ Only do pages with titles that contain XYZ. If the -regex
argument is given, XYZ will be regarded as a regular
expression.
-excepttext:XYZ Skip pages which contain the text XYZ. If the -regex
argument is given, XYZ will be regarded as a regular
expression.
-exceptinside:XYZ Skip occurrences of the to-be-replaced text which lie
within XYZ. If the -regex argument is given, XYZ will be
regarded as a regular expression.
-exceptinsidetag:XYZ Skip occurrences of the to-be-replaced text which lie
within an XYZ tag.
-summary:XYZ Set the summary message text for the edit to XYZ, bypassing
the predefined message texts with original and replacements
inserted.
-sleep:123 If you use -fix you can check multiple regex at the same time
in every page. This can lead to a great waste of CPU because
the bot will check every regex without waiting using all the
resources. This will slow it down between a regex and another
in order not to waste too much CPU.
-fix:XYZ Perform one of the predefined replacements tasks, which are
given in the dictionary 'fixes' defined inside the file
fixes.py.
The -regex and -nocase argument and given replacements will
be ignored if you use -fix.
Currently available predefined fixes are:
&fixes-help;
-always Don't prompt you for each replacement
-recursive Recurse replacement as long as possible. Be careful, this
might lead to an infinite loop.
-allowoverlap When occurrences of the pattern overlap, replace all of them.
Be careful, this might lead to an infinite loop.
other: First argument is the old text, second argument is the new
text. If the -regex argument is given, the first argument
will be regarded as a regular expression, and the second
argument might contain expressions like \\1 or \g<name>.
Examples:
If you want to change templates from the old syntax, e.g. {{msg:Stub}}, to the
new syntax, e.g. {{Stub}}, download an XML dump file (pages-articles) from
http://download.wikimedia.org, then use this command:
python replace.py -xml -regex "{{msg:(.*?)}}" "{{\\1}}"
If you have a dump called foobar.xml and want to fix typos in articles, e.g.
Errror -> Error, use this:
python replace.py -xml:foobar.xml "Errror" "Error" -namespace:0
If you have a page called 'John Doe' and want to fix the format of ISBNs, use:
python replace.py -page:John_Doe -fix:isbn
This command will change 'referer' to 'referrer', but not in pages which
talk about HTTP, where the typo has become part of the standard:
python replace.py referer referrer -file:typos.txt -excepttext:HTTP
"""
#
# (C) Daniel Herding & the Pywikipediabot Team, 2004-2008
#
# Distributed under the terms of the MIT license.
#
from __future__ import generators
import os, codecs, re, time
from string import Template
import wikipedia, pagegenerators
import editarticle
import webbrowser
# Import the predefined replacement tasks from fixeswords.py
import fixeswords #, nesusije
# This is required for the text that is shown when you run this script
# with the parameter -help.
docuReplacements = {
'¶ms;': pagegenerators.parameterHelp,
'&fixes-help;': fixeswords.help,
}
__version__='$Id: replace.py 6412 2009-02-22 16:13:01Z nicdumz $'
# Summary messages in different languages
# NOTE: Predefined replacement tasks might use their own dictionary, see 'fixes'
# below.
# Edit summary per language code; %s is filled in with a short description
# of the replacements that were performed.
msg = {
    'ar': u'%s روبوت : استبدال تلقائي للنص',
    'ca': u'Robot: Reemplaçament automàtic de text %s',
    'cs': u'Robot automaticky nahradil text: %s',
    'de': u'Bot: Automatisierte Textersetzung %s',
    'el': u'Ρομπότ: Αυτόματη αντικατάσταση κειμένου %s',
    'en': u'Robot: Automated text replacement %s',
    'es': u'Robot: Reemplazo automático de texto %s',
    'fa': u'ربات: تغییر خودکار متن %s',
    'fr': u'Bot : Remplacement de texte automatisé %s',
    'he': u'בוט: החלפת טקסט אוטומטית %s',
    'hu': u'Robot: Automatikus szövegcsere %s',
    'ia': u'Robot: Reimplaciamento automatic de texto %s',
    'id': u'Bot: Penggantian teks otomatis %s',
    'is': u'Vélmenni: breyti texta %s',
    'it': u'Bot: Sostituzione automatica %s',
    'ja': u'ロボットによる: 文字置き換え %s',
    'ka': u'რობოტი: ტექსტის ავტომატური შეცვლა %s',
    'kk': u'Бот: Мәтінді өздікті алмастырды: %s',
    'ksh': u'Bot: hät outomatesch Täx jetuusch: %s',
    'lt': u'robotas: Automatinis teksto keitimas %s',
    'nds': u'Bot: Text automaatsch utwesselt: %s',
    'nds-nl': u'Bot: autematisch tekse vervungen %s',
    'nl': u'Bot: automatisch tekst vervangen %s',
    'nn': u'robot: automatisk teksterstatning: %s',
    'no': u'robot: automatisk teksterstatning: %s',
    'pl': u'Robot automatycznie zamienia tekst %s',
    'pt': u'Bot: Mudança automática %s',
    'ru': u'Робот: Автоматизированная замена текста %s',
    'sr': u'Бот: Аутоматска замена текста %s',
    'sv': u'Bot: Automatisk textersättning: %s',
    'zh': u'機器人:執行文字代換作業 %s',
}
class XmlDumpReplacePageGenerator:
    """
    Iterator that will yield Pages that might contain text to replace.

    These pages will be retrieved from a local XML dump file.

    Arguments:
        * xmlFilename  - The dump's path, either absolute or relative
        * xmlStart     - Skip all articles in the dump before this one
        * replacements - A list of 2-tuples of original text (as a
                         compiled regular expression) and replacement
                         text (as a string).
        * exceptions   - A dictionary which defines when to ignore an
                         occurrence. See docu of the ReplaceRobot
                         constructor below.
    """
    def __init__(self, xmlFilename, xmlStart, replacements, exceptions):
        self.xmlFilename = xmlFilename
        self.replacements = replacements
        self.exceptions = exceptions
        self.xmlStart = xmlStart
        # While True, dump entries are skipped until xmlStart is reached.
        self.skipping = bool(xmlStart)
        # Exceptions that apply to single occurrences rather than to whole
        # pages; passed through to wikipedia.replaceExcept().
        self.excsInside = []
        if 'inside-tags' in self.exceptions:
            self.excsInside += self.exceptions['inside-tags']
        if 'inside' in self.exceptions:
            self.excsInside += self.exceptions['inside']
        import xmlreader
        self.site = wikipedia.getSite()
        dump = xmlreader.XmlDump(self.xmlFilename)
        self.parser = dump.parse()

    def __iter__(self):
        """Yield a Page for every dump entry whose text would change."""
        try:
            for entry in self.parser:
                if self.skipping:
                    if entry.title != self.xmlStart:
                        continue
                    self.skipping = False
                if not self.isTitleExcepted(entry.title) \
                        and not self.isTextExcepted(entry.text):
                    new_text = entry.text
                    for old, new in self.replacements:
                        new_text = wikipedia.replaceExcept(new_text, old, new, self.excsInside, self.site)
                    # Only yield pages where at least one replacement fired.
                    if new_text != entry.text:
                        yield wikipedia.Page(self.site, entry.title)
        except KeyboardInterrupt:
            # Tell the user how to resume from the entry we stopped at.
            try:
                if not self.skipping:
                    wikipedia.output(
                        u'To resume, use "-xmlstart:%s" on the command line.'
                        % entry.title)
            except NameError:
                # The loop was interrupted before the first entry was read.
                pass

    def isTitleExcepted(self, title):
        """Return True iff the title matches a 'title' exception or fails
        one of the 'require-title' patterns."""
        if 'title' in self.exceptions:
            for exc in self.exceptions['title']:
                if exc.search(title):
                    return True
        if 'require-title' in self.exceptions:
            for req in self.exceptions['require-title']:
                if not req.search(title): # if not all requirements are met:
                    return True
        return False

    def isTextExcepted(self, text):
        """Return True iff the page text triggers a 'text-contains'
        exception or matches none of the 'require-text' patterns."""
        if 'text-contains' in self.exceptions:
            for exc in self.exceptions['text-contains']:
                if exc.search(text):
                    return True
        if 'require-text' in self.exceptions:
            ret = True
            for exc in self.exceptions['require-text']:
                if exc.search(text):
                    return False
            return ret
        return False
class PageCreateReader:
    """Replays pre-built (page, contents, wordsk, anton) records."""

    def __init__(self, words):
        # Sequence of 4-tuples prepared by the caller.
        self.words = words

    def run(self):
        """Yield each stored (page, contents, wordsk, anton) record."""
        for record in self.words:
            page_name, page_contents, wordsk, anton = record
            yield page_name, page_contents, wordsk, anton
class PageNesusijeReader:
    """Replays (page, topage) pairs, announcing each one on the console."""

    def __init__(self, words):
        # Sequence of (page, topage) 2-tuples prepared by the caller.
        self.words = words

    def run(self):
        """Yield every (page, topage) pair after printing a progress line."""
        for pair in self.words:
            source, target = pair
            try:
                wikipedia.output('Beginning >>> \03{lightpurple}%s\03{default} <<<>>> \03{lightred}%s\03{default} <<<...' % (source, target))
            except:
                wikipedia.output(u'Except on Output')
            yield source, target
class PageNesusijeRobot:
"""
Responsible for writing pages to the wiki, with the titles and contents
given by a PageFromFileReader.
"""
msg = {
'lt': u'Automated creating of articles',
}
# The following messages are added to topic when the page already exists
msg_top = {
'lt': u'append on top',
}
msg_bottom = {
'lt': u'append on bottom',
}
msg_force = {
'lt': u'existing text overwritten',
}
append = ''
def __getattr__(self, append):
return self.append
def __init__(self, reader = False, force = False, append = False, summary = 'Susiejama', minor = False,
autosummary = False, debug = False, acceptall=False, acceptallnew=False, quit=False):
self.reader = reader
self.force = force
self.append = append
self.summary = summary
self.minor = minor
self.autosummary = autosummary
self.debug = debug
self.acceptall = acceptall
self.acceptallnew = acceptallnew
self.quit = quit
def run(self):
for word, toword in self.reader.run():
self.put(word, toword)
if self.quit:
return
def put(self, word, toword):
mysite = wikipedia.getSite()
page = wikipedia.Page(mysite, word)
# Show the title of the page we're working on.
# Highlight the title in purple.
try:
wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())
except:
wikipedia.output(u'Except on Output')
if self.summary:
comment = self.summary
else:
comment = wikipedia.translate(mysite, self.msg)
if page.exists():
original_text = page.get()
isnew = True
new_text = original_text
needchange = True
#wikipedia.output(old)
temp = re.compile(ur'\{\{see\|(?P<parms>[^\}]*)\}\}', re.MULTILINE)
while temp.search(new_text) is not None and needchange:
for m in temp.finditer(new_text):
text = m.group()
isnew = False
parms = m.group('parms').strip()
oldsee = parms.split('|')
see = '{{see'
for wd in oldsee:
if wd.strip() == toword:
needchange = False
new_text = original_text
break
else:
see += '|' + wd.strip()
if not needchange:
break
see += '|' + toword + '}}'
new_text = new_text.replace(text, see)
break
if isnew:
new_text = '{{see|' + toword + u'}}\n' + original_text
while True:
if new_text == original_text:
try:
wikipedia.output('No changes were necessary in >>> \03{lightpurple}%s\03{default} <<< %s >>>'
% (page.aslink(), new_text[0:50]))
except:
wikipedia.output(u'Except on Output')
break
# Show the title of the page we're working on.
# Highlight the title in purple.
#wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
# % page.title())
try:
wikipedia.showDiff(original_text, new_text)
except:
wikipedia.output(u'Except on Output')
if self.acceptall:
break
choice = wikipedia.inputChoice(
u'Do you want to accept these changes?',
['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"],
['y', 'N', 'e', 'b', 'a', 'q'], 'N')
if choice == 'e':
editor = editarticle.TextEditor()
as_edited = editor.edit(original_text)
# if user didn't press Cancel
if as_edited and as_edited != new_text:
new_text = as_edited
continue
if choice == 'b':
webbrowser.open("http://%s%s" % (
page.site().hostname(),
page.site().nice_get_address(page.title())
))
wikipedia.input("Press Enter when finished in browser.")
original_text = page.get(get_redirect=True, force=True)
new_text = original_text
continue
if choice == 'q':
return
if choice == 'a':
self.acceptall = True
if choice == 'y':
page.put_async(new_text)
# choice must be 'N'
break
if self.acceptall and new_text != original_text:
try:
page.put(new_text, comment = comment, minorEdit = self.minor)
except wikipedia.LockedPage:
wikipedia.output(u"Page %s is locked; skipping." % title)
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict' % title)
except wikipedia.SpamfilterError, error:
wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
# NOTE(review): module-level report-file handle. PageCreateRobot receives its
# own 'prot' constructor argument, so it is unclear from this file whether
# this global is still used -- confirm against callers before removing.
prot = None
class PageCreateRobot:
    """
    Responsible for writing pages to the wiki, with the titles and contents
    given by a PageFromFileReader.
    """
    # Default edit summary, per language code.
    msg = {
        'lt': u'Automated creating of articles',
    }
    # The following messages are added to topic when the page already exists
    msg_top = {
        'lt': u'append on top',
    }
    msg_bottom = {
        'lt': u'append on bottom',
    }
    msg_force = {
        'lt': u'existing text overwritten',
    }
    append = ''

    # NOTE(review): returns self.append for *any* attribute not found by
    # normal lookup; this hides attribute-name typos.
    def __getattr__(self, append):
        return self.append

    def __init__(self, reader, force, append, summary, minor = False,
                 autosummary = False, debug = False, acceptall=False, acceptallnew=False, quit=False,
                 acceptallnewnotauto=False, prot=None, test=True, fromword = None):
        # reader   - object with a run() generator yielding
        #            (word, contents, wordsk, anton) records
        # force    - accept changes without asking
        # append   - 't' (top), 'b' (bottom), 'c' (change/overwrite),
        #            None (ask interactively); other values skip existing pages
        # summary  - edit summary; falls back to the msg table when empty
        # prot     - open file object; in test mode put() logs page names there
        # test     - when True, only log the page name, never edit the wiki
        # fromword - resume support: skip all records until this word is seen
        self.reader = reader
        self.force = force
        self.append = append
        self.summary = summary
        self.minor = minor
        self.autosummary = autosummary
        self.debug = debug
        self.acceptall = acceptall
        self.acceptallnew = acceptallnew
        self.quit = quit
        self.acceptallnewnotauto=acceptallnewnotauto
        self.prot = prot
        self.test = test
        self.fromword = fromword
        # While True, put() ignores records (until fromword is reached).
        self.skip = False
        if self.fromword is not None:
            self.skip = True

    def run(self):
        """Process every record from the reader; returns None when a put()
        call requested quitting, otherwise the still-pending fromword."""
        for word, contents, wordsk, anton in self.reader.run():
            self.put(word, contents['title'], contents['contents'], contents['auto'], contents['reddir'], contents['nenaud'])
            if self.quit:
                return None
        return self.fromword

    def put(self, word, title, contents, auto, reddir, nenaud):
        """Create or update page 'word' with the given title/contents,
        interactively confirming unless the auto-accept flags allow it."""
        # Resume support: skip records until self.fromword is reached.
        if self.fromword is not None:
            if word == self.fromword:
                self.skip = False
                self.fromword = None
        if self.skip:
            return
        wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % word)
        #wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<\n%s" % (word,contents))
        # Test mode: only record the page name, never touch the wiki.
        if self.test:
            self.prot.write('# [[%s]]\r\n' % word)
            return
        mysite = wikipedia.getSite()
        page = wikipedia.Page(mysite, word)
        # Show the title of the page we're working on.
        # Highlight the title in purple.
        # wikipedia.output(u">>> \03{lightpurple}%s\03{default} <<<" % page.title())
        if self.summary:
            comment = self.summary
        else:
            comment = wikipedia.translate(mysite, self.msg)
        comment_top = comment + " - " + wikipedia.translate(mysite, self.msg_top)
        comment_bottom = comment + " - " + wikipedia.translate(mysite, self.msg_bottom)
        comment_force = comment + " *** " + wikipedia.translate(mysite, self.msg_force) + " ***"
        # Local directory where redirect/unused page contents are dumped.
        rezfile = u'Zodziai/'
        if page.exists():
            old = page.get(get_redirect=True)
            try:
                wikipedia.output(old)
            except:
                wikipedia.output(u'Except on Output')
            # Redirect pages are not edited; the would-be contents are
            # appended to a local file instead.
            if page.isRedirectPage():
                _fnf = os.path.join('', rezfile+word+'.txt')
                wprot = codecs.open(_fnf, "a+", "utf-8")
                wprot.write('%s' % title + '\n' + contents)
                return
##            if self.test:
##                self.prot.write('# [[%s]] - egzistavo, reikia patikrinti\r\n' % word)
##                return
            # Preserve the page's interwiki links and categories: strip them
            # from the old body and re-append them after the new text.
            interwiki = wikipedia.interwikiFormat(wikipedia.getLanguageLinks(old))
            #wikipedia.output('Interwiki: %s\n' % interwiki)
            cat = wikipedia.categoryFormat(wikipedia.getCategoryLinks(old,mysite))
            #wikipedia.output('Kategorijos: %s\n' % cat)
            txt = wikipedia.removeLanguageLinks(old)
            txt = wikipedia.removeCategoryLinks(txt,mysite)
            #wikipedia.output('text: %s\n' % txt)
            #wikipedia.output('text: %s\n' % (txt + '\n' + contents + '\n' + cat + '\n' + interwiki))
            # append == None means: ask the user what to do with this page.
            if self.append == None:
                self.append = wikipedia.inputChoice(
                    u'Page <<%s>> already exists. What do?\n' % word,
                    ['Top', 'Bottom', 'Change', 'Skip'],
                    ['t', 'b', 'c', 's'], 's')
            if self.append == "t":
                wikipedia.output(u"Page %s already exists, appending on top!" % word)
                contents = title + '\n' + contents + '\n' + txt + '\n'
                if self.acceptallnewnotauto or self.force:
                    #contents += '{{patikrinti}}\n'
                    pass
                contents += cat + '\n' + interwiki
                comment = comment_top
            elif self.append == "b":
                wikipedia.output(u"Page %s already exists, appending on bottom!" % word)
                contents = txt + '\n' + contents + '\n'
                if self.acceptallnewnotauto or self.force:
                    #contents += '{{patikrinti}}\n'
                    pass
                contents += cat + '\n' + interwiki
                comment = comment_bottom
            elif self.append == "c":
                wikipedia.output(u"Page %s already exists, ***overwriting!" % word)
                contents = title + '\n' + contents + '\n'
                if self.acceptallnewnotauto or self.force:
                    contents += '{{patikrinti}}\n'
                    pass
                contents += cat + '\n' + interwiki
                comment = comment_force
            else:
                # Unknown append mode: keep the page unchanged.
                contents = old
            # Interactive confirmation, unless every auto-accept flag holds.
            if not self.acceptall or not self.force or not auto:
                choice = None
                if not auto:
                    if self.acceptallnewnotauto and not self.force:
                        choice = 'y'
                    elif self.force:
                        choice = 'y'
                    else:
                        choice = 'e'
                else:
                    if self.force:
                        choice = 'y'
                    else:
                        choice = wikipedia.inputChoice(
                            u'\nDo you want to accept these changes?\n',
                            ['Yes', 'No', 'Edit', 'Browser', 'All', 'AllOld', 'Skip', 'Change', "Quit"],
                            ['y', 'N', 'e', 'b', 'a', 'ao', 's', 'c', 'q'], 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(contents)
                    # if user didn't press Cancel
                    if as_edited and as_edited != contents:
                        contents = as_edited
                elif choice == 'b':
                    webbrowser.open("http://%s%s" % (
                        page.site().hostname(),
                        page.site().nice_get_address(page.title())
                    ))
                    wikipedia.input("Press Enter when finished in browser.")
                    old = page.get(get_redirect=True, force=True)
                    contents = old
                elif choice == 'a':
                    self.acceptall = True
                elif choice == 'ao':
                    self.acceptall = True
                elif choice == 's':
                    self.append = 's'
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    return
                elif choice == 'c':
                    # Reset the append mode so the user is asked again.
                    self.append = None
                elif choice == 'q':
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    self.quit = True
                    contents = old
                    return
                elif choice != 'y':
                    wikipedia.output(u"Page %s already exists, not adding!" % word)
                    contents = old
                    return
                # After 'c' (change mode), ask once more without the
                # 'Change' option.
                if choice == 'c':
                    choice = wikipedia.inputChoice(
                        u'\nDo you want to accept these changes?\n',
                        ['Yes', 'No', 'Edit', 'Browser', 'All', 'AllOld', 'Skip', "Quit"],
                        ['y', 'N', 'e', 'b', 'a', 'ao', 's', 'q'], 'N')
                    if choice == 'e':
                        editor = editarticle.TextEditor()
                        as_edited = editor.edit(contents)
                        # if user didn't press Cancel
                        if as_edited and as_edited != contents:
                            contents = as_edited
                    elif choice == 'a':
                        self.acceptall = True
                    elif choice == 'ao':
                        self.acceptall = True
                    elif choice == 'b':
                        webbrowser.open("http://%s%s" % (
                            page.site().hostname(),
                            page.site().nice_get_address(page.title())
                        ))
                        wikipedia.input("Press Enter when finished in browser.")
                        old = page.get(get_redirect=True, force=True)
                        contents = old
                    elif choice == 's':
                        wikipedia.output(u"Not adding!")
                        self.append = 's'
                        return
                    elif choice == 'q':
                        wikipedia.output(u"Not adding!")
                        self.quit = True
                        return
                    elif choice != 'y':
                        wikipedia.output(u"Not adding!")
                        return
            if old == contents:
                wikipedia.output(u"Page %s already exists, not adding!" % word)
                return
        else:
##            if self.test: auto, reddir, nenaud
##                return
            # New page. Unused ('nenaud') auto-generated entries become
            # redirects; their real contents are saved to a local file.
            if nenaud and auto:
                _fnf = os.path.join('', rezfile+word+'.txt')
                wprot = codecs.open(_fnf, "a+", "utf-8")
                wprot.write('%s' % title + '\n' + contents)
                contents = reddir
            else:
                contents = title + '\n' + contents
            if not self.acceptallnew or not auto:
                choice = None
                if not auto:
                    if self.acceptallnewnotauto:
                        contents = contents #+ '{{patikrinti}}\n'
                        choice = 'y'
                    else:
                        choice = 'e'
                else:
                    choice = wikipedia.inputChoice(
                        u'\nDo you want to accept these changes?\n',
                        ['Yes', 'No', 'Edit', 'All', 'AllNew', 'Skip', 'Change', "Quit"],
                        ['y', 'N', 'e', 'a', 'an', 's', 'c', 'q'], 'N')
                if choice == 'e':
                    editor = editarticle.TextEditor()
                    as_edited = editor.edit(contents)
                    # if user didn't press Cancel
                    if as_edited and as_edited != contents:
                        contents = as_edited
                elif choice == 'a':
                    self.acceptallnew = True
                elif choice == 'an':
                    self.acceptallnew = True
                elif choice == 's':
                    wikipedia.output(u"Not adding!")
                    self.append = 's'
                    return
                elif choice == 'c':
                    # Reset the append mode so the user is asked again.
                    self.append = None
                elif choice == 'q':
                    wikipedia.output(u"Not adding!")
                    self.quit = True
                    return
                elif choice != 'y':
                    wikipedia.output(u"Not adding!")
                    return
                # After 'c' (change mode), ask once more without the
                # 'Change' option.
                if choice == 'c':
                    choice = wikipedia.inputChoice(
                        u'\nDo you want to accept these changes?\n',
                        ['Yes', 'No', 'Edit', 'All', 'AllNew', 'Skip', "Quit"],
                        ['y', 'N', 'e', 'a', 'an', 's', 'q'], 'N')
                    if choice == 'e':
                        editor = editarticle.TextEditor()
                        as_edited = editor.edit(contents)
                        # if user didn't press Cancel
                        if as_edited and as_edited != contents:
                            contents = as_edited
                    elif choice == 'a':
                        self.acceptallnew = True
                    elif choice == 'an':
                        self.acceptallnew = True
                    elif choice == 's':
                        wikipedia.output(u"Not adding!")
                        self.append = 's'
                        return
                    elif choice == 'q':
                        wikipedia.output(u"Not adding!")
                        self.quit = True
                        return
                    elif choice != 'y':
                        wikipedia.output(u"Not adding!")
                        return
        if self.autosummary:
            comment = ''
            wikipedia.setAction('')
        # Strip *leading* newlines (they cause trouble when creating
        # redirects). NOTE(review): the old comment said "trailing", but the
        # regex is anchored at '^'.
        contents = re.sub('^[\r\n]*','', contents)
        if self.debug:
            wikipedia.output("*** Debug mode ***\n" + \
                "\03{lightpurple}word\03{default}: " + word + "\n" + \
                "\03{lightpurple}contents\03{default}:\n" + contents + "\n" \
                "\03{lightpurple}comment\03{default}: " + comment + "\n")
            return
        try:
            page.put(contents, comment = comment, minorEdit = self.minor)
        except wikipedia.LockedPage:
            wikipedia.output(u"Page %s is locked; skipping." % title)
        except wikipedia.EditConflict:
            wikipedia.output(u'Skipping %s because of edit conflict' % title)
        except wikipedia.SpamfilterError, error:
            wikipedia.output(u'Cannot change %s because of spam blacklist entry %s' % (title, error.url))
class ReplaceRobot:
"""
A bot that can do text replacements.
"""
def __init__(self, generator, replacements, exceptions={},
acceptall=False, allowoverlap=False, recursive=False,
addedCat=None, sleep=None):
"""
Arguments:
* generator - A generator that yields Page objects.
* replacements - A list of 2-tuples of original text (as a
compiled regular expression) and replacement
text (as a string).
* exceptions - A dictionary which defines when not to change an
occurence. See below.
* acceptall - If True, the user won't be prompted before changes
are made.
* allowoverlap - If True, when matches overlap, all of them are
replaced.
* addedCat - If set to a value, add this category to every page
touched.
Structure of the exceptions dictionary:
This dictionary can have these keys:
title
A list of regular expressions. All pages with titles that
are matched by one of these regular expressions are skipped.
text-contains
A list of regular expressions. All pages with text that
contains a part which is matched by one of these regular
expressions are skipped.
inside
A list of regular expressions. All occurences are skipped which
lie within a text region which is matched by one of these
regular expressions.
inside-tags
A list of strings. These strings must be keys from the
exceptionRegexes dictionary in wikipedia.replaceExcept().
"""
self.generator = generator
self.replacements = replacements
self.exceptions = exceptions
self.acceptall = acceptall
self.allowoverlap = allowoverlap
self.recursive = recursive
if addedCat:
site = wikipedia.getSite()
cat_ns = site.category_namespaces()[0]
self.addedCat = wikipedia.Page(site,
cat_ns + ':' + addedCat)
self.sleep = sleep
#wikipedia.output(u'fix gen = %s' % self.replacements)
def isTitleExcepted(self, title):
"""
Iff one of the exceptions applies for the given title, returns True.
"""
if 'title' in self.exceptions:
for exc in self.exceptions['title']:
if exc.search(title):
return True
if 'require-title' in self.exceptions:
for req in self.exceptions['require-title']:
if not req.search(title):
return True
return False
def isTextExcepted(self, original_text):
"""
Iff one of the exceptions applies for the given page contents,
returns True.
"""
if 'text-contains' in self.exceptions:
for exc in self.exceptions['text-contains']:
if exc.search(original_text):
return True
if 'require-title' in self.exceptions:
for req in self.exceptions['require-title']:
if not req.search(title): # if not all requirements are met:
return True
if 'require-text' in self.exceptions:
ret = True
for exc in self.exceptions['require-text']:
if exc.search(original_text):
return False
return ret
return False
def doReplacements(self, original_text):
"""
Returns the text which is generated by applying all replacements to
the given text.
"""
new_text = original_text
exceptions = []
if 'inside-tags' in self.exceptions:
exceptions += self.exceptions['inside-tags']
if 'inside' in self.exceptions:
exceptions += self.exceptions['inside']
for old, new in self.replacements:
if self.sleep != None:
time.sleep(self.sleep)
new_text = wikipedia.replaceExcept(new_text, old, new, exceptions,
allowoverlap=self.allowoverlap)
return new_text
def run(self):
"""
Starts the robot.
"""
# Run the generator which will yield Pages which might need to be
# changed.
#wikipedia.output(u'fix gen = %s' % self.generator)
#return
for page in self.generator:
wikipedia.output(
u'File %s .'
% page.aslink())
if self.isTitleExcepted(page.title()):
wikipedia.output(
u'Skipping %s because the title is on the exceptions list.'
% page.aslink())
continue
try:
# Load the page's text from the wiki
original_text = page.get(get_redirect=True)
if not page.canBeEdited():
wikipedia.output(u"You can't edit page %s"
% page.aslink())
continue
except wikipedia.NoPage:
wikipedia.output(u'Page %s not found' % page.aslink())
continue
new_text = original_text
while True:
if self.isTextExcepted(new_text):
wikipedia.output(
u'Skipping %s because it contains text that is on the exceptions list.'
% page.aslink())
break
new_text = self.doReplacements(new_text)
if new_text == original_text:
wikipedia.output('No changes were necessary in %s'
% page.aslink())
break
if self.recursive:
newest_text = self.doReplacements(new_text)
while newest_text!=new_text:
new_text = newest_text
newest_text = self.doReplacements(new_text)
if hasattr(self, "addedCat"):
cats = page.categories(nofollow_redirects=True)
if self.addedCat not in cats:
cats.append(self.addedCat)
new_text = wikipedia.replaceCategoryLinks(new_text,
cats)
# Show the title of the page we're working on.
# Highlight the title in purple.
wikipedia.output(u"\n\n>>> \03{lightpurple}%s\03{default} <<<"
% page.title())
wikipedia.showDiff(original_text, new_text)
if self.acceptall:
break
choice = wikipedia.inputChoice(
u'Do you want to accept these changes?',
['Yes', 'No', 'Edit', 'open in Browser', 'All', "Quit"],
['y', 'N', 'e', 'b', 'a', 'q'], 'N')
if choice == 'e':
editor = editarticle.TextEditor()
as_edited = editor.edit(original_text)
# if user didn't press Cancel
if as_edited and as_edited != new_text:
new_text = as_edited
continue
if choice == 'b':
webbrowser.open("http://%s%s" % (
page.site().hostname(),
page.site().nice_get_address(page.title())
))
wikipedia.input("Press Enter when finished in browser.")
original_text = page.get(get_redirect=True, force=True)
new_text = original_text
continue
if choice == 'q':
return
if choice == 'a':
self.acceptall = True
if choice == 'y':
page.put_async(new_text)
# choice must be 'N'
break
if self.acceptall and new_text != original_text:
try:
page.put(new_text)
except wikipedia.EditConflict:
wikipedia.output(u'Skipping %s because of edit conflict'
% (page.title(),))
except wikipedia.SpamfilterError, e:
wikipedia.output(
u'Cannot change %s because of blacklist entry %s'
% (page.title(), e.url))
except wikipedia.PageNotSaved, error:
wikipedia.output(u'Error putting page: %s'
% (error.args,))
except wikipedia.LockedPage:
wikipedia.output(u'Skipping %s (locked page)'
% (page.title(),))
def prepareRegexForMySQL(pattern):
    """
    Translate a Python regular expression into MySQL REGEXP syntax.

    Python shorthand character classes are mapped onto POSIX bracket
    expressions, and single quotes are escaped so the pattern can be
    embedded in an SQL string literal.
    """
    substitutions = (
        ('\\s', '[:space:]'),
        ('\\d', '[:digit:]'),
        ('\\w', '[:alnum:]'),
        ("'", "\\'"),
    )
    for python_form, mysql_form in substitutions:
        pattern = pattern.replace(python_form, mysql_form)
    return pattern
def noskiem(word):
    """Return *word* with every '~' character removed."""
    return ''.join(ch for ch in word if ch != '~')
def skiem(word):
    """Return *word* with every '~~' pair turned into '-' and any
    remaining single '~' removed."""
    hyphenated = word.replace('~~', '-')
    return hyphenated.replace('~', '')
def cikleWords(wordsList, force, append, acceptallnew=False, acceptallnewnotauto=False,
               acceptallgroups=False, prot=None, test=True, fromword=None, zodziai=None):
    """Drive the article-creation cycle for every entry in wordsList.

    For each word the function expands the templates of its word form
    (from ``fixeswords.wordforms``), builds the list of derived sub-words,
    renders the article text for every inflected form and feeds the result
    to PageCreateRobot; it then recurses into the derived sub-words.

    @param wordsList: sequence of word-description dicts (see fixeswords).
    @param force, append: passed through to PageCreateRobot.
    @param acceptallnew, acceptallnewnotauto: auto-accept flags for the bot.
    @param acceptallgroups: when True, create every group without asking.
    @param prot: protocol (log) file object handed to the bot.
    @param test: dry-run flag for the bot.
    @param fromword: word to resume from; updated by each bot run.
    @param zodziai: accumulator dict keyed by the plain word, shared across
        recursion levels.  BUGFIX: the default used to be the mutable
        literal ``{}``, which is shared between independent calls; it is
        now None and replaced with a fresh dict per call.
    @return: (acceptallgroup, fromword, zodziai) so callers can chain state.
    """
    if zodziai is None:
        zodziai = {}

    def _ctx(src):
        # Raw substitution context (syllable markers kept) for the
        # Template keys every fixeswords template may reference.
        return {'s1': src['s1'], 's2': src['s2'], 's3': src['s3'],
                's4': src['s4'], 's5': src['s5'],
                'fs0': src['fs0'], 'fs1': src['fs1'], 'fs2': src['fs2'],
                'fs3': src['fs3'], 'fs4': src['fs4'], 'fs5': src['fs5'],
                'tipas': src['tipas'], 'form': src['form']}

    def _noskiem_ctx(src):
        # Same context, but with syllable markers stripped from s1..s5.
        ctx = _ctx(src)
        for key in ('s1', 's2', 's3', 's4', 's5'):
            ctx[key] = noskiem(ctx[key])
        return ctx

    def _expand(word2, ctx, reiksmes_src, pozymis_val, eti):
        """Instantiate one derived-word template set.

        word2 -- template dict to expand; ctx -- substitution context of
        the parent word; reiksmes_src -- dict the reiksmes* defaults are
        read from (the parent word for wordform entries, word2 itself for
        nested 'List' entries); pozymis_val -- parent word's pozymis;
        eti -- pre-rendered etymology, stored as the new word's 'fs4'.
        Returns (newword, ltipas).
        """
        rk = {}
        for key in (u'reiksmesimas', u'reiksmesejas', u'reiksmeseja',
                    u'reiksmesojas', u'reiksmesoja'):
            rk[key] = reiksmes_src.get(key, '')
        lg1 = Template(word2['g1']).substitute(**ctx)
        ls1 = Template(word2['s1']).substitute(**ctx)
        ls2 = Template(word2['s2']).substitute(**ctx)
        ls3 = Template(word2['s3']).substitute(**ctx)
        ls4 = Template(word2['s4']).substitute(**ctx)
        ls5 = Template(word2['s5']).substitute(**ctx)
        lfs0 = noskiem(Template(word2['fs0']).substitute(**ctx))
        lfs1 = noskiem(Template(word2['fs1']).substitute(**ctx))
        lfs2 = noskiem(Template(word2['fs2']).substitute(**ctx))
        lfs3 = noskiem(Template(word2['fs3']).substitute(**ctx))
        # fs4 is rendered (so a broken template still raises, as before)
        # but the value is deliberately replaced by the etymology below --
        # this matches the original behaviour, where lfs4 was computed and
        # then shadowed by eti in the result dict.
        noskiem(Template(word2['fs4']).substitute(**ctx))
        lfs5 = noskiem(Template(word2['fs5']).substitute(**ctx))
        ltipas = noskiem(Template(word2['tipas']).substitute(**ctx))
        lreiksmes = noskiem(Template(word2['reiksmes']).substitute(
            reiksmesimas=rk[u'reiksmesimas'], reiksmesejas=rk[u'reiksmesejas'],
            reiksmeseja=rk[u'reiksmeseja'], reiksmesojas=rk[u'reiksmesojas'],
            reiksmesoja=rk[u'reiksmesoja'], **ctx))
        # ${x} is kept as a placeholder here; it is filled in later, once
        # it is known whether the word is a main ("pagrf") form.
        lpozymis = noskiem(Template(word2['pozymis']).substitute(
            pozymis=pozymis_val, x=u'${x}', **ctx))
        newword = {'dalis': word2['dalis'],
                   'form': word2['form'],
                   'g1': lg1, 's1': ls1, 's2': ls2, 's3': ls3, 's4': ls4, 's5': ls5,
                   'fs0': lfs0, 'fs1': lfs1, 'fs2': lfs2,
                   'pozymis': lpozymis,
                   'israiskos': list(word2['israiskos']),
                   'sinonimai': list(word2['sinonimai']),
                   'reiksmes': lreiksmes,
                   'fs3': lfs3, 'fs4': eti, 'fs5': lfs5, 'tipas': ltipas,
                   }
        if u'List' in word2:
            newword['List'] = word2['List']
        return newword, ltipas

    acceptallgroup = acceptallgroups
    for word in wordsList:
        wordform = fixeswords.wordforms[word['dalis']][word['form']]
        ctx = _ctx(word)
        noctx = _noskiem_ctx(word)
        # Etymology line, rendered once per word with plain (marker-free)
        # stems but raw fs* values.
        eti = u''
        if word['fs0'] != u'':
            eti = Template(wordform[word['fs0']]).substitute(**noctx)
        # Expand the derived words declared by the word form ...
        subwordsList = []
        for word2 in wordform['wordsList']:
            newword, ltipas = _expand(word2, ctx, word, word['pozymis'], eti)
            # Doubly negated forms are never created.
            if '[[neig.]] [[neig.]] ' not in ltipas:
                subwordsList.append(newword)
        # ... and the per-word extras; note their reiksmes* defaults come
        # from the entry itself (word2), not from the parent word.
        if u'List' in word:
            for word2 in word['List']:
                newword, ltipas = _expand(word2, ctx, word2, word['pozymis'], eti)
                if '[[neig.]] [[neig.]] ' not in ltipas:
                    subwordsList.append(newword)
        # Human-readable, sorted listing of the derived words, used both
        # for console output and the "Išvestiniai žodžiai" section.
        wordsll = []
        for wrd in subwordsList:
            wrdform = fixeswords.wordforms[wrd['dalis']][wrd['form']]
            wrdref = Template(wrdform['ref']).substitute(**_noskiem_ctx(wrd))
            if wrdref != u'':
                wordsllt = u'* ' + wrdref
                if wrd[u'tipas'] != u'':
                    wordsllt += u' (' + wrd[u'tipas'] + u')'
                wordsllt += Template(wrd['pozymis']).substitute(x=u'{{x}}')
                wordsll.append(wordsllt)
        wordsll.sort()
        for wrd in wordsll:
            wikipedia.output(u' <<<\03{lightpurple}%s\03{default}>>> ' % wrd)
        # Render one article per inflected form of the current word.
        Words = []
        for wordtemp, form in wordform['forms']:
            t1 = Template(wordtemp).substitute(**ctx)
            skiemt1 = skiem(t1)
            noskiemt1 = noskiem(t1)
            ps1 = noskiem(word['s1'])
            pg1 = noskiem(word['g1'])
            tf = Template(form).substitute(**ctx)
            t2 = Template(wordform[tf]['text'])
            # --- Sinonimai section -------------------------------------
            sinonimai = u''
            if len(word['sinonimai']) > 0:
                sinonimai = u'==== Sinonimai ====\n\n{{sin-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                for wordii, sin in enumerate(word['sinonimai']):
                    sinonimai += u'* {{t+|lt|'+sin+u'}}\n'
                    # Insert the column break once, half way down the list.
                    if wordii < len(word['sinonimai']) / 2.0 <= wordii + 1:
                        sinonimai += u'{{sin-mid}}\n'
                sinonimai += u'{{sin-bottom}}\n\n'
            # --- Antonimai section -------------------------------------
            antonimai = u''
            antonimas = u''
            if noskiemt1.find('ne') == 0 and ('[[ne-]]' in word['fs4']
                                              or '[[ne-]]' in word['fs5']):
                # A negated word: its antonym is the word without "ne".
                antonimas = noskiemt1[2:]
                antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                antonimai += u'* {{t+|lt|'+noskiemt1[2:]+u'}}\n'
                antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n'
            else:
                if word['fs5'].find(u'{{sangrž.}}') == 0:
                    # Reflexive word: antonym is "nesi..." with the
                    # reflexive ending trimmed.
                    antonimas = 'nesi' + noskiemt1
                    if antonimas.endswith('si'):
                        antonimas = antonimas[:-2]
                    if antonimas.endswith('s'):
                        antonimas = antonimas[:-1]
                    antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                    antonimai += u'* {{t+|lt|'+antonimas+u'}}\n'
                    antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n'
                else:
                    antonimas = 'ne' + noskiemt1
                    antonimai = u'==== Antonimai ====\n\n{{ant-top|kalba=lt|vardas='+noskiemt1+u'|tipas=bendrai}}\n'
                    antonimai += u'* {{t+|lt|ne'+noskiemt1+u'}}\n'
                    antonimai += u'{{ant-mid}}\n{{ant-bottom}}\n\n'
            # --- Derived words section ---------------------------------
            isvestiniai = u''
            if len(wordsll) > 0:
                isvestiniai = u'==== Išvestiniai žodžiai ====\n\n{{rel-top|kalba=lt|vardas='+noskiemt1+u'|tipas=Išvestiniai žodžiai}}\n'
                for wordii, wrd in enumerate(wordsll):
                    isvestiniai += wrd + u'\n'
                    if wordii < len(wordsll) / 2.0 <= wordii + 1:
                        isvestiniai = isvestiniai + u'{{rel-mid}}\n'
                isvestiniai += u'{{rel-bottom}}\n\n'
            # --- Expressions section -----------------------------------
            israiskos = u''
            if len(word['israiskos']) > 0:
                israiskos = u'==== Išraiškos arba posakiai ====\n\n{{rel-top|kalba=lt|vardas='+noskiemt1+u'|tipas=Išraiškos arba posakiai}}\n'
                for wordii, isr in enumerate(word['israiskos']):
                    israiskos += u'* {{t+|lt|'+isr+u'}}\n'
                    if wordii < len(word['israiskos']) / 2.0 <= wordii + 1:
                        israiskos += u'{{rel-mid}}\n'
                israiskos += u'{{rel-bottom}}\n\n'
            # --- Translations: one empty block per numbered sense ------
            vertimai = u''
            sabl = re.compile(u'^\#+\s+(?P<tipas>.*?)(?:\:|\.)$', re.M)
            inmatch = 0
            match = sabl.search(word['reiksmes'], inmatch)
            while match != None:
                inmatch = match.end()
                vtipas = match.group('tipas')
                if vtipas != None:
                    # Reduce [[target|label]] links to their label ...
                    sabl1 = re.compile(u'\[\[(?P<n1>[^\[\]\|]*?)\|(?P<n2>[^\[\]\|]*?)\]\]', re.M)
                    match1 = sabl1.search(vtipas, 0)
                    while match1 != None:
                        vtipas = vtipas[:match1.start()] + match1.group('n2') + vtipas[match1.end():]
                        match1 = sabl1.search(vtipas, 0)
                    # ... and [[plain]] links to their text.
                    sabl1 = re.compile(u'\[\[(?P<n1>[^\[\]\|]*?)\]\]', re.M)
                    match1 = sabl1.search(vtipas, 0)
                    while match1 != None:
                        vtipas = vtipas[:match1.start()] + match1.group('n1') + vtipas[match1.end():]
                        match1 = sabl1.search(vtipas, 0)
                    vertimai += u'{{trans-top|kalba=lt|vardas=' + noskiemt1 + u'|tipas='+ vtipas + u'}}\n' + \
                                u'{{trans-mid}}\n' + \
                                u'{{trans-bottom}}\n'
                match = sabl.search(word['reiksmes'], inmatch)
            # --- Bookkeeping: does any non-auto form exist? ------------
            if noskiemt1 not in zodziai:
                zodziai[noskiemt1] = {}
            if u'pagrf' not in zodziai[noskiemt1]:
                zodziai[noskiemt1][u'pagrf'] = not wordform[tf]['auto']
            elif not zodziai[noskiemt1][u'pagrf']:
                zodziai[noskiemt1][u'pagrf'] = not wordform[tf]['auto']
            pozymis = Template(word['pozymis'])
            if zodziai[noskiemt1][u'pagrf']:
                pozymis = pozymis.substitute(x=u'{{x|lt|p}}')
            else:
                pozymis = pozymis.substitute(x=u'{{x|lt|}}')
            reddir = Template(u'#REDIRECT [[${reddir}]]').substitute(reddir=ps1 + pg1)
            # A bare {{x|lt|}} / {{x|lt|p}} marker in pozymis flags the
            # page as unused ("nenaudojamas").
            sabl2 = re.compile(u'\{\{x\|lt\|p?\}\}', re.M)
            nenaud = sabl2.search(pozymis, 0) != None
            t2 = t2.substitute(reiksmes=word['reiksmes'], pozymis=pozymis,
                               sinonimai=sinonimai, antonimai=antonimai,
                               isvestiniai=isvestiniai, israiskos=israiskos,
                               vertimai=vertimai, word=noskiemt1, words=skiemt1,
                               **noctx)
            # Drop the empty forms placeholder "[[]], [[]], [[]]" if present.
            sabl3 = re.compile(u'\[\[\]\]\, \[\[\]\]\, \[\[\]\]', re.M)
            match3 = sabl3.search(t2, 0)
            if match3 != None:
                t2 = t2[:match3.start()] + t2[match3.end():]
            t3 = Template(wordform[tf]['title']).substitute(
                word=noskiemt1, words=skiemt1, **noctx)
            Words.append((noskiemt1,
                          {'title': t3, 'contents': t2,
                           'auto': not zodziai[noskiemt1][u'pagrf'],
                           'reddir': reddir, 'nenaud': nenaud},
                          skiemt1, antonimas))
        # --- Hand this word's pages to the robot -----------------------
        preloadingGen = PageCreateReader(Words)
        bot = PageCreateRobot(preloadingGen, force, append, wordform['msg'],
                              acceptallnew=acceptallnew,
                              acceptallnewnotauto=acceptallnewnotauto,
                              prot=prot, test=test, fromword=fromword)
        wikipedia.output(u'Bus formuojami: ')
        for wordl, descr, wordlsk, anton in Words:
            wikipedia.output(u' <<<\03{lightpurple}%s\03{default}>>> <<<\03{lightpurple}%s\03{default}>>> <<<\03{lightpurple}%s\03{default}>>> ' % (wordl, wordlsk, anton))
        if acceptallgroup:
            fromword = bot.run()
        else:
            choice = wikipedia.inputChoice(
                u'Ar formuoti?',
                ['Create', 'CreateAll', 'No'],
                ['c', 'ca', 'N'], 'N')
            if choice == 'c':
                fromword = bot.run()
            elif choice == 'ca':
                acceptallgroup = True
                fromword = bot.run()
        # Recurse into the derived words of this entry, carrying the
        # accumulated state along.
        (acceptallgroup, fromword, zodziai) = cikleWords(
            subwordsList, force, bot.append, acceptallnew=acceptallnew,
            acceptallnewnotauto=acceptallnewnotauto,
            acceptallgroups=acceptallgroup, prot=prot,
            test=test, fromword=fromword, zodziai=zodziai)
    return (acceptallgroup, fromword, zodziai)
def main(*args):
add_cat = None
gen = None
# summary message
summary_commandline = None
# Array which will collect commandline parameters.
# First element is original text, second element is replacement text.
commandline_replacements = []
# A list of 2-tuples of original text and replacement text.
replacements = []
# Don't edit pages which contain certain texts.
exceptions = {
'title': [],
'text-contains': [],
'require-text': [],
'inside': [],
'inside-tags': [],
'require-title': [], # using a seperate requirements dict needs some
} # major refactoring of code.
# Should the elements of 'replacements' and 'exceptions' be interpreted
# as regular expressions?
regex = False
# Predefined fixes from dictionary 'fixes' (see above).
fix = None
force = False
append = 'b'
summary = None
minor = False
autosummary = False
debug = False
# the dump's path, either absolute or relative, which will be used
# if -xml flag is present
xmlFilename = None
useSql = False
PageTitles = []
# will become True when the user presses a ('yes to all') or uses the
# -always flag.
acceptall = False
# Will become True if the user inputs the commandline parameter -nocase
caseInsensitive = False
# Will become True if the user inputs the commandline parameter -dotall
dotall = False
# Will become True if the user inputs the commandline parameter -multiline
multiline = False
# Do all hits when they overlap
allowoverlap = False
# Do not recurse replacement
recursive = False
# This factory is responsible for processing command line arguments
# that are also used by other scripts and that determine on which pages
# to work on.
genFactory = pagegenerators.GeneratorFactory()
# Load default summary message.
# BUG WARNING: This is probably incompatible with the -lang parameter.
wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg))
# Between a regex and another (using -fix) sleep some time (not to waste
# too much CPU
sleep = None
#Set the regular expression flags
flags = re.UNICODE
if caseInsensitive:
flags = flags | re.IGNORECASE
if dotall:
flags = flags | re.DOTALL
if multiline:
flags = flags | re.MULTILINE
# Read commandline parameters.
for arg in wikipedia.handleArgs(*args):
if arg == '-regex':
regex = True
elif arg.startswith('-xmlstart'):
if len(arg) == 9:
xmlStart = wikipedia.input(
u'Please enter the dumped article to start with:')
else:
xmlStart = arg[10:]
elif arg.startswith('-xml'):
if len(arg) == 4:
xmlFilename = wikipedia.input(
u'Please enter the XML dump\'s filename:')
else:
xmlFilename = arg[5:]
elif arg =='-sql':
useSql = True
elif arg.startswith('-page'):
if len(arg) == 5:
PageTitles.append(wikipedia.input(
u'Which page do you want to change?'))
else:
PageTitles.append(arg[6:])
elif arg.startswith('-excepttitle:'):
exceptions['title'].append(arg[13:])
elif arg.startswith('-requiretitle:'):
exceptions['require-title'].append(arg[14:])
elif arg.startswith('-excepttext:'):
exceptions['text-contains'].append(arg[12:])
elif arg.startswith('-exceptinside:'):
exceptions['inside'].append(arg[14:])
elif arg.startswith('-exceptinsidetag:'):
exceptions['inside-tags'].append(arg[17:])
elif arg == "-appendtop":
append = "t"
elif arg == "-appendbottom":
append = "b"
elif arg == "-force":
force=True
elif arg == '-minor':
minor = True
elif arg.startswith("-summary:"):
summary = arg[9:]
elif arg == '-autosummary':
autosummary = True
elif arg.startswith('-fix:'):
fix = arg[5:]
elif arg.startswith('-sleep:'):
sleep = float(arg[7:])
elif arg == '-always':
acceptall = True
elif arg == '-recursive':
recursive = True
elif arg == '-nocase':
caseInsensitive = True
elif arg == '-dotall':
dotall = True
elif arg == '-multiline':
multiline = True
elif arg.startswith('-addcat:'):
add_cat = arg[len('addcat:'):]
elif arg.startswith('-summary:'):
wikipedia.setAction(arg[len('-summary:'):])
summary_commandline = True
elif arg.startswith('-allowoverlap'):
allowoverlap = True
else:
if not genFactory.handleArg(arg):
commandline_replacements.append(arg)
if len(commandline_replacements) % 2:
raise wikipedia.Error, 'require even number of replacements.'
elif len(commandline_replacements) == 2 and fix == None:
replacements.append((commandline_replacements[0],
commandline_replacements[1]))
if summary_commandline == None:
wikipedia.setAction(wikipedia.translate(wikipedia.getSite(), msg )
% (' (-' + commandline_replacements[0] + ' +'
+ commandline_replacements[1] + ')'))
elif len(commandline_replacements) > 1:
if fix == None:
for i in xrange (0, len(commandline_replacements), 2):
replacements.append((commandline_replacements[i],
commandline_replacements[i + 1]))
if summary_commandline == None:
pairs = [( commandline_replacements[i],
commandline_replacements[i + 1] )
for i in range(0, len(commandline_replacements), 2)]
replacementsDescription = '(%s)' % ', '.join(
[('-' + pair[0] + ' +' + pair[1]) for pair in pairs])
wikipedia.setAction(
wikipedia.translate(wikipedia.getSite(), msg )
% replacementsDescription)
else:
raise wikipedia.Error(
'Specifying -fix with replacements is undefined')
elif fix == None:
old = wikipedia.input(u'Please enter the text that should be replaced:')
new = wikipedia.input(u'Please enter the new text:')
change = '(-' + old + ' +' + new
replacements.append((old, new))
while True:
old = wikipedia.input(
u'Please enter another text that should be replaced, or press Enter to start:')
if old == '':
change = change + ')'
break
new = wikipedia.input(u'Please enter the new text:')
change = change + ' & -' + old + ' +' + new
replacements.append((old, new))
if not summary_commandline == True:
default_summary_message = wikipedia.translate(wikipedia.getSite(), msg) % change
wikipedia.output(u'The summary message will default to: %s'
% default_summary_message)
summary_message = wikipedia.input(
u'Press Enter to use this default message, or enter a description of the\nchanges your bot will make:')
if summary_message == '':
summary_message = default_summary_message
wikipedia.setAction(summary_message)
elif fix not in ('word', 'nesusije') :
# Perform one of the predefined actions.
try:
fix = fixeswords.fixes[fix]
except KeyError:
wikipedia.output(u'Available predefined fixes are: %s'
% fixeswords.fixes.keys())
return
if 'regex' in fix:
regex = fix['regex']
if 'msg' in fix:
wikipedia.setAction(
wikipedia.translate(wikipedia.getSite(), fix['msg']))
if 'exceptions' in fix:
exceptions = fix['exceptions']
if 'nocase' in fix:
caseInsensitive = fix['nocase']
replacements = fix['replacements']
#wikipedia.output(u'fix get = %s' % fix)
# Pre-compile all regular expressions here to save time later
for i in range(len(replacements)):
old, new = replacements[i]
if not regex:
old = re.escape(old)
oldR = re.compile(old, flags)
replacements[i] = oldR, new
for exceptionCategory in ['title', 'require-title', 'text-contains', 'inside', 'require-text']:
if exceptionCategory in exceptions:
patterns = exceptions[exceptionCategory]
if not regex:
patterns = [re.escape(pattern) for pattern in patterns]
patterns = [re.compile(pattern, flags) for pattern in patterns]
exceptions[exceptionCategory] = patterns
#wikipedia.output(u'fix rep = %s' % replacements)
#wikipedia.output(u'fix page = %s' % PageTitles)
#return
if xmlFilename:
try:
xmlStart
except NameError:
xmlStart = None
gen = XmlDumpReplacePageGenerator(xmlFilename, xmlStart,
replacements, exceptions)
elif useSql:
whereClause = 'WHERE (%s)' % ' OR '.join(
["old_text RLIKE '%s'" % prepareRegexForMySQL(old.pattern)
for (old, new) in replacements])
if exceptions:
exceptClause = 'AND NOT (%s)' % ' OR '.join(
["old_text RLIKE '%s'" % prepareRegexForMySQL(exc.pattern)
for exc in exceptions])
else:
exceptClause = ''
query = u"""
SELECT page_namespace, page_title
FROM page
JOIN text ON (page_id = old_id)
%s
%s
LIMIT 200""" % (whereClause, exceptClause)
gen = pagegenerators.MySQLPageGenerator(query)
elif PageTitles:
pages = [wikipedia.Page(wikipedia.getSite(), PageTitle)
for PageTitle in PageTitles]
gen = iter(pages)
if fix not in ('word', 'nesusije'):
gen = genFactory.getCombinedGenerator(gen)
if fix not in ('word', 'nesusije'):
if not gen:
# syntax error, show help text from the top of this file
wikipedia.showHelp('replace')
return
if xmlFilename:
# XML parsing can be quite slow, so use smaller batches and
# longer lookahead.
preloadingGen = pagegenerators.PreloadingGenerator(gen,
pageNumber=20, lookahead=100)
elif fix not in ('word', 'nesusije'):
preloadingGen = pagegenerators.PreloadingGenerator(gen, pageNumber=60)
_fnf = os.path.join('', 'protokolas.txt')
prot = codecs.open(_fnf, "w+", "utf-8")
if fix == 'word':
fromword = None
zodziai = {}
(acceptallgroup, fromword, zodziai) = cikleWords(fixeswords.wordsList, force, append, acceptallnew=True,
acceptallnewnotauto=True, acceptallgroups=False, prot=prot,
test=True, fromword=fromword, zodziai=zodziai)
_fnf = os.path.join('', 'protokolas2.txt')
prot = codecs.open(_fnf, "w+", "utf-8")
wikipedia.output(u'<<<\03{lightpurple} Prasideda straipsnių kūrimo etapas. \03{default}>>>')
(acceptallgroup, fromword, zodziai) = cikleWords(fixeswords.wordsList, force, append, acceptallnew=True,
acceptallnewnotauto=True, acceptallgroups=False, prot=prot,
test=False, fromword=fromword, zodziai=zodziai)
elif fix == 'nesusije':
wikipedia.output(u'fix = %s' % fix)
preloadingGen = PageNesusijeReader(nesusije.wordsList)
bot = PageNesusijeRobot(preloadingGen)
bot.run()
else:
bot = ReplaceRobot(preloadingGen, replacements, exceptions, acceptall, allowoverlap, recursive, add_cat, sleep)
#wikipedia.output(u'fix gen = %s' % preloadingGen)
#return
bot.run()
# Script entry point: run the bot and make sure the framework is shut
# down cleanly (throttle/connection released) even if main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        wikipedia.stopme()