#!/usr/bin/python
"""This script generates a family file from a given URL.
Usage::
generate_family_file.py [<url>] [<name>] [<dointerwiki>] [<verify>]
Parameters are optional. They must be given consecutively but may be
omitted if there is no successor parameter. The parameters are::
<url>: an url from where the family settings are loaded
<name>: the family name without "_family.py" tail.
<dointerwiki>: predefined answer (y|n) to add multiple language codes
<verify>: disable certificate validation (y|n)
Example::
generate_family_file.py https://www.mywiki.bogus/wiki/Main_Page mywiki
This will create the file mywiki_family.py in pywikibot/families folder
"""
#
# (C) Pywikibot team, 2010-2021
#
# Distributed under the terms of the MIT license
#
import codecs
import os
import string
import sys
from os import environ, getenv
from typing import Optional
from urllib.parse import urlparse
# see pywikibot.family.py
# Legal characters for Family name and Family langs keys.
# A family name may only consist of ASCII letters and digits.
NAME_CHARACTERS = string.ascii_letters + string.digits
# Language codes are lowercase ASCII letters and digits plus "_" and "-".
# nds_nl code alias requires "_"
# dash must be the last char to be reused as regex in update_linktrails
CODE_CHARACTERS = string.ascii_lowercase + string.digits + '_-'
[docs]class FamilyFileGenerator:
"""Family file creator object."""
def __init__(self,
url: Optional[str] = None,
name: Optional[str] = None,
dointerwiki: Optional[str] = None,
verify: Optional[str] = None):
"""
Parameters are optional. If not given the script asks for the values.
:param url: an url from where the family settings are loaded
:param name: the family name without "_family.py" tail.
:param dointerwiki: Predefined answer to add multiple language
codes. Pass `Y` or `y` for yes `N` or `n` for no and
`E` or `e` if you want to edit the collection of sites.
:param verify: If a certificate verification failes, you may
pass `Y` or `y` to disable certificate validaton `N` or `n`
to keep it enabled.
"""
# from pywikibot.site_detect import MWSite
# when required but disable user-config checks
# so the family can be created first,
# and then used when generating the user-config
self.Wiki = _import_with_no_user_config(
'pywikibot.site_detect').site_detect.MWSite
self.base_url = url
self.name = name
self.dointerwiki = dointerwiki
self.verify = verify
self.wikis = {} # {'https://wiki/$1': Wiki('https://wiki/$1'), ...}
self.langs = [] # [Wiki('https://wiki/$1'), ...]
[docs] def get_params(self):
"""Ask for parameters if necessary."""
if self.base_url is None:
self.base_url = input('Please insert URL to wiki: ')
if not self.base_url:
return False
if self.name is None:
self.name = input('Please insert a short name (eg: freeciv): ')
if not self.name:
return False
if any(x not in NAME_CHARACTERS for x in self.name):
print('ERROR: Name of family "{}" must be ASCII letters and '
'digits [a-zA-Z0-9]'.format(self.name))
return False
return True
[docs] def get_wiki(self):
"""Get wiki from base_url."""
import pywikibot
from pywikibot.exceptions import FatalServerError
print('Generating family file from ' + self.base_url)
for verify in (True, False):
try:
w = self.Wiki(self.base_url, verify=verify)
except FatalServerError:
print('ERROR: '
+ pywikibot.comms.http.SSL_CERT_VERIFY_FAILED_MSG)
pywikibot.exception()
if not pywikibot.bot.input_yn(
'Retry with disabled ssl certificate validation',
default=self.verify, automatic_quit=False,
force=self.verify is not None):
break
else:
return w, verify
return None, None
[docs] def run(self):
"""Main method, generate family file."""
if not self.get_params():
return
w, verify = self.get_wiki()
if w is None:
return
self.wikis[w.lang] = w
print('\n=================================='
'\nAPI url: {w.api}'
'\nMediaWiki version: {w.version}'
'\n==================================\n'.format(w=w))
self.getlangs(w)
self.getapis()
self.writefile(verify)
[docs] def getlangs(self, w):
"""Determine language of a site."""
print('Determining other languages...', end='')
try:
self.langs = w.langs
print(' '.join(sorted(wiki['prefix'] for wiki in self.langs)))
except Exception as e:
self.langs = []
print(e, '; continuing...')
if len([lang for lang in self.langs if lang['url'] == w.iwpath]) == 0:
if w.private_wiki:
w.lang = self.name
self.langs.append({'language': w.lang,
'local': '',
'prefix': w.lang,
'url': w.iwpath})
code_len = len(self.langs)
if code_len > 1:
if self.dointerwiki is None:
makeiw = input(
'\nThere are {} languages available.'
'\nDo you want to generate interwiki links? '
'This might take a long time. ([y]es/[N]o/[e]dit)'
.format(code_len)).lower()
else:
makeiw = self.dointerwiki
if makeiw == 'n':
self.langs = [wiki for wiki in self.langs
if wiki['url'] == w.iwpath]
elif makeiw == 'e':
for wiki in self.langs:
print(wiki['prefix'], wiki['url'])
do_langs = input('Which languages do you want: ')
self.langs = [wiki for wiki in self.langs
if wiki['prefix'] in do_langs
or wiki['url'] == w.iwpath]
for wiki in self.langs:
assert all(x in CODE_CHARACTERS for x in wiki['prefix']), \
'Family {} code {} must be ASCII lowercase ' \
'letters and digits [a-z0-9] or underscore/dash [_-]' \
.format(self.name, wiki['prefix'])
[docs] def getapis(self):
"""Load other language pages."""
print('Loading wikis... ')
for lang in self.langs:
key = lang['prefix']
print(' * {}... '.format(key), end='')
if key not in self.wikis:
try:
self.wikis[key] = self.Wiki(lang['url'])
print('downloaded')
except Exception as e:
print(e)
else:
print('in cache')
[docs] def writefile(self, verify):
"""Write the family file."""
fn = os.path.join(os.path.dirname(os.path.realpath(__file__)),
'pywikibot', 'families',
'{}_family.py'.format(self.name))
print('Writing %s... ' % fn)
try:
open(fn)
if input('{} already exists. Overwrite? (y/n)'
.format(fn)).lower() == 'n':
print('Terminating.')
sys.exit(1)
except IOError: # file not found
pass
code_hostname_pairs = '\n '.join(
"'{code}': '{hostname}',".format(
code=k, hostname=urlparse(w.server).netloc
) for k, w in self.wikis.items())
code_path_pairs = '\n '.join(
"'{code}': '{path}',".format(code=k, path=w.scriptpath)
for k, w in self.wikis.items())
code_protocol_pairs = '\n '.join(
"'{code}': '{protocol}',".format(
code=k, protocol=urlparse(w.server).scheme
) for k, w in self.wikis.items())
content = family_template % {
'url': self.base_url, 'name': self.name,
'code_hostname_pairs': code_hostname_pairs,
'code_path_pairs': code_path_pairs,
'code_protocol_pairs': code_protocol_pairs}
if not verify:
# assuming this is the same for all codes
content += """
def verify_SSL_certificate(self, code: str) -> bool:
return False
"""
with codecs.open(fn, 'w', 'utf-8') as fh:
fh.write(content)
# Source text of the generated family module. It is filled in with the
# %-operator by FamilyFileGenerator.writefile() using the keys url, name,
# code_hostname_pairs, code_path_pairs and code_protocol_pairs.
# The indentation inside the string is significant: it becomes the
# indentation of the generated class body and method bodies.
family_template = """\
\"\"\"
This family file was auto-generated by generate_family_file.py script.
Configuration parameters:
  url = %(url)s
  name = %(name)s
Please do not commit this to the Git repository!
\"\"\"
from pywikibot import family
class Family(family.Family):  # noqa: D101
    name = '%(name)s'
    langs = {
        %(code_hostname_pairs)s
    }
    def scriptpath(self, code):
        return {
            %(code_path_pairs)s
        }[code]
    def protocol(self, code):
        return {
            %(code_protocol_pairs)s
        }[code]
"""
def _import_with_no_user_config(*import_args):
"""Return __import__(*import_args) without loading user-config.py."""
orig_no_user_config = getenv('PYWIKIBOT_NO_USER_CONFIG') or getenv(
'PYWIKIBOT2_NO_USER_CONFIG')
environ['PYWIKIBOT_NO_USER_CONFIG'] = '2'
result = __import__(*import_args)
# Reset this flag
if not orig_no_user_config:
del environ['PYWIKIBOT_NO_USER_CONFIG']
else:
environ['PYWIKIBOT_NO_USER_CONFIG'] = orig_no_user_config
return result
def main():
    """Script entry point.

    Prints the module documentation when the first command line argument
    is ``-help``; otherwise all command line arguments are handed to
    FamilyFileGenerator as positional parameters and the generator is run.
    """
    cli_args = sys.argv[1:]
    if cli_args and cli_args[0] == '-help':
        print(__doc__)
        return
    generator = FamilyFileGenerator(*cli_args)
    generator.run()
# Run the generator only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()