Source code for pywikibot.site._namespace

"""Objects representing Namespaces of MediaWiki site."""
#
# (C) Pywikibot team, 2008-2021
#
# Distributed under the terms of the MIT license.
#
from collections.abc import Iterable, Mapping
from enum import IntEnum
from typing import Optional, Union

from pywikibot.backports import List
from pywikibot.tools import ComparableMixin, SelfCallMixin


class BuiltinNamespace(IntEnum):

    """Builtin namespace enum."""

    MEDIA = -2
    SPECIAL = -1
    MAIN = 0
    TALK = 1
    USER = 2
    USER_TALK = 3
    PROJECT = 4
    PROJECT_TALK = 5
    FILE = 6
    FILE_TALK = 7
    MEDIAWIKI = 8
    MEDIAWIKI_TALK = 9
    TEMPLATE = 10
    TEMPLATE_TALK = 11
    HELP = 12
    HELP_TALK = 13
    CATEGORY = 14
    CATEGORY_TALK = 15


[docs]class Namespace(Iterable, ComparableMixin): """ Namespace site data object. This is backwards compatible with the structure of entries in site._namespaces which were a list of:: [customised namespace, canonical namespace name?, namespace alias*] If the canonical_name is not provided for a namespace between -2 and 15, the MediaWiki built-in names are used. Image and File are aliases of each other by default. If only one of canonical_name and custom_name are available, both properties will have the same value. """ # These are the MediaWiki built-in names for MW 1.14+. # Namespace prefixes are always case-insensitive, but the # canonical forms are capitalized. canonical_namespaces = { -2: 'Media', -1: 'Special', 0: '', 1: 'Talk', 2: 'User', 3: 'User talk', 4: 'Project', 5: 'Project talk', 6: 'File', 7: 'File talk', 8: 'MediaWiki', 9: 'MediaWiki talk', 10: 'Template', 11: 'Template talk', 12: 'Help', 13: 'Help talk', 14: 'Category', 15: 'Category talk', } def __init__(self, id, canonical_name: Optional[str] = None, custom_name: Optional[str] = None, aliases: Optional[List[str]] = None, **kwargs): """Initializer. :param canonical_name: Canonical name :param custom_name: Name defined in server LocalSettings.php :param aliases: Aliases """ self.id = id canonical_name = canonical_name or self.canonical_namespaces.get(id) assert custom_name is not None or canonical_name is not None, \ 'Namespace needs to have at least one name' self.custom_name = custom_name \ if custom_name is not None else canonical_name self.canonical_name = canonical_name \ if canonical_name is not None else custom_name if aliases: self.aliases = aliases elif id in (6, 7): alias = 'Image' if id == 7: alias += ' talk' self.aliases = [alias] else: self.aliases = [] for key, value in kwargs.items(): setattr(self, key, value) def _distinct(self): if self.custom_name == self.canonical_name: return [self.canonical_name] + self.aliases return [self.custom_name, self.canonical_name] + self.aliases def _contains_lowercase_name(self, name): """Determine a lowercase normalised name is a name of this namespace. :rtype: bool """ return name in (x.lower() for x in self._distinct()) def __contains__(self, item: str) -> bool: """Determine if item is a name of this namespace. The comparison is case insensitive, and item may have a single colon on one or both sides of the name. :param item: name to check """ if item == '' and self.id == 0: return True name = Namespace.normalize_name(item) if not name: return False return self._contains_lowercase_name(name.lower()) def __len__(self): """Obtain length of the iterable.""" if self.custom_name == self.canonical_name: return len(self.aliases) + 1 return len(self.aliases) + 2 def __iter__(self): """Return an iterator.""" return iter(self._distinct()) def __getitem__(self, index): """Obtain an item from the iterable.""" if self.custom_name != self.canonical_name: if index == 0: return self.custom_name index -= 1 return self.canonical_name if index == 0 else self.aliases[index - 1] @staticmethod def _colons(id, name): """Return the name with required colons, depending on the ID.""" if id == 0: return ':' if id in (6, 14): return ':' + name + ':' return name + ':' def __str__(self): """Return the canonical string representation.""" return self.canonical_prefix()
[docs] def canonical_prefix(self): """Return the canonical name with required colons.""" return Namespace._colons(self.id, self.canonical_name)
[docs] def custom_prefix(self): """Return the custom name with required colons.""" return Namespace._colons(self.id, self.custom_name)
def __int__(self): """Return the namespace id.""" return self.id def __index__(self): """Return the namespace id.""" return self.id def __hash__(self): """Return the namespace id.""" return self.id def __eq__(self, other): """Compare whether two namespace objects are equal.""" if isinstance(other, int): return self.id == other if isinstance(other, Namespace): return self.id == other.id if isinstance(other, str): return other in self return False def __ne__(self, other): """Compare whether two namespace objects are not equal.""" return not self.__eq__(other) def __mod__(self, other): """Apply modulo on the namespace id.""" return self.id.__mod__(other) def __sub__(self, other): """Apply subtraction on the namespace id.""" return self.id - other def __add__(self, other): """Apply addition on the namespace id.""" return self.id + other def _cmpkey(self): """Return the ID as a comparison key.""" return self.id def __repr__(self): """Return a reconstructable representation.""" standard_attr = ['id', 'custom_name', 'canonical_name', 'aliases'] extra = [(key, self.__dict__[key]) for key in sorted(self.__dict__) if key not in standard_attr] if extra: kwargs = ', ' + ', '.join( key + '={!r}'.format(value) for key, value in extra) else: kwargs = '' return '{}(id={}, custom_name={!r}, canonical_name={!r}, ' \ 'aliases={!r}{})' \ .format(self.__class__.__name__, self.id, self.custom_name, self.canonical_name, self.aliases, kwargs)
[docs] @staticmethod def default_case(id, default_case=None): """Return the default fixed case value for the namespace ID.""" # https://www.mediawiki.org/wiki/Manual:$wgCapitalLinkOverrides#Warning if id > 0 and id % 2 == 1: # the talk ns has the non-talk ns case id -= 1 if id in (-1, 2, 8): return 'first-letter' return default_case
[docs] @classmethod def builtin_namespaces(cls, case='first-letter'): """Return a dict of the builtin namespaces.""" return {i: cls(i, case=cls.default_case(i, case)) for i in range(-2, 16)}
[docs] @staticmethod def normalize_name(name): """ Remove an optional colon before and after name. TODO: reject illegal characters. """ if name == '': return '' name = name.replace('_', ' ') parts = name.split(':', 4) count = len(parts) if count > 3 or (count == 3 and parts[2]): return False # Discard leading colon if count >= 2 and not parts[0] and parts[1]: return parts[1].strip() if parts[0]: return parts[0].strip() return False
# Set Namespace.FOO to be BuiltinNamespace.FOO for each builtin namespace for item in BuiltinNamespace: setattr(Namespace, item.name, item)
[docs]class NamespacesDict(Mapping, SelfCallMixin): """ An immutable dictionary containing the Namespace instances. It adds a deprecation message when called as the 'namespaces' property of APISite was callable. """ def __init__(self, namespaces): """Create new dict using the given namespaces.""" super().__init__() self._namespaces = namespaces self._namespace_names = {} for namespace in self._namespaces.values(): for name in namespace: self._namespace_names[name.lower()] = namespace def __iter__(self): """Iterate over all namespaces.""" return iter(self._namespaces) def __getitem__(self, key: Union[Namespace, int, str]) -> Namespace: """ Get the namespace with the given key. :param key: namespace key """ if isinstance(key, (Namespace, int)): try: return self._namespaces[key] except KeyError: raise KeyError('{} is not a known namespace. Maybe you should ' 'clear the api cache.'.format(key)) namespace = self.lookup_name(key) if namespace: return namespace return super().__getitem__(key) def __getattr__(self, attr: Union[Namespace, int, str]) -> Namespace: """ Get the namespace with the given key. :param attr: namespace key """ # lookup_name access _namespaces if attr.isupper(): if attr == 'MAIN': return self[0] namespace = self.lookup_name(attr) if namespace: return namespace return self.__getattribute__(attr) def __len__(self): """Get the number of namespaces.""" return len(self._namespaces)
[docs] def lookup_name(self, name: str) -> Optional[Namespace]: """ Find the Namespace for a name also checking aliases. :param name: Name of the namespace. """ name = Namespace.normalize_name(name) if name is False: return None return self.lookup_normalized_name(name.lower())
[docs] def lookup_normalized_name(self, name: str) -> Optional[Namespace]: """ Find the Namespace for a name also checking aliases. The name has to be normalized and must be lower case. :param name: Name of the namespace. """ return self._namespace_names.get(name)
[docs] def resolve(self, identifiers) -> List[Namespace]: """ Resolve namespace identifiers to obtain Namespace objects. Identifiers may be any value for which int() produces a valid namespace id, except bool, or any string which Namespace.lookup_name successfully finds. A numerical string is resolved as an integer. :param identifiers: namespace identifiers :type identifiers: iterable of str or Namespace key, or a single instance of those types :return: list of Namespace objects in the same order as the identifiers :raises KeyError: a namespace identifier was not resolved :raises TypeError: a namespace identifier has an inappropriate type such as NoneType or bool """ if isinstance(identifiers, (str, Namespace)): identifiers = [identifiers] else: # convert non-iterators to single item list try: iter(identifiers) except TypeError: identifiers = [identifiers] # lookup namespace names, and assume anything else is a key. # int(None) raises TypeError; however, bool needs special handling. namespaces = self._namespaces result = [NotImplemented if isinstance(ns, bool) else self._lookup_name(ns) if isinstance(ns, str) and not ns.lstrip('-').isdigit() else namespaces[int(ns)] if int(ns) in namespaces else None for ns in identifiers] if NotImplemented in result: raise TypeError('identifiers contains inappropriate types: {!r}' .format(identifiers)) # Namespace.lookup_name returns None if the name is not recognised if None in result: raise KeyError( 'Namespace identifier(s) not recognised: {}' .format(','.join(str(identifier) for identifier, ns in zip(identifiers, result) if ns is None))) return result
def _lookup_name(self, name): name = Namespace.normalize_name(name) if name is False: return None name = name.lower() for namespace in self._namespaces.values(): if namespace._contains_lowercase_name(name): return namespace return None