1- """Identifier tools to represent IRC names (nick or channel)."""
1+ """Identifier tools to represent IRC names (nick or channel).
2+
3+ Nick and channel are defined by their names, which are "identifiers": their
4+ names are used to differentiate users from each other, and channels from each
5+ other. To ensure that two channels or two users are the same, their
6+ identifiers must be processed to be compared properly. This process depends on
7+ which RFC and how that RFC is implemented by the server: IRC being an old
8+ protocol, different RFCs have different versions of that process:
9+
10+ * :rfc:`1459#section-2.2`: ASCII characters, and ``[]\\ `` are mapped to ``{}|``
11+ * :rfc:`2812#section-2.2`: same as in the previous RFC, adding ``~`` mapped to
12+ ``^``
13+
14+ Then when ISUPPORT was added, the `CASEMAPPING parameter`__ was defined so the
15+ server can say which process to apply:
16+
17+ * ``ascii``: only ``[A-Z]`` must be mapped to ``[a-z]`` (implemented by
18+ :func:`ascii_lower`)
19+ * ``rfc1459``: follows :rfc:`2812`, because of how it was implemented in most
20+ servers (implemented by :func:`rfc1459_lower`)
21+ * A strict version of :rfc:`1459` also exists but it is not recommended
22+ (implemented by :func:`rfc1459_strict_lower`)
23+
24+ As a result, the :class:`Identifier` class requires a casemapping function,
25+ which should be provided by the :class:`bot<sopel.bot.Sopel>`.
26+
27+ .. seealso::
28+
29+ The bot's :meth:`make_identifier<sopel.bot.Sopel.make_identifier>` method
30+ should be used to instantiate an :class:`Identifier` to honor the
31+ ``CASEMAPPING`` parameter.
32+
33+ .. __: https://modern.ircdocs.horse/index.html#casemapping-parameter
34+ """
235from __future__ import generator_stop
336
437import string
1851
1952
2053def ascii_lower (text : str ) -> str :
21- """Lower ``text`` according to the ASCII CASEMAPPING"""
54+ """Lower ``text`` according to the ``ascii`` value of ``CASEMAPPING``.
55+
56+ In that version, only ``[A-Z]`` are to be mapped to their lowercase
57+ equivalent (``[a-z]``). Non-ASCII characters are kept unmodified.
58+ """
2259 return text .translate (ASCII_TABLE )
2360
2461
2562def rfc1459_lower (text : str ) -> str :
26- """Lower ``text`` according to :rfc:`1459` (with ``~`` mapped to ``^``) .
63+ """Lower ``text`` according to :rfc:`2812`.
2764
28- Similar to :func:`rfc1459_strict_lower`, but also maps ``~`` to
29- ``^`` as defined for the ``rfc1459`` value of the
30- `CASEMAPPING parameter`__.
65+ Similar to :func:`rfc1459_strict_lower`, but also maps ``~`` to ``^``, as
66+ per :rfc:`2812#section-2.2`:
67+
68+ Because of IRC's Scandinavian origin, the characters ``{}|^`` are
69+ considered to be the lower case equivalents of the characters
70+ ``[]\\ ~``, respectively.
71+
72+ .. note::
73+
74+ This is an implementation of the `CASEMAPPING parameter`__ for the
75+ value ``rfc1459``, which doesn't use :rfc:`1459` but its updated version
76+ :rfc:`2812`.
3177
3278 .. __: https://modern.ircdocs.horse/index.html#casemapping-parameter
3379 """
@@ -37,12 +83,11 @@ def rfc1459_lower(text: str) -> str:
3783def rfc1459_strict_lower (text : str ) -> str :
3884 """Lower ``text`` according to :rfc:`1459` (strict version).
3985
40- As per ` section 2.2`__ :
86+ As per :rfc:`1459#section-2.2`:
4187
4288 Because of IRC's scandanavian origin, the characters ``{}|`` are
4389 considered to be the lower case equivalents of the characters ``[]\\ ``.
4490
45- .. __: https://datatracker.ietf.org/doc/html/rfc1459#section-2.2
4691 """
4792 return text .translate (RFC1459_STRICT_TABLE )
4893
@@ -53,14 +98,37 @@ def rfc1459_strict_lower(text: str) -> str:
5398class Identifier (str ):
5499 """A ``str`` subclass which acts appropriately for IRC identifiers.
55100
101+ :param str identifier: IRC identifier
102+ :param casemapping: a casemapping function (optional keyword argument)
103+ :type casemapping: Callable[[:class:`str`], :class:`str`]
104+
56105 When used as normal ``str`` objects, case will be preserved.
57106 However, when comparing two Identifier objects, or comparing an Identifier
58107 object with a ``str`` object, the comparison will be case insensitive.
59- This case insensitivity includes the case convention conventions regarding
60- ``[]``, ``{}``, ``|``, ``\\ ``, ``^`` and ``~`` described in RFC 2812.
108+
109+ This case insensitivity uses the provided ``casemapping`` function,
110+ following the rules for the `CASEMAPPING parameter`__ from ISUPPORT. By
111+ default, it uses :func:`rfc1459_lower`, following :rfc:`2812#section-2.2`.
112+
113+ .. note::
114+
115+ To instantiate an ``Identifier`` with the appropriate ``casemapping``
116+ function, it is best to rely on
117+ :meth:`bot.make_identifier<sopel.irc.AbstractBot.make_identifier>`.
118+
119+ .. versionchanged:: 8.0
120+
121+ The ``casemapping`` parameter has been added.
122+
123+ .. __: https://modern.ircdocs.horse/index.html#casemapping-parameter
61124 """
62- def __new__ (cls , * args , ** kwargs ) -> 'Identifier' :
63- return str .__new__ (cls , * args )
125+ def __new__ (
126+ cls ,
127+ identifier : str ,
128+ * ,
129+ casemapping : Casemapping = rfc1459_lower ,
130+ ) -> 'Identifier' :
131+ return str .__new__ (cls , identifier )
64132
65133 def __init__ (
66134 self ,
@@ -73,22 +141,41 @@ def __init__(
73141 """Casemapping function to lower the identifier."""
74142 self ._lowered = self .casemapping (identifier )
75143
76- def lower (self ):
77- """Get the RFC 2812 -compliant lowercase version of this identifier.
144+ def lower (self ) -> str :
145+ """Get the IRC-compliant lowercase version of this identifier.
78146
79- :return: RFC 2812-compliant lowercase version of the
80- :py:class:`Identifier` instance
81- :rtype: str
147+ :return: IRC-compliant lowercase version used for case-insensitive
148+ comparisons
149+
150+ The behavior of this method depends on the identifier's casemapping
151+ function, which should be selected based on the ``CASEMAPPING``
152+ parameter from ``ISUPPORT``.
153+
154+ .. versionchanged:: 8.0
155+
156+ Now uses the :attr:`casemapping` function to lower the identifier.
82157 """
83158 return self .casemapping (self )
84159
85160 @staticmethod
86161 def _lower (identifier : str ):
87- """Convert an identifier to lowercase per RFC 2812.
162+ """Convert an identifier to lowercase per :rfc:`2812`.
88163
89164 :param str identifier: the identifier (nickname or channel) to convert
90165 :return: RFC 2812-compliant lowercase version of ``identifier``
91166 :rtype: str
167+
168+ :meta public:
169+
170+ .. versionchanged:: 8.0
171+
172+ Previously, this would lower all non-ASCII characters. It now uses
173+ a strict implementation of the ``CASEMAPPING`` parameter. This is
174+ now equivalent to calling :func:`rfc1459_lower`.
175+
176+ If the ``identifier`` is an instance of :class:`Identifier`, this
177+ will call that identifier's :meth:`lower` method instead.
178+
92179 """
93180 if isinstance (identifier , Identifier ):
94181 return identifier .lower ()
@@ -98,19 +185,27 @@ def _lower(identifier: str):
98185 def _lower_swapped (identifier : str ):
99186 """Backward-compatible version of :meth:`_lower`.
100187
101- :param str identifier: the identifier (nickname or channel) to convert
188+ :param identifier: the identifier (nickname or channel) to convert
102189 :return: RFC 2812-non-compliant lowercase version of ``identifier``
103190 :rtype: str
104191
105- This is what the old :meth:`_lower` function did before Sopel 7.0. It maps
106- ``{}``, ``[]``, ``|``, ``\\ ``, ``^``, and ``~`` incorrectly.
192+ This is what the old :meth:`_lower` function did before Sopel 7.0. It
193+ maps ``{}``, ``[]``, ``|``, ``\\ ``, ``^``, and ``~`` incorrectly.
194+
195+ You shouldn't use this unless you need to migrate stored values from
196+ the previous, incorrect "lowercase" representation to the correct one.
197+
198+ :meta public:
199+
200+ .. versionadded:: 7.0
107201
108- You shouldn't use this unless you need to migrate stored values from the
109- previous, incorrect "lowercase" representation to the correct one .
202+ This method was added to ensure migration of improperly lowercased
203+ data: it reverts the data back to the previous lowercase rules .
110204 """
111205 # The tilde replacement isn't needed for identifiers, but is for
112206 # channels, which may be useful at some point in the future.
113- low = identifier .lower ().replace ('{' , '[' ).replace ('}' , ']' )
207+ # Always convert to str, to prevent using custom casemapping
208+ low = str (identifier ).lower ().replace ('{' , '[' ).replace ('}' , ']' )
114209 low = low .replace ('|' , '\\ ' ).replace ('^' , '~' )
115210 return low
116211
0 commit comments