Skip to content

Commit 9497c45

Browse files
authored
Merge pull request #2433 from SnoopJ/chore/gh2230-tidy-up
url: fix premature loop termination, docstring cleanup & slight refactoring
2 parents 2a09b9b + 2051f95 commit 9497c45

File tree

1 file changed

+51
-22
lines changed

1 file changed

+51
-22
lines changed

sopel/modules/url.py

Lines changed: 51 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
from ipaddress import ip_address
1414
import logging
1515
import re
16-
from typing import Generator, List, Optional, Tuple, TYPE_CHECKING
16+
from typing import Generator, List, NamedTuple, Optional, TYPE_CHECKING
1717
from urllib.parse import urlparse
1818

1919
import dns.resolver
@@ -289,10 +289,10 @@ def title_command(bot: SopelWrapper, trigger: Trigger):
289289
# needs to be a list so len() can be checked later
290290
urls = list(web.search_urls(trigger))
291291

292-
for url, title, domain, tinyurl, dispatched in process_urls(
292+
for url, title, domain, tinyurl, ignored in process_urls(
293293
bot, trigger, urls, requested=True
294294
):
295-
if dispatched:
295+
if ignored:
296296
result_count += 1
297297
continue
298298
message = "%s | %s" % (title, domain)
@@ -317,8 +317,13 @@ def title_command(bot: SopelWrapper, trigger: Trigger):
317317
def title_auto(bot: SopelWrapper, trigger: Trigger):
318318
"""
319319
Automatically show titles for URLs. For shortened URLs/redirects, find
320-
where the URL redirects to and show the title for that (or call a function
321-
from another plugin to give more information).
320+
where the URL redirects to and show the title for that.
321+
322+
.. note::
323+
324+
URLs that match (before redirection) any other registered callbacks
325+
will *not* have their titles shown.
326+
322327
"""
323328
# Enabled or disabled by feature flag
324329
if not bot.settings.url.enable_auto_title:
@@ -342,8 +347,8 @@ def title_auto(bot: SopelWrapper, trigger: Trigger):
342347
continue
343348
urls.append(url)
344349

345-
for url, title, domain, tinyurl, dispatched in process_urls(bot, trigger, urls):
346-
if not dispatched:
350+
for url, title, domain, tinyurl, ignored in process_urls(bot, trigger, urls):
351+
if not ignored:
347352
message = '%s | %s' % (title, domain)
348353
if tinyurl:
349354
message += ' ( %s )' % tinyurl
@@ -353,16 +358,30 @@ def title_auto(bot: SopelWrapper, trigger: Trigger):
353358
bot.memory["last_seen_url"][trigger.sender] = url
354359

355360

361+
class URLInfo(NamedTuple):
362+
"""Helper class for information about a URL handled by this plugin."""
363+
364+
url: str
365+
366+
title: Optional[str]
367+
"""The title associated with ``url``, if appropriate."""
368+
369+
hostname: Optional[str]
370+
"""The hostname associated with ``url``, if appropriate."""
371+
372+
tinyurl: Optional[str]
373+
"""A shortened form of ``url``, if appropriate."""
374+
375+
ignored: bool
376+
"""Whether or not this URL matches any registered callbacks or is explicitly excluded."""
377+
378+
356379
def process_urls(
357380
bot: SopelWrapper,
358381
trigger: Trigger,
359382
urls: List[str],
360383
requested: bool = False,
361-
) -> Generator[
362-
Tuple[str, Optional[str], Optional[str], Optional[str], bool],
363-
None,
364-
None,
365-
]:
384+
) -> Generator[URLInfo, None, None]:
366385
"""
367386
For each URL in the list, ensure it should be titled, and do so.
368387
@@ -371,15 +390,25 @@ def process_urls(
371390
:param urls: The URLs detected in the triggering message
372391
:param requested: Whether the title was explicitly requested (vs automatic)
373392
374-
See if it's handled by another plugin. If not, find where it redirects to,
375-
if anywhere. If that redirected URL should be handled by another plugin,
376-
dispatch the callback for it. Return a list of
377-
(url, title, hostname, tinyurl, dispatched) tuples for each URL.
393+
Yields a tuple ``(url, title, hostname, tinyurl, ignored)`` for each URL.
394+
395+
.. note:
396+
397+
If a URL in ``urls`` has any registered callbacks, this function will NOT
398+
retrieve the title, and considers the URL as dispatched to those callbacks.
399+
In this case, only the ``url`` and ``ignored=True`` will be set; all
400+
other values will be ``None``.
401+
402+
.. note:
378403
379-
If a callback was dispatched, only the url and dispatched=True will be set.
404+
For titles explicitly requested by the user, ``exclusion_char`` and
405+
exclusions from the ``.urlban``/``.urlpban`` commands are skipped.
406+
407+
.. versionchanged:: 8.0
408+
409+
This function **does not** notify callbacks registered for URLs
410+
redirected to from URLs passed to this function. See #2432, #2230.
380411
381-
For titles explicitly requested by the user, exclusion_char and excludes
382-
are skipped.
383412
"""
384413
shorten_url_length = bot.config.url.shorten_url_length
385414
for url in urls:
@@ -389,10 +418,10 @@ def process_urls(
389418

390419
parsed_url = urlparse(url)
391420

392-
# Check the URL does not match an existing URL callback
393421
if check_callbacks(bot, url, use_excludes=not requested):
394-
yield (url, None, None, None, True)
395-
return
422+
# URL matches a callback OR is excluded, ignore
423+
yield URLInfo(url, None, None, None, True)
424+
continue
396425

397426
# Prevent private addresses from being queried if enable_private_resolution is False
398427
# FIXME: This does nothing when an attacker knows how to host a 302

0 commit comments

Comments
 (0)