1010"""
1111from __future__ import annotations
1212
13- import ipaddress
13+ from ipaddress import ip_address
1414import logging
1515import re
16+ from typing import Generator , Optional , Tuple
1617from urllib .parse import urlparse
1718
1819import dns .resolver
1920import requests
2021from urllib3 .exceptions import LocationValueError # type: ignore[import]
2122
2223from sopel import plugin , tools
23- from sopel .config import types
24+ from sopel .bot import Sopel
25+ from sopel .config import Config , types
2426from sopel .tools import web
27+ from sopel .trigger import Trigger
2528
2629
2730LOGGER = logging .getLogger (__name__ )
@@ -62,22 +65,18 @@ class UrlSection(types.StaticSection):
6265 """If greater than 0, the title fetcher will include a TinyURL version of links longer than this many characters."""
6366 enable_private_resolution = types .BooleanAttribute (
6467 'enable_private_resolution' , default = False )
65- """Enable URL lookups for RFC1918 addresses"""
66- enable_dns_resolution = types .BooleanAttribute (
67- 'enable_dns_resolution' , default = False )
68- """Enable DNS resolution for all domains to validate if there are RFC1918 resolutions"""
68+ """Enable requests to private and local network IP addresses"""
6969
7070
71- def configure (config ):
71+ def configure (config : Config ):
7272 """
7373 | name | example | purpose |
7474 | ---- | ------- | ------- |
7575 | enable_auto_title | yes | Enable auto-title. |
7676 | exclude | https?://git\\ \\ .io/.* | A list of regular expressions for URLs for which the title should not be shown. |
7777 | exclusion\\ _char | ! | A character (or string) which, when immediately preceding a URL, will stop the URL's title from being shown. |
7878 | shorten\\ _url\\ _length | 72 | If greater than 0, the title fetcher will include a TinyURL version of links longer than this many characters. |
79- | enable\\ _private\\ _resolution | False | Enable URL lookups for RFC1918 addresses. |
80- | enable\\ _dns\\ _resolution | False | Enable DNS resolution for all domains to validate if there are RFC1918 resolutions. |
79+ | enable\\ _private\\ _resolution | False | Enable requests to private and local network IP addresses. |
8180 """
8281 config .define_section ('url' , UrlSection )
8382 config .url .configure_setting (
@@ -100,15 +99,11 @@ def configure(config):
10099 )
101100 config .url .configure_setting (
102101 'enable_private_resolution' ,
103- 'Enable URL lookups for RFC1918 addresses?'
104- )
105- config .url .configure_setting (
106- 'enable_dns_resolution' ,
107- 'Enable DNS resolution for all domains to validate if there are RFC1918 resolutions?'
102+ 'Enable requests to private and local network IP addresses?'
108103 )
109104
110105
111- def setup (bot ):
106+ def setup (bot : Sopel ):
112107 bot .config .define_section ('url' , UrlSection )
113108
114109 if bot .config .url .exclude :
@@ -139,7 +134,7 @@ def setup(bot):
139134 bot .memory ['shortened_urls' ] = tools .SopelMemory ()
140135
141136
142- def shutdown (bot ):
137+ def shutdown (bot : Sopel ):
143138 # Unset `url_exclude` and `last_seen_url`, but not `shortened_urls`;
144139 # clearing `shortened_urls` will increase API calls. Leaving it in memory
145140 # should not lead to unexpected behavior.
@@ -154,7 +149,7 @@ def shutdown(bot):
154149@plugin .example ('.urlpexclude example\\ .com/\\ w+' , user_help = True )
155150@plugin .example ('.urlexclude example.com/path' , user_help = True )
156151@plugin .output_prefix ('[url] ' )
157- def url_ban (bot , trigger ):
152+ def url_ban (bot : Sopel , trigger : Trigger ):
158153 """Exclude a URL from auto title.
159154
160155 Use ``urlpexclude`` to exclude a pattern instead of a URL.
@@ -199,7 +194,7 @@ def url_ban(bot, trigger):
199194@plugin .example ('.urlpallow example\\ .com/\\ w+' , user_help = True )
200195@plugin .example ('.urlallow example.com/path' , user_help = True )
201196@plugin .output_prefix ('[url] ' )
202- def url_unban (bot , trigger ):
197+ def url_unban (bot : Sopel , trigger : Trigger ):
203198 """Allow a URL for auto title.
204199
205200 Use ``urlpallow`` to allow a pattern instead of a URL.
@@ -246,30 +241,27 @@ def url_unban(bot, trigger):
246241 'Google | www.google.com' ,
247242 online = True , vcr = True )
248243@plugin .output_prefix ('[url] ' )
249- def title_command (bot , trigger ):
244+ def title_command (bot : Sopel , trigger : Trigger ):
250245 """
251246 Show the title or URL information for the given URL, or the last URL seen
252247 in this channel.
253248 """
249+ result_count = 0
250+
254251 if not trigger .group (2 ):
255252 if trigger .sender not in bot .memory ['last_seen_url' ]:
256253 return
257- matched = check_callbacks (
258- bot , bot .memory ['last_seen_url' ][trigger .sender ])
259- if matched :
260- return
261- else :
262- urls = [bot .memory ['last_seen_url' ][trigger .sender ]]
254+ urls = [bot .memory ["last_seen_url" ][trigger .sender ]]
263255 else :
264- urls = list ( # needs to be a list so len() can be checked later
265- web .search_urls (
266- trigger ,
267- exclusion_char = bot .config .url .exclusion_char
268- )
269- )
256+ # needs to be a list so len() can be checked later
257+ urls = list (web .search_urls (trigger ))
270258
271- result_count = 0
272- for url , title , domain , tinyurl in process_urls (bot , trigger , urls ):
259+ for url , title , domain , tinyurl , dispatched in process_urls (
260+ bot , trigger , urls , requested = True
261+ ):
262+ if dispatched :
263+ result_count += 1
264+ continue
273265 message = '%s | %s' % (title , domain )
274266 if tinyurl :
275267 message += ' ( %s )' % tinyurl
@@ -289,7 +281,7 @@ def title_command(bot, trigger):
289281
290282@plugin .rule (r'(?u).*(https?://\S+).*' )
291283@plugin .output_prefix ('[url] ' )
292- def title_auto (bot , trigger ):
284+ def title_auto (bot : Sopel , trigger : Trigger ):
293285 """
294286 Automatically show titles for URLs. For shortened URLs/redirects, find
295287 where the URL redirects to and show the title for that (or call a function
@@ -311,55 +303,68 @@ def title_auto(bot, trigger):
311303 urls = web .search_urls (
312304 trigger , exclusion_char = bot .config .url .exclusion_char , clean = True )
313305
314- for url , title , domain , tinyurl in process_urls (bot , trigger , urls ):
315- message = '%s | %s' % (title , domain )
316- if tinyurl :
317- message += ' ( %s )' % tinyurl
318- # Guard against responding to other instances of this bot.
319- if message != trigger :
320- bot .say (message )
321- bot .memory ['last_seen_url' ][trigger .sender ] = url
306+ for url , title , domain , tinyurl , dispatched in process_urls (bot , trigger , urls ):
307+ if not dispatched :
308+ message = '%s | %s' % (title , domain )
309+ if tinyurl :
310+ message += ' ( %s )' % tinyurl
311+ # Guard against responding to other instances of this bot.
312+ if message != trigger :
313+ bot .say (message )
314+ bot .memory ["last_seen_url" ][trigger .sender ] = url
322315
323316
324- def process_urls (bot , trigger , urls ):
317+ def process_urls (
318+ bot : Sopel , trigger : Trigger , urls : List [str ], requested : bool = False
319+ ) -> Generator [Tuple [str , str , Optional [str ], Optional [str ], bool ], None , None ]:
325320 """
326- For each URL in the list, ensure that it isn't handled by another plugin.
327- If not, find where it redirects to, if anywhere. If that redirected URL
328- should be handled by another plugin, dispatch the callback for it.
329- Return a list of (title, hostname) tuples for each URL which is not handled
330- by another plugin.
321+ For each URL in the list, ensure it should be titled, and do so.
322+
323+ See if it's handled by another plugin. If not, find where it redirects to,
324+ if anywhere. If that redirected URL should be handled by another plugin,
325+ dispatch the callback for it. Yield a
326+ (url, title, hostname, tinyurl, dispatched) tuple for each URL processed.
327+
328+ If a callback was dispatched, only the url and dispatched=True will be set.
329+
330+ For titles explicitly requested by the user, exclusion_char and excludes
331+ are skipped.
332+
333+ :param bot: Sopel instance
334+ :param trigger: The trigger object for this event
335+ :param urls: The URLs detected in the triggering message
336+ :param requested: Whether the title was explicitly requested (vs automatic)
331337 """
332338 shorten_url_length = bot .config .url .shorten_url_length
333339 for url in urls :
334340 # Exclude URLs that start with the exclusion char
335- if url .startswith (bot .config .url .exclusion_char ):
341+ if not requested and url .startswith (bot .config .url .exclusion_char ):
336342 continue
337343
344+ parsed_url = urlparse (url )
345+
338346 # Check the URL does not match an existing URL callback
339- if check_callbacks (bot , url ):
340- continue
347+ if check_callbacks (bot , url , use_excludes = not requested ):
348+ yield (url , None , None , None , True )
349+ return
341350
342351 # Prevent private addresses from being queried if enable_private_resolution is False
352+ # FIXME: Bypassable: a public host can answer with a 302 redirect to a private address
353+ # FIXME: TOCTOU: the DNS answer checked here may differ from the one the later request uses
343354 if not bot .config .url .enable_private_resolution :
344- parsed = urlparse (url )
345- # Check if it's an address like http://192.168.1.1
346355 try :
347- if ipaddress .ip_address (parsed .hostname ).is_private or ipaddress .ip_address (parsed .hostname ).is_loopback :
348- LOGGER .debug ('Ignoring private URL: %s' , url )
349- continue
356+ ips = [ip_address (parsed_url .hostname )]
350357 except ValueError :
351- pass
352-
353- # Check if domains are RFC1918 addresses if enable_dns_resolutions is set
354- if bot .config .url .enable_dns_resolution :
355- private = False
356- for result in dns .resolver .query (parsed .hostname ):
357- if ipaddress .ip_address (result ).is_private or ipaddress .ip_address (parsed .hostname ).is_loopback :
358- private = True
359- break
360- if private :
361- LOGGER .debug ('Ignoring private URL: %s' , url )
362- continue
358+ ips = [ip_address (ip ) for ip in dns .resolver .query (parsed_url .hostname )]
359+
360+ private = False
361+ for ip in ips :
362+ if ip .is_private or ip .is_loopback :
363+ private = True
364+ break
365+ if private :
366+ LOGGER .debug ('Ignoring private URL: %s' , url )
367+ continue
363368
364369 # Call the URL to get a title, if possible
365370 title = find_title (url )
@@ -373,14 +378,15 @@ def process_urls(bot, trigger, urls):
373378 if (shorten_url_length > 0 ) and (len (url ) > shorten_url_length ):
374379 tinyurl = get_or_create_shorturl (bot , url )
375380
376- yield (url , title , get_hostname ( url ) , tinyurl )
381+ yield (url , title , parsed_url . hostname , tinyurl , False )
377382
378383
379- def check_callbacks (bot , url ) :
384+ def check_callbacks (bot : Sopel , url : str , use_excludes : bool = True ) -> bool :
380385 """Check if ``url`` is excluded or matches any URL callback patterns.
381386
382387 :param bot: Sopel instance
383- :param str url: URL to check
388+ :param url: URL to check
389+ :param use_excludes: Use or ignore the configured exclusion lists
384390 :return: True if ``url`` is excluded or matches any URL callback pattern
385391
386392 This function looks at the ``bot.memory`` for ``url_exclude`` patterns and
@@ -400,16 +406,21 @@ def check_callbacks(bot, url):
400406
401407 """
402408 # Check if it matches the exclusion list first
403- matched = any (regex .search (url ) for regex in bot .memory ['url_exclude' ])
409+ excluded = False
410+ if use_excludes :
411+ excluded = any (regex .search (url ) for regex in bot .memory ["url_exclude" ])
404412 return (
405- matched or
413+ excluded or
406414 any (bot .search_url_callbacks (url )) or
407415 bot .rules .check_url_callback (bot , url )
408416 )
409417
410418
411- def find_title (url , verify = True ):
412- """Return the title for the given URL."""
419+ def find_title (url : str , verify : bool = True ) -> Optional [str ]:
420+ """Return the title for the given URL.
421+
422+ :param verify: Whether to require a valid certificate when using https
423+ """
413424 try :
414425 response = requests .get (url , stream = True , verify = verify ,
415426 headers = DEFAULT_HEADERS )
@@ -453,26 +464,12 @@ def find_title(url, verify=True):
453464 return title or None
454465
455466
456- def get_hostname (url ):
457- idx = 7
458- if url .startswith ('https://' ):
459- idx = 8
460- elif url .startswith ('ftp://' ):
461- idx = 6
462- hostname = url [idx :]
463- slash = hostname .find ('/' )
464- if slash != - 1 :
465- hostname = hostname [:slash ]
466- return hostname
467-
468-
469- def get_or_create_shorturl (bot , url ):
467+ def get_or_create_shorturl (bot : Sopel , url : str ) -> str :
470468 """Get or create a short URL for ``url``
471469
472470 :param bot: Sopel instance
473- :param str url: URL to get or create a short URL for
471+ :param url: URL to get or create a short URL for
474472 :return: A short URL
475- :rtype: str
476473
477474 It gets the short URL for ``url`` from the bot's memory if it exists.
478475 Otherwise, it creates a short URL (see :func:`get_tinyurl`), stores it
@@ -488,7 +485,7 @@ def get_or_create_shorturl(bot, url):
488485 return tinyurl
489486
490487
491- def get_tinyurl (url ) :
488+ def get_tinyurl (url : str ) -> Optional [ str ] :
492489 """Returns a shortened tinyURL link of the URL"""
493490 base_url = "https://tinyurl.com/api-create.php"
494491 tinyurl = "%s?%s" % (base_url , web .urlencode ({'url' : url }))
0 commit comments