Skip to content

Commit 9229c20

Browse files
SnoopJ, dgw, and Exirel
committed
wikipedia: support query strings in Wikipedia URLs (closes #2412)
Co-authored-by: dgw <[email protected]>
Co-authored-by: Florian Strzelecki <[email protected]>
1 parent 778cf79 commit 9229c20

File tree

1 file changed

+9
-8
lines changed

1 file changed

+9
-8
lines changed

sopel/modules/wikipedia.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -10,12 +10,12 @@
1010
from html.parser import HTMLParser
1111
import logging
1212
import re
13+
from urllib.parse import quote, unquote, urlparse
1314

1415
from requests import get
1516

1617
from sopel import plugin
1718
from sopel.config import types
18-
from sopel.tools.web import quote, unquote
1919

2020

2121
LOGGER = logging.getLogger(__name__)
@@ -321,26 +321,27 @@ def mw_image_description(server, image):
321321
return desc
322322

323323

324-
# Matches a wikipedia page (excluding spaces and #, but not /File: links), with a separate optional field for the section
325-
@plugin.url(r'https?:\/\/([a-z]+(?:\.m)?\.wikipedia\.org)\/wiki\/((?!File\:)[^ #]+)#?([^ ]*)')
324+
# Matches a Wikipedia link (excluding /File: pages)
325+
@plugin.url(r'https?:\/\/([a-z]+(?:\.m)?\.wikipedia\.org)\/wiki\/((?!File\:)[^ ]+)')
326326
@plugin.output_prefix(PLUGIN_OUTPUT_PREFIX)
327327
def mw_info(bot, trigger, match=None):
328328
"""Retrieves and outputs a snippet of the linked page."""
329329
server = match.group(1)
330-
query = unquote(match.group(2))
331-
section = unquote(match.group(3))
330+
page_info = urlparse(match.group(2))
331+
article = unquote(page_info.path)
332+
section = unquote(page_info.fragment)
332333

333334
if section:
334335
if section.startswith('cite_note-'): # Don't bother trying to retrieve a snippet when cite-note is linked
335-
say_snippet(bot, trigger, server, query, show_url=False)
336+
say_snippet(bot, trigger, server, article, show_url=False)
336337
elif section.startswith('/media'):
337338
# gh2316: media fragments are usually images; try to get an image description
338339
image = section[7:] # strip '/media' prefix in pre-3.9 friendly way
339340
say_image_description(bot, trigger, server, image)
340341
else:
341-
say_section(bot, trigger, server, query, section)
342+
say_section(bot, trigger, server, article, section)
342343
else:
343-
say_snippet(bot, trigger, server, query, show_url=False)
344+
say_snippet(bot, trigger, server, article, show_url=False)
344345

345346

346347
@plugin.command('wikipedia', 'wp')

0 commit comments

Comments
 (0)