|
8 | 8 | from __future__ import annotations |
9 | 9 |
|
10 | 10 | from html.parser import HTMLParser |
| 11 | +import logging |
11 | 12 | import re |
12 | 13 |
|
13 | 14 | from requests import get |
|
17 | 18 | from sopel.tools.web import quote, unquote |
18 | 19 |
|
19 | 20 |
|
| 21 | +LOGGER = logging.getLogger(__name__) |
| 22 | + |
20 | 23 | REDIRECT = re.compile(r'^REDIRECT (.*)') |
21 | 24 | PLUGIN_OUTPUT_PREFIX = '[wikipedia] ' |
22 | 25 |
|
@@ -272,18 +275,72 @@ def mw_section(server, query, section): |
272 | 275 | return text |
273 | 276 |
|
274 | 277 |
|
def say_image_description(bot, trigger, server, image):
    """Output the description of ``image`` as found on ``server``, if any.

    Nothing is said when no description could be retrieved.
    """
    description = mw_image_description(server, image)
    if not description:
        return

    bot.say(description, truncation=" […]")
| 283 | + |
| 284 | + |
def mw_image_description(server, image):
    """Retrieve the description for the given image.

    :param str server: hostname of the MediaWiki instance to query
    :param str image: title of the image page to look up
    :return: the description with markup discarded and runs of whitespace
             collapsed, or ``None`` if the API response does not contain one
    """
    url = "https://{server}/w/api.php".format(server=server)
    # Let requests build the query string: the title arrives already
    # unquoted, so reserved characters in it (``&``, ``#``, ``+``, spaces…)
    # must be percent-encoded again or they corrupt the request.
    params = {
        "action": "query",
        "prop": "imageinfo",
        "format": "json",
        "indexpageids": "1",
        "iiprop": "extmetadata",
        "iiextmetadatafilter": "ImageDescription",
        "iilimit": "1",
        "titles": image,
    }

    data = get(url, params=params).json()

    try:
        query_data = data["query"]
        pageids = query_data["pageids"]
        pages = query_data["pages"]

        page = pages[pageids[0]]

        raw_desc = page["imageinfo"][0]["extmetadata"]["ImageDescription"]["value"]
    except (LookupError, TypeError):
        # LookupError: an expected key/index is missing from the response.
        # TypeError: a container in the response has an unexpected type
        # (e.g. a list where a mapping was expected), so subscripting fails.
        LOGGER.exception("Error getting image description for %r, response was: %r", image, data)
        return None

    # Some descriptions contain markup, use WikiParser to discard that
    parser = WikiParser(image)
    parser.feed(raw_desc)
    desc = parser.get_result()

    # collapse multiple whitespace chars into single spaces
    return ' '.join(desc.split())
| 322 | + |
| 323 | + |
# Matches a wikipedia page URL (the page name may not contain spaces or #, and
# File: links are excluded), with a separate optional field for the section
@plugin.url(r'https?:\/\/([a-z]+(?:\.m)?\.wikipedia\.org)\/wiki\/((?!File\:)[^ #]+)#?([^ ]*)')
@plugin.output_prefix(PLUGIN_OUTPUT_PREFIX)
def mw_info(bot, trigger, match=None):
    """Retrieves and outputs a snippet of the linked page.

    Depending on the URL fragment, this outputs a snippet of the page, a
    description of the linked media file, or a snippet of the linked section.
    """
    server = match.group(1)
    query = unquote(match.group(2))
    section = unquote(match.group(3))

    # gh2316: media fragments are usually images. Use one constant for both
    # the test and the strip so the two can never disagree on the length.
    media_prefix = '/media/'

    if not section or section.startswith('cite_note-'):
        # No fragment, or a cite-note is linked: don't bother trying to
        # retrieve a section, just show the page snippet.
        say_snippet(bot, trigger, server, query, show_url=False)
    elif section.startswith(media_prefix):
        # strip the prefix in a pre-3.9 friendly way (no str.removeprefix)
        image = section[len(media_prefix):]
        say_image_description(bot, trigger, server, image)
    else:
        say_section(bot, trigger, server, query, section)
287 | 344 |
|
288 | 345 |
|
289 | 346 | @plugin.command('wikipedia', 'wp') |
|
0 commit comments