Skip to content

Commit 2faba88

Browse files
committed
pronouns: generate shortest possible prefixes for linking
Inspiration for the actual prefix-finding: https://www.techiedelight.com/shortest-unique-prefix/
1 parent d5a86dd commit 2faba88

File tree

1 file changed

+58
-6
lines changed

1 file changed

+58
-6
lines changed

sopel/modules/pronouns.py

Lines changed: 58 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,70 @@ def setup(bot):
7070
LOGGER.exception("Couldn't fetch full pronouns list; using default set.")
7171
return
7272

73-
fetched_sets = {}
7473
try:
75-
for line in r.text.splitlines():
76-
split_set = line.split('\t')
77-
short = '{}/.../{}'.format(split_set[0], split_set[-1])
78-
fetched_sets[short] = '/'.join(split_set)
74+
fetched_pairs = _process_pronoun_sets(r.text.splitlines())
7975
except Exception:
8076
# don't care what failed, honestly, since we aren't trying to fix it
8177
LOGGER.exception("Couldn't parse fetched pronouns; using default set.")
8278
return
79+
else:
80+
bot.memory['pronoun_sets'] = dict(fetched_pairs)
81+
82+
83+
def _process_pronoun_sets(set_list):
    """Yield ``(prefix, full/pronoun/set)`` pairs for the given pronoun sets.

    :param set_list: iterable of pronoun sets, one string per set; each one
                     is handed unchanged to :meth:`PronounTrie.insert_list`

    This is a generator: the trie is only built once iteration starts, so any
    error raised while loading the sets surfaces when the result is consumed.
    """
    prefix_tree = PronounTrie()
    prefix_tree.insert_list(set_list)

    for pair in prefix_tree.get_pairs():
        yield pair
87+
88+
89+
class PronounTrieNode:
    """A single node of a :class:`PronounTrie`.

    :param str source: the full pronoun set whose insertion created this node
    """

    def __init__(self, source=''):
        self.children = {}
        """Child nodes are stored here, keyed by the next pronoun in the set."""

        self.freq = 0
        """Store how many times this node is visited during insertion."""

        self.source = source
        """The full pronoun set that caused this node's creation."""

        self.ends_set = None
        """The full pronoun set that ends exactly at this node, if any."""


class PronounTrie:
    """Trie of pronoun sets, used to find each set's shortest unique prefix.

    Every pronoun in a set is one level of the trie. A node that was visited
    by exactly one inserted set marks the shortest prefix that identifies
    that set unambiguously.
    """

    def __init__(self):
        self.root = PronounTrieNode()
        """A Trie needs a root entry."""

    def insert(self, pronoun_set):
        """Insert a single pronoun set.

        :param str pronoun_set: the pronouns of one set, separated by tabs
                                or slashes

        Sets that contain nothing but separators and whitespace (e.g. a blank
        line in the fetched list) are ignored.
        """
        pronoun_set = pronoun_set.replace('\t', '/')
        if not pronoun_set.replace('/', '').strip():
            # nothing but separators/whitespace: not a pronoun set
            return

        cur = self.root
        for el in pronoun_set.split('/'):
            # create a new node only if the path doesn't exist yet
            child = cur.children.get(el)
            if child is None:
                child = PronounTrieNode(pronoun_set)
                cur.children[el] = child

            # increment frequency
            child.freq += 1

            # go to the next node
            cur = child

        # remember that a complete set ends here, so it can still be reported
        # when this node is shared with other sets (see `get_pairs`)
        cur.ends_set = pronoun_set

    def insert_list(self, set_list):
        """Load a list of pronoun sets all at once.

        :param set_list: iterable of pronoun set strings, each passed to
                         :meth:`insert`
        """
        for item in set_list:
            self.insert(item)

    def get_pairs(self, root=None, prefix=''):
        """Yield tuples of ``(prefix, full/pronoun/set)``.

        :param root: node to start from; defaults to the trie's root
        :type root: :class:`PronounTrieNode`
        :param str prefix: slash-separated path leading to ``root``

        A set that shares its whole path with other sets (an exact duplicate,
        or a strict prefix of a longer set) has no shorter unique prefix; it
        is yielded once under the path at which it ends.
        """
        if root is None:
            root = self.root

        if root.freq == 1:
            # only one set passes through here: this is its shortest prefix
            yield prefix, root.source
            return

        if root.ends_set is not None:
            # a set ends on this shared node; without this it would be lost
            yield prefix, root.ends_set

        if prefix:
            prefix += '/'
        for word, node in root.children.items():
            yield from self.get_pairs(node, prefix + word)
85137

86138

87139
@plugin.command('pronouns')

0 commit comments

Comments
 (0)