Skip to content

Commit 691ec30

Browse files
committed
pronouns: generate shortest possible prefixes for linking
Inspiration for the actual prefix-finding: https://www.techiedelight.com/shortest-unique-prefix/
1 parent d5a86dd commit 691ec30

File tree

1 file changed

+59
-6
lines changed

1 file changed

+59
-6
lines changed

sopel/modules/pronouns.py

Lines changed: 59 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -70,18 +70,71 @@ def setup(bot):
7070
LOGGER.exception("Couldn't fetch full pronouns list; using default set.")
7171
return
7272

73-
fetched_sets = {}
7473
try:
75-
for line in r.text.splitlines():
76-
split_set = line.split('\t')
77-
short = '{}/.../{}'.format(split_set[0], split_set[-1])
78-
fetched_sets[short] = '/'.join(split_set)
74+
fetched_pairs = _process_pronoun_sets(r.text.splitlines())
7975
except Exception:
8076
# don't care what failed, honestly, since we aren't trying to fix it
8177
LOGGER.exception("Couldn't parse fetched pronouns; using default set.")
8278
return
79+
else:
80+
bot.memory['pronoun_sets'] = dict(fetched_pairs)
81+
82+
83+
def _process_pronoun_sets(set_list):
    """Return ``(shortest unique prefix, full/pronoun/set)`` pairs.

    :param set_list: iterable of pronoun sets, one string per set, with the
                     individual forms separated by tabs or slashes
    :return: a generator of ``(prefix, full_set)`` tuples, suitable for
             passing straight to :class:`dict`

    The trie is built eagerly, before this function returns, so that any
    failure while parsing ``set_list`` is raised at call time (where the
    caller wraps the call in ``try``/``except``) instead of later, when the
    pairs are finally consumed.
    """
    trie = PronounTrie()
    trie.insert_list(set_list)
    return trie.get_pairs()
88+
89+
90+
class PronounTrieNode:
    """A single node in the trie of pronoun sets.

    :param str source: the full pronoun set being inserted when this node
                       was created (empty for a root node)
    """

    def __init__(self, source=''):
        #: The full pronoun set that caused this node's creation.
        self.source = source

        #: How many times this node was visited during insertion.
        self.freq = 0

        #: Child nodes, keyed by the next element of the pronoun set.
        self.children = {}
100+
83101

84-
bot.memory['pronoun_sets'] = fetched_sets
102+
class PronounTrie:
    """Trie of pronoun sets, used to find each set's shortest unique prefix.

    Each pronoun set is split on ``/`` and stored as a path of nodes, one
    node per form. Every node counts how many inserted sets pass through it;
    the first node along a path that only one set passes through marks that
    set's shortest unique prefix.
    """

    def __init__(self):
        self.root = PronounTrieNode()
        """A Trie needs a root entry."""

        self._known_sets = set()
        """Normalized sets inserted so far, used to ignore repeats."""

    def insert(self, pronoun_set):
        """Insert a single pronoun set.

        :param str pronoun_set: the forms of one set, separated by tabs or
                                slashes

        Blank input and sets that were already inserted are ignored. A
        repeated set would otherwise push the visit count of its final node
        above 1 and make the set vanish from :meth:`get_pairs`.
        """
        if not pronoun_set.strip():
            # empty or whitespace-only line: nothing to index
            return

        pronoun_set = pronoun_set.replace('\t', '/')
        if pronoun_set in self._known_sets:
            return
        self._known_sets.add(pronoun_set)

        cur = self.root
        for el in pronoun_set.split('/'):
            # create a new node only if the path doesn't exist yet
            node = cur.children.get(el)
            if node is None:
                node = cur.children[el] = PronounTrieNode(pronoun_set)

            # increment frequency
            node.freq += 1

            # go to the next node
            cur = node

    def insert_list(self, set_list):
        """Load a list of pronoun sets all at once.

        :param set_list: iterable of pronoun set strings, each handled as
                         in :meth:`insert`
        """
        for item in set_list:
            self.insert(item)

    def get_pairs(self, root=None, prefix=''):
        """Yield tuples of ``(prefix, full/pronoun/set)``.

        :param root: node to start walking from; defaults to the trie's root
        :param str prefix: slash-joined path leading to ``root``

        ``prefix`` is the shortest leading run of forms that identifies the
        set. A set that is itself a leading part of a longer set has no such
        unique prefix; it is yielded under its own full text instead.
        """
        if root is None:
            root = self.root

        if root.freq == 1:
            # only one set passes through here, so the path is unique
            yield prefix, root.source
            return

        if prefix in self._known_sets:
            # a set ends on this shared node; its full text is its only key
            yield prefix, prefix

        if prefix:
            prefix += '/'
        for word, node in root.children.items():
            yield from self.get_pairs(node, prefix + word)
85138

86139

87140
@plugin.command('pronouns')

0 commit comments

Comments
 (0)