Skip to content

Commit 14a7d4d

Browse files
committed
pronouns: generate shortest possible prefixes for linking
Inspiration for the actual prefix-finding: https://www.techiedelight.com/shortest-unique-prefix/
1 parent 41b7a13 commit 14a7d4d

File tree

1 file changed

+56
-8
lines changed

1 file changed

+56
-8
lines changed

sopel/modules/pronouns.py

Lines changed: 56 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -65,23 +65,71 @@ def setup(bot):
6565
r = requests.get(
6666
'https://github.com/witch-house/pronoun.is/raw/master/resources/pronouns.tab')
6767
r.raise_for_status()
68+
fetched_pairs = _process_pronoun_sets(r.text.splitlines())
6869
except requests.exceptions.RequestException:
6970
# don't do anything, just log the failure and use the hard-coded set
7071
LOGGER.exception("Couldn't fetch full pronouns list; using default set.")
7172
return
72-
73-
fetched_sets = {}
74-
try:
75-
for line in r.text.splitlines():
76-
split_set = line.split('\t')
77-
short = '{}/.../{}'.format(split_set[0], split_set[-1])
78-
fetched_sets[short] = '/'.join(split_set)
7973
except Exception:
8074
# don't care what failed, honestly, since we aren't trying to fix it
8175
LOGGER.exception("Couldn't parse fetched pronouns; using default set.")
8276
return
77+
else:
78+
bot.memory['pronoun_sets'] = dict(fetched_pairs)
79+
80+
81+
def _process_pronoun_sets(set_list):
    """Pair every pronoun set with its shortest unique linking prefix.

    :param set_list: iterable of pronoun sets; each item is a string of
                     pronouns separated by tabs or slashes
    :return: list of ``(prefix, full/pronoun/set)`` tuples

    The pairs are built eagerly and returned as a list. If this were a
    generator function, none of the work would run until the result was
    consumed, so an error raised while processing the sets would surface
    outside of whatever ``try`` block wrapped the call to this function.
    """
    trie = PronounTrie()
    trie.insert_list(set_list)
    return list(trie.get_pairs())
85+
86+
87+
class PronounTrieNode:
    """A single node in a :class:`PronounTrie`."""

    def __init__(self, source=''):
        self.children = {}
        """Child nodes are stored here, keyed by the next pronoun in a set."""

        self.freq = 0
        """Store how many times this node is visited during insertion."""

        self.source = source
        """The full pronoun set that caused this node's creation."""

        self.terminal = None
        """The full pronoun set that ends exactly at this node, if any."""


class PronounTrie:
    """Trie of pronoun sets used to find each set's shortest unique prefix."""

    def __init__(self):
        self.root = PronounTrieNode()
        """A Trie needs a root entry."""

    def insert(self, pronoun_set):
        """Insert a single pronoun set.

        :param str pronoun_set: pronouns separated by tabs or slashes

        Blank sets are ignored so that empty input lines do not produce a
        meaningless ``'' -> ''`` entry.
        """
        pronoun_set = pronoun_set.replace('\t', '/')
        if not pronoun_set.strip():
            return

        current_node = self.root
        for pronoun in pronoun_set.split('/'):
            # Look the child up first: building the new node unconditionally
            # (as ``setdefault`` would require) wastes an allocation on every
            # step of a path that already exists.
            child = current_node.children.get(pronoun)
            if child is None:
                child = PronounTrieNode(pronoun_set)
                current_node.children[pronoun] = child

            # increment frequency
            child.freq += 1
            current_node = child

        # Remember that a complete set ends here. Without this, a set that is
        # inserted twice, or that is a prefix of a longer set, never reaches
        # a node with ``freq == 1`` and would be lost by ``get_pairs``.
        current_node.terminal = pronoun_set

    def insert_list(self, set_list):
        """Load a list of pronoun sets all at once."""
        for item in set_list:
            self.insert(item)

    def get_pairs(self, root=None, prefix=''):
        """Yield tuples of ``(prefix, full/pronoun/set)``.

        :param root: node to start from; defaults to the trie's root
        :param str prefix: slash-joined path leading to ``root``

        A node visited exactly once belongs to a single set, so the path to
        it is that set's shortest unique prefix. A set that has no unique
        prefix (a duplicate, or a prefix of a longer set) is yielded under
        its own full path instead.
        """
        if root is None:
            root = self.root

        if root.freq == 1:
            yield prefix, root.source
            return

        if root.terminal is not None:
            yield prefix, root.terminal

        if prefix:
            prefix += '/'
        for word, node in root.children.items():
            yield from self.get_pairs(node, prefix + word)
85133

86134

87135
@plugin.command('pronouns')

0 commit comments

Comments
 (0)