def test_Trie():
    """Exercise insertion, prefix lookup and deletion on a small trie."""
    words = ["A", "to", "tea", "ted", "ten", "i", "in", "inn"]
    trie = Trie()
    for word in words:
        trie.insert(word)

    # Everything that was inserted must be reachable.
    assert all(trie.is_present(word) for word in words)

    # Prefix queries; sorted where the traversal order is not pinned down.
    t_words = trie.strings_with_prefix("t")
    assert sorted(t_words) == ['tea', 'ted', 'ten', 'to']
    te_words = trie.strings_with_prefix("te")
    assert sorted(te_words) == ["tea", "ted", "ten"]
    assert trie.strings_with_prefix("i") == ["i", "in", "inn"]
    assert trie.strings_with_prefix("a") == []

    # Deleting a string with no matching path reports None.
    assert trie.delete("z") is None

    # Remove the strings one at a time; after each removal only the removed
    # string may have disappeared, every other remaining one must survive.
    for victim in ["to", "tea", "ted", "ten", "inn", "in", "A"]:
        trie.delete(victim)
        for word in words:
            assert trie.is_present(word) == (word != victim)
        words.remove(victim)
class Trie(object):
    """
    Represents the trie data structure for storing strings.

    Every node below the root holds a single character. A node whose
    ``is_terminal`` flag is set marks the end of an inserted string.

    Examples
    ========

    >>> from pydatastructs import Trie
    >>> trie = Trie()
    >>> trie.insert("a")
    >>> trie.insert("aa")
    >>> trie.strings_with_prefix("a")
    ['a', 'aa']
    >>> trie.strings_with_prefix("aa")
    ['aa']
    >>> trie.is_present("aa")
    True
    >>> trie.delete("aa")
    True
    >>> trie.is_present("aa")
    False

    References
    ==========

    .. [1] https://en.wikipedia.org/wiki/Trie
    """

    __slots__ = ['root']

    @classmethod
    def methods(cls):
        return ['__new__', 'insert', 'is_present', 'delete',
                'strings_with_prefix']

    def __new__(cls):
        obj = object.__new__(cls)
        # The root carries no character of its own.
        obj.root = TrieNode()
        return obj

    def insert(self, string: str) -> None:
        """
        Inserts the given string into the trie.

        Parameters
        ==========

        string: str

        Returns
        =======

        None
        """
        walk = self.root
        for char in string:
            child = walk.get_child(char)
            if child is None:
                # No edge for this character yet, so extend the path.
                child = TrieNode(char)
                walk.add_child(child)
            walk = child
        walk.is_terminal = True

    def is_present(self, string: str) -> bool:
        """
        Checks if the given string is present as a prefix in the trie.

        Parameters
        ==========

        string: str

        Returns
        =======

        True if the given string is present as a prefix;
        False in all other cases.
        """
        walk = self.root
        for char in string:
            walk = walk.get_child(char)
            if walk is None:
                return False
        return True

    def delete(self, string: str) -> bool:
        """
        Deletes the given string from the trie.

        The terminal mark on the last node of the string is cleared and
        every node that is left without children and without a terminal
        mark is pruned, walking back towards the root.

        Parameters
        ==========

        string: str

        Returns
        =======

        True if the path spelled by the string exists in the trie (this
        includes a string that is only a prefix of stored strings, in
        which case nothing is removed);
        None if the string is not present in the trie.
        """
        # ancestors[k] is the node reached before consuming string[k].
        ancestors = []
        node = self.root
        for char in string:
            ancestors.append(node)
            node = node.get_child(char)
            if node is None:
                return None

        node.is_terminal = False
        # Prune upwards. Stop at the root, at a node that still has other
        # children, or at a node that ends another stored string.
        while ancestors and not node._children and not node.is_terminal:
            parent = ancestors.pop()
            parent.remove_child(node.char)
            node = parent
        return True

    def strings_with_prefix(self, string: str) -> list:
        """
        Generates a list of all strings with the given prefix.

        Parameters
        ==========

        string: str

        Returns
        =======

        strings: list
            The list of strings with the given prefix. The prefix itself
            comes first when it is a stored string; no sorting is applied
            to the rest.
        """

        def _collect(prefix: str, node: 'TrieNode', strings: list) -> None:
            # Iterative depth-first walk of the subtree rooted at ``node``.
            # ``prefix`` is the text spelled by the path above ``node``.
            pending = [(node, prefix)]
            while pending:
                current, above = pending.pop()
                word = above + current.char
                if current.is_terminal:
                    strings.append(word)
                for child in current._children.values():
                    pending.append((child, word))

        strings = []
        prefix = ""
        walk = self.root
        for char in string:
            walk = walk.get_child(char)
            if walk is None:
                return strings
            prefix += char
        if walk.is_terminal:
            # Record the whole prefix, not just the character stored in its
            # last node (which is None for the root).
            strings.append(prefix)
        for child in walk._children.values():
            _collect(prefix, child, strings)
        return strings
class TrieNode(Node):
    """
    Represents nodes in the trie data structure.

    Each node keeps its own character, a mapping from character to child
    node, and a flag telling whether a stored string ends at this node.

    Parameters
    ==========

    char: The character stored in the current node.
        Optional, by default None.
    """

    __slots__ = ['char', '_children', 'is_terminal']

    @classmethod
    def methods(cls):
        return ['__new__', 'add_child', 'get_child', 'remove_child']

    def __new__(cls, char=None):
        node = Node.__new__(cls)
        node.char, node._children, node.is_terminal = char, {}, False
        return node

    def add_child(self, trie_node) -> None:
        # Children are keyed by the character they hold, so adding a node
        # with an already-used character replaces the previous child.
        key = trie_node.char
        self._children[key] = trie_node

    def get_child(self, char: str):
        # Returns None when there is no child for ``char``.
        return self._children.get(char)

    def remove_child(self, char: str) -> None:
        # Raises KeyError when there is no child for ``char``.
        del self._children[char]