Binary Search Trees

In [3]:
class TreeNode:
    def __init__(self, key, val, left=None, right=None, parent=None):
        self.key = key
        self.payload = val
        self.left_child = left
        self.right_child= right
        self.parent = parent
        
    def __iter__(self):
        if self:
            if self.has_left_child():
                for elem in self.left_child:
                    yield elem
                    
            yield self.key
            
            if self.has_right_child():
                for elem in self.right_child:
                    yield elem
                    
    def has_left_child(self):
        return self.left_child
    
    def has_right_child(self):
        return self.right_child
    
    def is_left_child(self):
        return self.parent and self.parent.left_child is self
    
    def is_right_child(self):
        return self.parent and self.parent.right_child is self
    
    def is_root(self):
        return not self.parent
    
    def is_leaf(self):
        return not self.left_child and not self.right_child
    
    def has_any_children(self):
        return self.right_child or self.left_child
    
    def has_both_children(self):
        return self.right_child and self.left_child
    
    def replace_node_data(self, key, value, lc, rc):
        self.key = key
        self.payload = value
        self.left_child = lc
        self.right_child = rc
        
        if self.has_left_child():
            self.left_child.parent = self
            
        if self.has_right_child():
            self.right_child.parent = self
            
    def splice_out(self):
        if self.is_leaf():
            if self.is_left_child():
                self.parent.left_child = None
            else:
                self.parent.right_child = None
        elif self.has_any_children():
            if self.has_left_child():
                if self.is_left_child():
                    self.parent.left_child = self.left_child
                else:
                    self.parent.right_child = self.left_child
                    
                self.left_child.parent = self.parent
            else:  # has a right child
                if self.is_left_child():
                    self.parent.left_child = self.right_child
                else:
                    self.parent.right_child = self.right_child
                    
                self.right_child.parent = self.parent
                
    def find_successor(self):
        succ = None
        
        if self.has_right_child():
            succ = self.right_child.find_min()
        else:  # no right child? Then we work on left child
            if self.parent:  # it is an intermediate node
                if self.is_left_child():
                    succ = self.parent
                else:  # is a right child
                    self.parent.right_child = None
                    succ = self.parent.find_successor()
                    self.parent.right_child = self
        return succ
    
    
    def find_min(self):
        current = self
        
        while current.has_left_child():
            current = current.left_child

        return current
In [4]:
class BinarySearchTree:
    def __init__(self):
        self.root = None
        self.size = 0
    
    def __len__(self):
        return self.length()
    
    def __iter__(self):
        return self.root.__iter__()
    
    def __setitem__(self, k, v):
        self.put(k, v)
  
    def __getitem__(self, k):
        return self.get(k)
    
    def __contains__(self, k):
        #if self._get(key, self.root):
        #    return True
        #else:
        #    return False
        
        return self._get(k, self.root) is not None

    def __delitem__(self, k):
        self.delete(k)
    
    def length(self):
        return self.size
    
    def put(self, key, val):
        if self.root:
            self._put(key, val, self.root)
        else:  # the tree is non-existient
            self.root = TreeNode(key, val)
            
        self.size += 1
        
    def _put(self, key, val, current_node):
        if key < current_node.key:
            if current_node.has_left_child():
                self._put(key, val, current_node.left_child)
            else:
                current_node.left_child = TreeNode(key, val, parent=current_node)
        else:
            if current_node.has_right_child():
                self._put(key, val, current_node.right_child)
            else:
                current_node.right_child = TreeNode(key, val, parent=current_node)
                
    def get(self, key):
        if self.root:
            res = self._get(key, self.root)
            if res:
                # return the value of our TreeNode with matching key
                return res.payload
            else:
                return None
        else:
            return None
        
    def _get(self, key, current_node):
        if not current_node:
            return None
        elif current_node.key == key:
            return current_node
        elif key < current_node.key:
            return self._get(key, current_node.left_child)
        else:
            return self._get(key, current_node.right_child)
        
    def delete(self, key):
        if self.size > 1:
            node_to_remove = self._get(key, self.root)
            if node_to_remove:
                self.remove(node_to_remove)
                self.size -= 1
            else:
                raise KeyError("Key not found in tree!")
        elif self.size == 1 and self.root.key == key:
            # tree is completely removed, since it was only one node
            self.root = None
            self.size -= 1
        else:
            raise KeyError("Key not found in tree!")
            
    def remove(self, current_node):
        if current_node.is_leaf():  # leaf
            #if current_node == current_node.parent.left_child:
            if current_node.is_left_child():   
                current_node.parent.left_child = None
            else:
                current_node.parent.right_child = None
        elif current_node.has_both_children():  # interior
            succ = current_node.find_successor()
            succ.splice_out()
            current_node.key = succ.key
            current_node.payload = succ.payload
        else:  # node has one child
            if current_node.has_left_child():
                if current_node.is_left_child():
                    current_node.left_child.parent = current_node.parent
                    current_node.parent.left_child = current_node.left_child
                elif current_node.is_right_child():
                    current_node.left_child.parent = current_node.parent
                    current_node.parent.right_child = current_node.left_child
                else:
                    current_node.replace_node_data(current_node.left_child.key,
                                                   current_node.left_child.payload,
                                                   current_node.left_child.left_child,
                                                   current_node.left_child.right_child)
            else:
                if current_node.is_left_child():
                    current_node.right_child.parent = current_node.parent
                    current_node.parent.left_child = current_node.right_child
                elif current_node.is_right_child():
                    current_node.right_child.parent = current_node.parent
                    current_node.parent.right_child = current_node.right_child
                else:
                    current_node.replace_node_data(current_node.right_child.key,
                                                   current_node.right_child.payload,
                                                   current_node.right_child.left_child,
                                                   current_node.right_child.right_child)   

Testing Our Binary Search Tree

In [5]:
bt = BinarySearchTree()
In [6]:
bt
Out[6]:
<__main__.BinarySearchTree at 0x10c23fdc0>
In [7]:
bt[24] = "apple"
In [8]:
bt[24]
Out[8]:
'apple'
In [9]:
bt.get(24)
Out[9]:
'apple'
In [10]:
24 in bt
Out[10]:
True
In [11]:
42 in bt
Out[11]:
False
In [12]:
bt.size
Out[12]:
1
In [13]:
len(bt)
Out[13]:
1
In [14]:
bt.root
Out[14]:
<__main__.TreeNode at 0x10c23f3a0>
In [15]:
del bt[24]
In [16]:
len(bt)
Out[16]:
0
In [17]:
bt[42]
In [18]:
bt[42] = "Don't Panic!"
In [19]:
bt.delete(24)
---------------------------------------------------------------------------
KeyError                                  Traceback (most recent call last)
<ipython-input-19-72e1e3b14341> in <module>
----> 1 bt.delete(24)

<ipython-input-4-f0a8ecb70d2c> in delete(self, key)
     84             self.size -= 1
     85         else:
---> 86             raise KeyError("Key not found in tree!")
     87 
     88     def remove(self, current_node):

KeyError: 'Key not found in tree!'
In [20]:
bt[24] = "Panic Don't!"
In [21]:
list(bt)
Out[21]:
[24, 42]
In [22]:
d = {42: "Don't Panic!", 24: "Panic Don't!"}
In [23]:
d[24]
Out[23]:
"Panic Don't!"
In [24]:
bt[24]
Out[24]:
"Panic Don't!"
In [25]:
d
Out[25]:
{42: "Don't Panic!", 24: "Panic Don't!"}
In [26]:
bt
Out[26]:
<__main__.BinarySearchTree at 0x10c23fdc0>
In [27]:
for k in bt:
    print(f"{k}: {bt[k]!r}")
24: "Panic Don't!"
42: "Don't Panic!"

Testing with names and ranks

In [28]:
import requests
from random import shuffle
In [84]:
def get_names(url):
    req = requests.get(url)
    lines = req.text.split("\n")
    names = []
    
    for line in lines:
        data = line.split()
        if len(data) == 4:
            names.append( (data[0], data[3]) )
            
    return names
In [85]:
def main():
    names = get_names("https://www2.census.gov/topics/genealogy/1990surnames/dist.female.first")
    print(f"Extracted {len(names)} names")
    shuffle(names)
    
    name_tree = BinarySearchTree()
    
    for name, rank in names:
        name_tree[int(rank)] = name
        
    return name_tree
In [86]:
nt = main()
Extracted 4275 names
In [87]:
len(nt)
Out[87]:
4275
In [88]:
nt[33]
Out[88]:
'ANNA'
In [89]:
nt[0]
In [90]:
nt[4000]
Out[90]:
'YURIKO'
In [91]:
nt[1000]
Out[91]:
'CELINA'

Visualizing our BST

In [73]:
from graphviz import Digraph
from pythonds9.basic.stack import Stack
from pythonds9.tree.binary_tree import BinaryTree
In [81]:
def viz_tree(r):
    stack = Stack()
    g = Digraph(node_attr={'shape': 'record', 'height': '.1'})
    _id = 0
    
    if isinstance(r, BinaryTree):
        current_node = r
    elif isinstance(r, BinarySearchTree):
        current_node = r.root  # root is a TreeNode object!
        
    leftward = True
    current_root_num = 0

    while True:
        if current_node:
            stack.push((_id, current_node))
            
            if isinstance(current_node, BinaryTree):
                g.node(f'node{_id}',
                   f'<f0>|<f1> {current_node.key}|<f2> ')
            elif isinstance(current_node, TreeNode):
                g.node(f'node{_id}',
                   f'<f0>|<f1> {current_node.key}:{current_node.payload}|<f2> ')
            elif isinstance(current_node, AVLTreeNode):
                g.node(f'node{_id}',
                       f'<f0>|<f1> {current_node.key}:{current_node.payload} ({current_node.balance_factor})|<f2> ')

            if _id >= 1:
                g.edge(f'node{current_root_num}:f{0 if leftward else 2}',
                       f'node{_id}:f1')

            leftward = True
            current_node = current_node.left_child  # left
            current_root_num = _id
            _id += 1

        if current_node is None and not stack.is_empty():
            count, popped_node = stack.pop()
            if popped_node.right_child:
                current_root_num = count
                current_node = popped_node.right_child  # right
                leftward = False

        if current_node is None and stack.is_empty():
            break

    return g
In [92]:
g = viz_tree(nt)
In [94]:
g.render("bst")
Out[94]:
'bst.pdf'

Visualizing with 'names' as keys

In [55]:
def main():
    names = get_names("https://www2.census.gov/topics/genealogy/1990surnames/dist.female.first")
    print(f"Extracted {len(names)} names")
    shuffle(names)
    
    name_tree = BinarySearchTree()
    
    for name, rank in names:
        name_tree[name] = int(rank)
        
    return name_tree
In [56]:
nt = main()
Extracted 4275 names
In [57]:
g = viz_tree(nt)
In [58]:
g.render("bst_names_as_key")
Out[58]:
'bst_names_as_key.pdf'

Visualizing with sorted 'names' as keys

In [59]:
def main():
    names = get_names("https://www2.census.gov/topics/genealogy/1990surnames/dist.female.first")
    print(f"Extracted {len(names)} names")
    
    # No shuffling!
    #shuffle(names)
    
    # names[("MARY", "1"), ...)
    names.sort(key=lambda t: t[0])
    
    name_tree = BinarySearchTree()
    
    for name, rank in names:
        name_tree[name] = int(rank)
        
    return name_tree
In [60]:
nt = main()
Extracted 4275 names
---------------------------------------------------------------------------
RecursionError                            Traceback (most recent call last)
<ipython-input-60-8778a79c8192> in <module>
----> 1 nt = main()

<ipython-input-59-2cf8d7059a2c> in main()
     12 
     13     for name, rank in names:
---> 14         name_tree[name] = int(rank)
     15 
     16     return name_tree

<ipython-input-4-f0a8ecb70d2c> in __setitem__(self, k, v)
     11 
     12     def __setitem__(self, k, v):
---> 13         self.put(k, v)
     14 
     15     def __getitem__(self, k):

<ipython-input-4-f0a8ecb70d2c> in put(self, key, val)
     32     def put(self, key, val):
     33         if self.root:
---> 34             self._put(key, val, self.root)
     35         else:  # the tree is non-existient
     36             self.root = TreeNode(key, val)

<ipython-input-4-f0a8ecb70d2c> in _put(self, key, val, current_node)
     46         else:
     47             if current_node.has_right_child():
---> 48                 self._put(key, val, current_node.right_child)
     49             else:
     50                 current_node.right_child = TreeNode(key, val, parent=current_node)

... last 1 frames repeated, from the frame below ...

<ipython-input-4-f0a8ecb70d2c> in _put(self, key, val, current_node)
     46         else:
     47             if current_node.has_right_child():
---> 48                 self._put(key, val, current_node.right_child)
     49             else:
     50                 current_node.right_child = TreeNode(key, val, parent=current_node)

RecursionError: maximum recursion depth exceeded
In [ ]:
g = viz_tree(nt)
In [ ]:
g.render("bst_names_as_key_sorted")

Visualizing with sorted 'names' as keys, smaller subset

In [65]:
def main():
    names = get_names("https://www2.census.gov/topics/genealogy/1990surnames/dist.female.first")
    print(f"Extracted {len(names)} names")
    
    # No shuffling!
    #shuffle(names)
    
    # names[("MARY", "1"), ...)
    names.sort(key=lambda t: t[0])
    
    name_tree = BinarySearchTree()
    
    for name, rank in names[:100]:  # limit our tree to 100 names!
        name_tree[name] = int(rank)
        
    return name_tree
In [66]:
nt = main()
Extracted 4275 names
In [67]:
g = viz_tree(nt)
In [68]:
g.render("bst_names_as_key_sorted")
Out[68]:
'bst_names_as_key_sorted.pdf'