HashTable ADT

In [7]:
class HashTable:
    def __init__(self, size):
        self.size = size
        self.slots = [None] * self.size
        self.data = [None] * self.size
        
    def put(self, key, data):
        hashvalue = self.hashfunction(key)
        
        if self.slots[hashvalue] == None:
            # Our first hash works! We have an open spot
            self.slots[hashvalue] = key
            self.data[hashvalue] = data
        else:
            if self.slots[hashvalue] == key:
                # We have an existing key of that value in our hash table, so
                # this becomes an *update* operation.
                self.data[hashvalue] = data  # replace the existing data with new value
            else:
                # something is in our hashvalue position, and its not our key
                # we have collision and we need to remiadte it
                nextslot = self.rehash(hashvalue)
                while (self.slots[nextslot] != None and
                       self.slots[nextslot] != key):
                    nextslot = self.rehash(nextslot)
                    
                if self.slots[nextslot] == None:
                    # We found an open slot through rehashing
                    self.slots[nextslot] = key
                    self.data[nextslot] = data
                else:
                    # We found our key in some slot through rehashing
                    self.data[nextslot] = data
                    
    def get(self, key):
        startslot = self.hashfunction(key)
        position = startslot

        data = None
        stop = False
        found = False
        
        while (self.slots[position] != None and
               not found and
               not stop):
            
            if self.slots[position] == key:
                found = True
                data = self.data[position]
            else:
                position = self.rehash(position)
                if position == startslot:
                    stop = True  # our value is NOT in our hashtable

        return data
    
    def hashfunction(self, key):
        if isinstance(key, int):
            return key%self.size
        elif isinstance(key, str):
            # Basic hashing with weighted ordinal values
            sum_ = 0
            for i,c in enumerate(key, start=1):
                sum_ += ord(c)*i
            return sum_%self.size
    
    def rehash(self, oldhash):
        #return (oldhash+1)%self.size
        return self.hashfunction(oldhash+1)
In [8]:
words = ["aah" ,"aal" ,"aas" ,"aba" ,"abs" ,"aby" ,"ace",
         "act" ,"add" ,"ado" ,"ads" ,"adz" ,"aff" ,"aft" ,
         "aga" ,"age" ,"ago" ,"ags" ,"aha" ,"ahi" ,"ahs" ,"aid" ,
         "ail" ,"aim" ,"ain" ,"air" ,"ais" ,"ait" ,"aji" ,"ala" ,
         "alb" ,"ale" ,"all" ,"alp" ,"als" ,"alt" ,"ama" ,"ami" ,
         "amp" ,"amu" ,"ana" ,"and" ,"ane" ,"ani" ,"ant" ,"any" ,
         "ape" ,"apo" ,"app" ,"apt" ,"arb" ,"arc" ,"are" ,"arf" ,
         "ark" ,"arm" ,"ars" ,"art" ,"ash" ,"ask" ,"asp" ,"ass" ,
         "ate" ,"att" ,"auk" ,"ava" ,"ave" ,"avo" ,"awa" ,"awe" ,
         "awl" ,"awn" ,"axe" ,"aye", "ays"]
print(len(words))
75
In [9]:
ht = HashTable(100)
for i, word in enumerate(words):
    ht.put(word, i)
In [10]:
ht.get("ark")
Out[10]:
54
In [12]:
%pprint
Pretty printing has been turned OFF
In [13]:
ht.slots
Out[13]:
[None, None, None, 'aah', 'ala', None, 'age', 'aff', 'aid', 'alb', 'ama', 'ana', None, None, None, 'aal', 'ale', 'and', None, 'arb', 'ahi', 'ane', 'arc', None, 'aji', 'ape', 'ava', 'awa', 'are', None, 'ado', 'ail', 'ami', 'ani', 'aim', 'arf', 'aas', 'ago', 'abs', 'ain', 'all', 'ash', 'ads', 'act', 'ate', 'ave', 'ark', 'awe', 'ags', 'aft', 'ahs', 'air', 'ais', 'alp', 'amp', 'ait', 'aby', 'apo', 'als', 'app', 'arm', 'alt', 'ask', 'adz', 'asp', 'ant', 'amu', 'auk', 'avo', 'apt', 'ars', 'awl', 'ass', 'art', 'awn', 'axe', 'aye', 'att', None, None, 'any', None, None, None, 'aba', 'ays', None, None, None, None, None, None, None, None, 'aga', None, 'aha', 'add', 'ace', None]
In [15]:
ht.slots.count(None)
Out[15]:
25
In [16]:
ht.data
Out[16]:
[None, None, None, 0, 29, None, 15, 12, 21, 30, 36, 40, None, None, None, 1, 31, 41, None, 50, 19, 42, 51, None, 28, 46, 65, 68, 52, None, 9, 22, 37, 43, 23, 53, 2, 16, 4, 24, 32, 58, 10, 7, 62, 66, 54, 69, 17, 13, 20, 25, 26, 33, 38, 27, 5, 47, 34, 48, 55, 35, 59, 11, 60, 44, 39, 64, 67, 49, 56, 70, 61, 57, 71, 72, 73, 63, None, None, 45, None, None, None, 3, 74, None, None, None, None, None, None, None, None, 14, None, 18, 8, 6, None]
In [17]:
for word in words:
    print(ht[word])
---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
<ipython-input-17-ba5c70f5e72f> in <module>
      1 for word in words:
----> 2     print(ht[word])

TypeError: 'HashTable' object is not subscriptable
In [20]:
for word in words:
    print(ht.get(word), end=" ")
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 
In [ ]:
# __str__ output -> "<HashTable:{key1: value1, key2: value2}>"