Channi Studies

[Data Structure] Hash Table - 6 | Python Implementation 본문

Data Science/Data Structure & Algorithm

[Data Structure] Hash Table - 6 | Python Implementation

Chan Lee 2025. 4. 10. 14:47

In this post, we will finally take a look into Python implementation of hash tables. 

 

HashTable base class

# Base class for a hash table that supports the insert, remove, and search 
# operations.
class HashTable:
    # Returns a non-negative hash code for the specified key.
    def hashKey(self, key):
        return abs(hash(key))
    
    # Inserts the specified key/value pair. If the key already exists, the 
    # corresponding value is updated. If inserted or updated, True is returned. 
    # If not inserted, then False is returned.
    def insert(self, key, value):
        pass
    
    # Searches for the specified key. If found, the key/value pair is removed 
    # from the hash table and True is returned. If not found, False is returned.
    def remove(self, key):
        pass
    
    # Searches for the key, returning the corresponding value if found, None if 
    # not found.
    def search(self, key):
        pass

Classes that derive from HashTable should override insert(), remover(), and search(), but they can use the same hashKey() method. 

 

Chaining

class ChainingHashTableItem:
    def __init__(self, itemKey, itemValue):
        self.key = itemKey
        self.value = itemValue
        self.next = None

class ChainingHashTable(HashTable):
    def __init__(self, initialCapacity = 11):
        self.table = [None] * initialCapacity

    # Inserts the specified key/value pair. If the key already exists, the 
    # corresponding value is updated. If inserted or updated, True is returned. 
    # If not inserted, then False is returned.
    def insert(self, key, value):
        # Hash the key to get the bucket index
        bucket_index = self.hashKey(key) % len(self.table)
      
        # Traverse the linked list, searching for the key. If the key exists in 
        # an item, the value is replaced. Otherwise a new item is appended.
        item = self.table[bucket_index]
        previous = None
        while item != None:
            if key == item.key:
                item.value = value
                return True
            
            previous = item
            item = item.next
      
        # Append to the linked list
        if self.table[bucket_index] == None:
            self.table[bucket_index] = ChainingHashTableItem(key, value)
        else:
            previous.next = ChainingHashTableItem(key, value)
        return True

    # Searches for the specified key. If found, the key/value pair is removed 
    # from the hash table and True is returned. If not found, False is returned.    
    def remove(self, key):
        # Hash the key to get the bucket index
        bucket_index = self.hashKey(key) % len(self.table)
        
        # Search the bucket's linked list for the key
        item = self.table[bucket_index]
        previous = None
        while item != None:
            if key == item.key:
                if previous == None:
                    # Remove linked list's first item
                    self.table[bucket_index] = item.next
                else:
                    previous.next = item.next
                return True
            previous = item
            item = item.next
        return False # key not found
   
    # Searches for the key, returning the corresponding value if found, None if 
    # not found.
    def search(self, key):
        # Hash the key to get the bucket index
        bucket_index = self.hashKey(key) % len(self.table)
        
        # Search the bucket's linked list for the key
        item = self.table[bucket_index]
        while item != None:
            if key == item.key:
                return item.value
            item = item.next
        return None # key not found

 

 

OpenAddressingBucket class

# OpenAddressingBucket represents a bucket with a key and a value
class OpenAddressingBucket:
    def __init__(self, bucket_key = None, bucket_value = None):
        self.key = bucket_key
        self.value = bucket_value
    
    def isEmpty(self):
        if self is OpenAddressingBucket.EMPTY_SINCE_START:
            return True
        return self is OpenAddressingBucket.EMPTY_AFTER_REMOVAL

# Initialize two special bucket types: empty-since-start and empty-after-removal
OpenAddressingBucket.EMPTY_SINCE_START = OpenAddressingBucket()
OpenAddressingBucket.EMPTY_AFTER_REMOVAL = OpenAddressingBucket()

 

OpenAddressingHashTable class

 

class OpenAddressingHashTable(HashTable):
    def __init__(self, initialCapacity):
        self.table = [OpenAddressingBucket.EMPTY_SINCE_START] * initialCapacity
    
    def probe(self, key, i):
        # Each derived class must implement
        pass

    # Inserts the specified key/value pair. If the key already exists, the 
    # corresponding value is updated. If inserted or updated, True is returned. 
    # If not inserted, then False is returned.   
    def insert(self, key, value):
        # Search for the key in the table. If found, update the bucket's value.
        for i in range(len(self.table)):
            bucket_index = self.probe(key, i)
            
            # An empty-since-start bucket implies the key is not in the table
            if self.table[bucket_index] is OpenAddressingBucket.EMPTY_SINCE_START:
                break
            
            if self.table[bucket_index] is not OpenAddressingBucket.EMPTY_AFTER_REMOVAL:
                # Check if the non-empty bucket has the key
                if key == self.table[bucket_index].key:
                    # Update the value
                    self.table[bucket_index].value = value
                    return True
        
        # The key is not in the table, so insert into first empty bucket
        for i in range(len(self.table)):
            bucket_index = self.probe(key, i)
            if self.table[bucket_index].isEmpty():
                self.table[bucket_index] = OpenAddressingBucket(key, value)
                return True
        
        return False # no empty bucket found

    # Searches for the specified key. If found, the key/value pair is removed 
    # from the hash table and True is returned. If not found, False is returned.   
    def remove(self, key):
        for i in range(len(self.table)):
            bucket_index = self.probe(key, i)
            
            # An empty-since-start bucket implies the key is not in the table
            if self.table[bucket_index] is OpenAddressingBucket.EMPTY_SINCE_START:
                return False
            
            if self.table[bucket_index] is not OpenAddressingBucket.EMPTY_AFTER_REMOVAL:
                # Check if the non-empty bucket has the key
                if key == self.table[bucket_index].key:
                   # Remove by setting the bucket to empty-after-removal
                   self.table[bucket_index] = OpenAddressingBucket.EMPTY_AFTER_REMOVAL
                   return True
        return False # key not found
   
    # Searches for the key, returning the corresponding value if found, None if 
    # not found.
    def search(self, key):
        for i in range(len(self.table)):
            bucket_index = self.probe(key, i)
            
            # An empty-since-start bucket implies the key is not in the table
            if self.table[bucket_index] is OpenAddressingBucket.EMPTY_SINCE_START:
                return None
            
            if self.table[bucket_index] is not OpenAddressingBucket.EMPTY_AFTER_REMOVAL:
                # Check if the non-empty bucket has the key
                if key == self.table[bucket_index].key:
                    return self.table[bucket_index].value
        
        return None # key not found

 

LinearProbingHashTable Class

class LinearProbingHashTable(OpenAddressingHashTable):
    def __init__(self, initial_capacity = 11):
        OpenAddressingHashTable.__init__(self, initial_capacity)
    
    # Returns the bucket index for the specified key and i value using the 
    # linear probing sequence.
    def probe(self, key, i):
        return (self.hashKey(key) + i) % len(self.table)

 

QuadraticProbingHashTable Class

class QuadraticProbingHashTable(OpenAddressingHashTable):
    def __init__(self, c1 = 1, c2 = 1, initial_capacity = 11):
        OpenAddressingHashTable.__init__(self, initial_capacity)
        self.c1 = c1
        self.c2 = c2
    
    # Returns the bucket index for the specified key and i value using the 
    # quadratic probing sequence.
    def probe(self, key, i):
        return (self.hashKey(key) + self.c1 * i + self.c2 * i * i) % len(self.table)

 

DoubleHashingHashTable Class

class DoubleHashingHashTable(OpenAddressingHashTable):
    def __init__(self, initial_capacity = 11):
        OpenAddressingHashTable.__init__(self, initial_capacity)
    
    def h2(self, key):
        return 7 - self.hashKey(key) % 7
    
    # Returns the bucket index for the specified key and i value using the 
    # double hashing probing sequence.
    def probe(self, key, i):
        return (self.hashKey(key) + i * self.h2(key)) % len(self.table)