跳转至

散列表进阶

概述

散列表进阶内容涵盖开放寻址法、再哈希、一致性哈希等高级主题。

开放寻址法

线性探测

线性探测是开放寻址法中最简单的冲突解决策略。当发生冲突时,顺序检查下一个位置,直到找到空槽。

探测公式h(k, i) = (h'(k) + i) mod m

C
typedef struct {
    int *keys;
    int *values;
    int *deleted;
    int size;
    int capacity;
} OpenHashTable;

OpenHashTable* createOpenHash(int capacity) {
    OpenHashTable *ht = (OpenHashTable*)malloc(sizeof(OpenHashTable));
    ht->capacity = capacity;
    ht->size = 0;
    ht->keys = (int*)calloc(capacity, sizeof(int));
    ht->values = (int*)calloc(capacity, sizeof(int));
    ht->deleted = (int*)calloc(capacity, sizeof(int));
    
    for (int i = 0; i < capacity; i++) {
        ht->keys[i] = -1;
    }
    
    return ht;
}

int hash(int key, int capacity) {
    return (key % capacity + capacity) % capacity;
}

void insertOpen(OpenHashTable *ht, int key, int value) {
    if (ht->size >= ht->capacity * 0.7) return;
    
    int index = hash(key, ht->capacity);
    int firstDeleted = -1;
    
    while (ht->keys[index] != -1 && ht->keys[index] != key) {
        if (ht->deleted[index] && firstDeleted == -1) {
            firstDeleted = index;
        }
        index = (index + 1) % ht->capacity;
    }
    
    if (ht->keys[index] == key) {
        ht->values[index] = value;
    } else {
        int insertIndex = (firstDeleted != -1) ? firstDeleted : index;
        ht->keys[insertIndex] = key;
        ht->values[insertIndex] = value;
        ht->deleted[insertIndex] = 0;
        ht->size++;
    }
}

int getOpen(OpenHashTable *ht, int key) {
    int index = hash(key, ht->capacity);
    int startIndex = index;
    
    do {
        if (ht->keys[index] == key && !ht->deleted[index]) {
            return ht->values[index];
        }
        if (ht->keys[index] == -1 && !ht->deleted[index]) {
            return -1;
        }
        index = (index + 1) % ht->capacity;
    } while (index != startIndex);
    
    return -1;
}

void deleteOpen(OpenHashTable *ht, int key) {
    int index = hash(key, ht->capacity);
    int startIndex = index;
    
    do {
        if (ht->keys[index] == key && !ht->deleted[index]) {
            ht->deleted[index] = 1;
            ht->size--;
            return;
        }
        if (ht->keys[index] == -1 && !ht->deleted[index]) {
            return;
        }
        index = (index + 1) % ht->capacity;
    } while (index != startIndex);
}
C++
class OpenHashTable {
private:
    vector<int> keys;
    vector<int> values;
    vector<bool> deleted;
    int size;
    int capacity;
    
    int hash(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
public:
    OpenHashTable(int cap) : capacity(cap), size(0) {
        keys.resize(capacity, -1);
        values.resize(capacity, 0);
        deleted.resize(capacity, false);
    }
    
    void insert(int key, int value) {
        if (size >= capacity * 0.7) return;
        
        int index = hash(key);
        int firstDeleted = -1;
        
        while (keys[index] != -1 && keys[index] != key) {
            if (deleted[index] && firstDeleted == -1) {
                firstDeleted = index;
            }
            index = (index + 1) % capacity;
        }
        
        if (keys[index] == key) {
            values[index] = value;
        } else {
            int insertIdx = (firstDeleted != -1) ? firstDeleted : index;
            keys[insertIdx] = key;
            values[insertIdx] = value;
            deleted[insertIdx] = false;
            size++;
        }
    }
    
    int get(int key) {
        int index = hash(key);
        int startIdx = index;
        
        do {
            if (keys[index] == key && !deleted[index]) {
                return values[index];
            }
            if (keys[index] == -1 && !deleted[index]) {
                return -1;
            }
            index = (index + 1) % capacity;
        } while (index != startIdx);
        
        return -1;
    }
    
    void remove(int key) {
        int index = hash(key);
        int startIdx = index;
        
        do {
            if (keys[index] == key && !deleted[index]) {
                deleted[index] = true;
                size--;
                return;
            }
            if (keys[index] == -1 && !deleted[index]) {
                return;
            }
            index = (index + 1) % capacity;
        } while (index != startIdx);
    }
};
Python
class OpenHashTable:
    def __init__(self, capacity):
        self.capacity = capacity
        self.size = 0
        self.keys = [-1] * capacity
        self.values = [0] * capacity
        self.deleted = [False] * capacity
    
    def _hash(self, key):
        return key % self.capacity
    
    def insert(self, key, value):
        if self.size >= self.capacity * 0.7:
            return
        
        index = self._hash(key)
        first_deleted = -1
        
        while self.keys[index] != -1 and self.keys[index] != key:
            if self.deleted[index] and first_deleted == -1:
                first_deleted = index
            index = (index + 1) % self.capacity
        
        if self.keys[index] == key:
            self.values[index] = value
        else:
            insert_idx = first_deleted if first_deleted != -1 else index
            self.keys[insert_idx] = key
            self.values[insert_idx] = value
            self.deleted[insert_idx] = False
            self.size += 1
    
    def get(self, key):
        index = self._hash(key)
        start_idx = index
        
        while True:
            if self.keys[index] == key and not self.deleted[index]:
                return self.values[index]
            if self.keys[index] == -1 and not self.deleted[index]:
                return -1
            index = (index + 1) % self.capacity
            if index == start_idx:
                break
        
        return -1
    
    def remove(self, key):
        index = self._hash(key)
        start_idx = index
        
        while True:
            if self.keys[index] == key and not self.deleted[index]:
                self.deleted[index] = True
                self.size -= 1
                return
            if self.keys[index] == -1 and not self.deleted[index]:
                return
            index = (index + 1) % self.capacity
            if index == start_idx:
                break
Java
public class OpenHashTable {
    private int[] keys;
    private int[] values;
    private boolean[] deleted;
    private int size;
    private int capacity;
    
    public OpenHashTable(int capacity) {
        this.capacity = capacity;
        this.size = 0;
        this.keys = new int[capacity];
        this.values = new int[capacity];
        this.deleted = new boolean[capacity];
        Arrays.fill(keys, -1);
    }
    
    private int hash(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    public void insert(int key, int value) {
        if (size >= capacity * 0.7) return;
        
        int index = hash(key);
        int firstDeleted = -1;
        
        while (keys[index] != -1 && keys[index] != key) {
            if (deleted[index] && firstDeleted == -1) {
                firstDeleted = index;
            }
            index = (index + 1) % capacity;
        }
        
        if (keys[index] == key) {
            values[index] = value;
        } else {
            int insertIdx = (firstDeleted != -1) ? firstDeleted : index;
            keys[insertIdx] = key;
            values[insertIdx] = value;
            deleted[insertIdx] = false;
            size++;
        }
    }
    
    public int get(int key) {
        int index = hash(key);
        int startIdx = index;
        
        do {
            if (keys[index] == key && !deleted[index]) {
                return values[index];
            }
            if (keys[index] == -1 && !deleted[index]) {
                return -1;
            }
            index = (index + 1) % capacity;
        } while (index != startIdx);
        
        return -1;
    }
    
    public void remove(int key) {
        int index = hash(key);
        int startIdx = index;
        
        do {
            if (keys[index] == key && !deleted[index]) {
                deleted[index] = true;
                size--;
                return;
            }
            if (keys[index] == -1 && !deleted[index]) {
                return;
            }
            index = (index + 1) % capacity;
        } while (index != startIdx);
    }
}
Go
type OpenHashTable struct {
    keys     []int
    values   []int
    deleted  []bool
    size     int
    capacity int
}

func NewOpenHashTable(capacity int) *OpenHashTable {
    ht := &OpenHashTable{
        keys:     make([]int, capacity),
        values:   make([]int, capacity),
        deleted:  make([]bool, capacity),
        size:     0,
        capacity: capacity,
    }
    for i := range ht.keys {
        ht.keys[i] = -1
    }
    return ht
}

func (ht *OpenHashTable) hash(key int) int {
    return ((key % ht.capacity) + ht.capacity) % ht.capacity
}

func (ht *OpenHashTable) Insert(key, value int) {
    if ht.size >= int(float64(ht.capacity)*0.7) {
        return
    }
    
    index := ht.hash(key)
    firstDeleted := -1
    
    for ht.keys[index] != -1 && ht.keys[index] != key {
        if ht.deleted[index] && firstDeleted == -1 {
            firstDeleted = index
        }
        index = (index + 1) % ht.capacity
    }
    
    if ht.keys[index] == key {
        ht.values[index] = value
    } else {
        insertIdx := index
        if firstDeleted != -1 {
            insertIdx = firstDeleted
        }
        ht.keys[insertIdx] = key
        ht.values[insertIdx] = value
        ht.deleted[insertIdx] = false
        ht.size++
    }
}

func (ht *OpenHashTable) Get(key int) int {
    index := ht.hash(key)
    startIdx := index
    
    for {
        if ht.keys[index] == key && !ht.deleted[index] {
            return ht.values[index]
        }
        if ht.keys[index] == -1 && !ht.deleted[index] {
            return -1
        }
        index = (index + 1) % ht.capacity
        if index == startIdx {
            break
        }
    }
    
    return -1
}

func (ht *OpenHashTable) Remove(key int) {
    index := ht.hash(key)
    startIdx := index
    
    for {
        if ht.keys[index] == key && !ht.deleted[index] {
            ht.deleted[index] = true
            ht.size--
            return
        }
        if ht.keys[index] == -1 && !ht.deleted[index] {
            return
        }
        index = (index + 1) % ht.capacity
        if index == startIdx {
            break
        }
    }
}
Rust
pub struct OpenHashTable {
    keys: Vec<i32>,
    values: Vec<i32>,
    deleted: Vec<bool>,
    size: usize,
    capacity: usize,
}

impl OpenHashTable {
    pub fn new(capacity: usize) -> Self {
        OpenHashTable {
            keys: vec![-1; capacity],
            values: vec![0; capacity],
            deleted: vec![false; capacity],
            size: 0,
            capacity,
        }
    }
    
    fn hash(&self, key: i32) -> usize {
        let cap = self.capacity as i32;
        (((key % cap) + cap) % cap) as usize
    }
    
    pub fn insert(&mut self, key: i32, value: i32) {
        if self.size >= (self.capacity as f64 * 0.7) as usize {
            return;
        }
        
        let mut index = self.hash(key);
        let mut first_deleted = None;
        
        while self.keys[index] != -1 && self.keys[index] != key {
            if self.deleted[index] && first_deleted.is_none() {
                first_deleted = Some(index);
            }
            index = (index + 1) % self.capacity;
        }
        
        if self.keys[index] == key {
            self.values[index] = value;
        } else {
            let insert_idx = first_deleted.unwrap_or(index);
            self.keys[insert_idx] = key;
            self.values[insert_idx] = value;
            self.deleted[insert_idx] = false;
            self.size += 1;
        }
    }
    
    pub fn get(&self, key: i32) -> Option<i32> {
        let mut index = self.hash(key);
        let start_idx = index;
        
        loop {
            if self.keys[index] == key && !self.deleted[index] {
                return Some(self.values[index]);
            }
            if self.keys[index] == -1 && !self.deleted[index] {
                return None;
            }
            index = (index + 1) % self.capacity;
            if index == start_idx {
                break;
            }
        }
        
        None
    }
    
    pub fn remove(&mut self, key: i32) {
        let mut index = self.hash(key);
        let start_idx = index;
        
        loop {
            if self.keys[index] == key && !self.deleted[index] {
                self.deleted[index] = true;
                self.size -= 1;
                return;
            }
            if self.keys[index] == -1 && !self.deleted[index] {
                return;
            }
            index = (index + 1) % self.capacity;
            if index == start_idx {
                break;
            }
        }
    }
}

二次探测

二次探测使用二次函数作为探测步长,减少主聚集现象。

探测公式h(k, i) = (h'(k) + c₁i + c₂i²) mod m

C
int quadraticProbe(int key, int capacity, int i) {
    int h = hash(key, capacity);
    return (h + i * i) % capacity;
}

void insertQuadratic(OpenHashTable *ht, int key, int value) {
    for (int i = 0; i < ht->capacity; i++) {
        int index = quadraticProbe(key, ht->capacity, i);
        
        if (ht->keys[index] == -1 || ht->deleted[index]) {
            ht->keys[index] = key;
            ht->values[index] = value;
            ht->deleted[index] = 0;
            ht->size++;
            return;
        }
        
        if (ht->keys[index] == key) {
            ht->values[index] = value;
            return;
        }
    }
}

int getQuadratic(OpenHashTable *ht, int key) {
    for (int i = 0; i < ht->capacity; i++) {
        int index = quadraticProbe(key, ht->capacity, i);
        
        if (ht->keys[index] == key && !ht->deleted[index]) {
            return ht->values[index];
        }
        if (ht->keys[index] == -1 && !ht->deleted[index]) {
            return -1;
        }
    }
    return -1;
}
C++
class QuadraticHashTable {
private:
    vector<int> keys;
    vector<int> values;
    vector<bool> deleted;
    int size;
    int capacity;
    
    int hash(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    int quadraticProbe(int key, int i) {
        return (hash(key) + i * i) % capacity;
    }
    
public:
    QuadraticHashTable(int cap) : capacity(cap), size(0) {
        keys.resize(capacity, -1);
        values.resize(capacity, 0);
        deleted.resize(capacity, false);
    }
    
    void insert(int key, int value) {
        for (int i = 0; i < capacity; i++) {
            int index = quadraticProbe(key, i);
            
            if (keys[index] == -1 || deleted[index]) {
                keys[index] = key;
                values[index] = value;
                deleted[index] = false;
                size++;
                return;
            }
            
            if (keys[index] == key) {
                values[index] = value;
                return;
            }
        }
    }
    
    int get(int key) {
        for (int i = 0; i < capacity; i++) {
            int index = quadraticProbe(key, i);
            
            if (keys[index] == key && !deleted[index]) {
                return values[index];
            }
            if (keys[index] == -1 && !deleted[index]) {
                return -1;
            }
        }
        return -1;
    }
};
Python
class QuadraticHashTable:
    def __init__(self, capacity):
        self.capacity = capacity
        self.size = 0
        self.keys = [-1] * capacity
        self.values = [0] * capacity
        self.deleted = [False] * capacity
    
    def _hash(self, key):
        return key % self.capacity
    
    def _quadratic_probe(self, key, i):
        return (self._hash(key) + i * i) % self.capacity
    
    def insert(self, key, value):
        for i in range(self.capacity):
            index = self._quadratic_probe(key, i)
            
            if self.keys[index] == -1 or self.deleted[index]:
                self.keys[index] = key
                self.values[index] = value
                self.deleted[index] = False
                self.size += 1
                return
            
            if self.keys[index] == key:
                self.values[index] = value
                return
    
    def get(self, key):
        for i in range(self.capacity):
            index = self._quadratic_probe(key, i)
            
            if self.keys[index] == key and not self.deleted[index]:
                return self.values[index]
            if self.keys[index] == -1 and not self.deleted[index]:
                return -1
        return -1
Java
public class QuadraticHashTable {
    private int[] keys;
    private int[] values;
    private boolean[] deleted;
    private int size;
    private int capacity;
    
    public QuadraticHashTable(int capacity) {
        this.capacity = capacity;
        this.size = 0;
        this.keys = new int[capacity];
        this.values = new int[capacity];
        this.deleted = new boolean[capacity];
        Arrays.fill(keys, -1);
    }
    
    private int hash(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    private int quadraticProbe(int key, int i) {
        return (hash(key) + i * i) % capacity;
    }
    
    public void insert(int key, int value) {
        for (int i = 0; i < capacity; i++) {
            int index = quadraticProbe(key, i);
            
            if (keys[index] == -1 || deleted[index]) {
                keys[index] = key;
                values[index] = value;
                deleted[index] = false;
                size++;
                return;
            }
            
            if (keys[index] == key) {
                values[index] = value;
                return;
            }
        }
    }
    
    public int get(int key) {
        for (int i = 0; i < capacity; i++) {
            int index = quadraticProbe(key, i);
            
            if (keys[index] == key && !deleted[index]) {
                return values[index];
            }
            if (keys[index] == -1 && !deleted[index]) {
                return -1;
            }
        }
        return -1;
    }
}
Go
type QuadraticHashTable struct {
    keys     []int
    values   []int
    deleted  []bool
    size     int
    capacity int
}

func NewQuadraticHashTable(capacity int) *QuadraticHashTable {
    ht := &QuadraticHashTable{
        keys:     make([]int, capacity),
        values:   make([]int, capacity),
        deleted:  make([]bool, capacity),
        size:     0,
        capacity: capacity,
    }
    for i := range ht.keys {
        ht.keys[i] = -1
    }
    return ht
}

func (ht *QuadraticHashTable) hash(key int) int {
    return ((key % ht.capacity) + ht.capacity) % ht.capacity
}

func (ht *QuadraticHashTable) quadraticProbe(key, i int) int {
    return (ht.hash(key) + i*i) % ht.capacity
}

func (ht *QuadraticHashTable) Insert(key, value int) {
    for i := 0; i < ht.capacity; i++ {
        index := ht.quadraticProbe(key, i)
        
        if ht.keys[index] == -1 || ht.deleted[index] {
            ht.keys[index] = key
            ht.values[index] = value
            ht.deleted[index] = false
            ht.size++
            return
        }
        
        if ht.keys[index] == key {
            ht.values[index] = value
            return
        }
    }
}

func (ht *QuadraticHashTable) Get(key int) int {
    for i := 0; i < ht.capacity; i++ {
        index := ht.quadraticProbe(key, i)
        
        if ht.keys[index] == key && !ht.deleted[index] {
            return ht.values[index]
        }
        if ht.keys[index] == -1 && !ht.deleted[index] {
            return -1
        }
    }
    return -1
}
Rust
pub struct QuadraticHashTable {
    keys: Vec<i32>,
    values: Vec<i32>,
    deleted: Vec<bool>,
    size: usize,
    capacity: usize,
}

impl QuadraticHashTable {
    pub fn new(capacity: usize) -> Self {
        QuadraticHashTable {
            keys: vec![-1; capacity],
            values: vec![0; capacity],
            deleted: vec![false; capacity],
            size: 0,
            capacity,
        }
    }
    
    fn hash(&self, key: i32) -> usize {
        let cap = self.capacity as i32;
        (((key % cap) + cap) % cap) as usize
    }
    
    fn quadratic_probe(&self, key: i32, i: usize) -> usize {
        (self.hash(key) + i * i) % self.capacity
    }
    
    pub fn insert(&mut self, key: i32, value: i32) {
        for i in 0..self.capacity {
            let index = self.quadratic_probe(key, i);
            
            if self.keys[index] == -1 || self.deleted[index] {
                self.keys[index] = key;
                self.values[index] = value;
                self.deleted[index] = false;
                self.size += 1;
                return;
            }
            
            if self.keys[index] == key {
                self.values[index] = value;
                return;
            }
        }
    }
    
    pub fn get(&self, key: i32) -> Option<i32> {
        for i in 0..self.capacity {
            let index = self.quadratic_probe(key, i);
            
            if self.keys[index] == key && !self.deleted[index] {
                return Some(self.values[index]);
            }
            if self.keys[index] == -1 && !self.deleted[index] {
                return None;
            }
        }
        None
    }
}

双重哈希

双重哈希使用两个独立的哈希函数,具有较好的分散性。

探测公式h(k, i) = (h₁(k) + i × h₂(k)) mod m

C
int hash1(int key, int capacity) {
    return (key % capacity + capacity) % capacity;
}

int hash2(int key, int capacity) {
    int h = (key % (capacity - 1) + capacity - 1) % (capacity - 1);
    return h + 1;
}

void insertDoubleHash(OpenHashTable *ht, int key, int value) {
    int h1 = hash1(key, ht->capacity);
    int h2 = hash2(key, ht->capacity);
    
    for (int i = 0; i < ht->capacity; i++) {
        int index = (h1 + i * h2) % ht->capacity;
        
        if (ht->keys[index] == -1 || ht->deleted[index]) {
            ht->keys[index] = key;
            ht->values[index] = value;
            ht->deleted[index] = 0;
            ht->size++;
            return;
        }
        
        if (ht->keys[index] == key) {
            ht->values[index] = value;
            return;
        }
    }
}

int getDoubleHash(OpenHashTable *ht, int key) {
    int h1 = hash1(key, ht->capacity);
    int h2 = hash2(key, ht->capacity);
    
    for (int i = 0; i < ht->capacity; i++) {
        int index = (h1 + i * h2) % ht->capacity;
        
        if (ht->keys[index] == key && !ht->deleted[index]) {
            return ht->values[index];
        }
        if (ht->keys[index] == -1 && !ht->deleted[index]) {
            return -1;
        }
    }
    return -1;
}
C++
class DoubleHashTable {
private:
    vector<int> keys;
    vector<int> values;
    vector<bool> deleted;
    int size;
    int capacity;
    
    int hash1(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    int hash2(int key) {
        int h = ((key % (capacity - 1)) + capacity - 1) % (capacity - 1);
        return h + 1;
    }
    
public:
    DoubleHashTable(int cap) : capacity(cap), size(0) {
        keys.resize(capacity, -1);
        values.resize(capacity, 0);
        deleted.resize(capacity, false);
    }
    
    void insert(int key, int value) {
        int h1 = hash1(key);
        int h2 = hash2(key);
        
        for (int i = 0; i < capacity; i++) {
            int index = (h1 + i * h2) % capacity;
            
            if (keys[index] == -1 || deleted[index]) {
                keys[index] = key;
                values[index] = value;
                deleted[index] = false;
                size++;
                return;
            }
            
            if (keys[index] == key) {
                values[index] = value;
                return;
            }
        }
    }
    
    int get(int key) {
        int h1 = hash1(key);
        int h2 = hash2(key);
        
        for (int i = 0; i < capacity; i++) {
            int index = (h1 + i * h2) % capacity;
            
            if (keys[index] == key && !deleted[index]) {
                return values[index];
            }
            if (keys[index] == -1 && !deleted[index]) {
                return -1;
            }
        }
        return -1;
    }
};
Python
class DoubleHashTable:
    def __init__(self, capacity):
        self.capacity = capacity
        self.size = 0
        self.keys = [-1] * capacity
        self.values = [0] * capacity
        self.deleted = [False] * capacity
    
    def _hash1(self, key):
        return key % self.capacity
    
    def _hash2(self, key):
        h = key % (self.capacity - 1)
        return h + 1
    
    def insert(self, key, value):
        h1 = self._hash1(key)
        h2 = self._hash2(key)
        
        for i in range(self.capacity):
            index = (h1 + i * h2) % self.capacity
            
            if self.keys[index] == -1 or self.deleted[index]:
                self.keys[index] = key
                self.values[index] = value
                self.deleted[index] = False
                self.size += 1
                return
            
            if self.keys[index] == key:
                self.values[index] = value
                return
    
    def get(self, key):
        h1 = self._hash1(key)
        h2 = self._hash2(key)
        
        for i in range(self.capacity):
            index = (h1 + i * h2) % self.capacity
            
            if self.keys[index] == key and not self.deleted[index]:
                return self.values[index]
            if self.keys[index] == -1 and not self.deleted[index]:
                return -1
        return -1
Java
public class DoubleHashTable {
    private int[] keys;
    private int[] values;
    private boolean[] deleted;
    private int size;
    private int capacity;
    
    public DoubleHashTable(int capacity) {
        this.capacity = capacity;
        this.size = 0;
        this.keys = new int[capacity];
        this.values = new int[capacity];
        this.deleted = new boolean[capacity];
        Arrays.fill(keys, -1);
    }
    
    private int hash1(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    private int hash2(int key) {
        int h = ((key % (capacity - 1)) + capacity - 1) % (capacity - 1);
        return h + 1;
    }
    
    public void insert(int key, int value) {
        int h1 = hash1(key);
        int h2 = hash2(key);
        
        for (int i = 0; i < capacity; i++) {
            int index = (h1 + i * h2) % capacity;
            
            if (keys[index] == -1 || deleted[index]) {
                keys[index] = key;
                values[index] = value;
                deleted[index] = false;
                size++;
                return;
            }
            
            if (keys[index] == key) {
                values[index] = value;
                return;
            }
        }
    }
    
    public int get(int key) {
        int h1 = hash1(key);
        int h2 = hash2(key);
        
        for (int i = 0; i < capacity; i++) {
            int index = (h1 + i * h2) % capacity;
            
            if (keys[index] == key && !deleted[index]) {
                return values[index];
            }
            if (keys[index] == -1 && !deleted[index]) {
                return -1;
            }
        }
        return -1;
    }
}
Go
type DoubleHashTable struct {
    keys     []int
    values   []int
    deleted  []bool
    size     int
    capacity int
}

func NewDoubleHashTable(capacity int) *DoubleHashTable {
    ht := &DoubleHashTable{
        keys:     make([]int, capacity),
        values:   make([]int, capacity),
        deleted:  make([]bool, capacity),
        size:     0,
        capacity: capacity,
    }
    for i := range ht.keys {
        ht.keys[i] = -1
    }
    return ht
}

func (ht *DoubleHashTable) hash1(key int) int {
    return ((key % ht.capacity) + ht.capacity) % ht.capacity
}

func (ht *DoubleHashTable) hash2(key int) int {
    h := ((key % (ht.capacity - 1)) + ht.capacity - 1) % (ht.capacity - 1)
    return h + 1
}

func (ht *DoubleHashTable) Insert(key, value int) {
    h1 := ht.hash1(key)
    h2 := ht.hash2(key)
    
    for i := 0; i < ht.capacity; i++ {
        index := (h1 + i*h2) % ht.capacity
        
        if ht.keys[index] == -1 || ht.deleted[index] {
            ht.keys[index] = key
            ht.values[index] = value
            ht.deleted[index] = false
            ht.size++
            return
        }
        
        if ht.keys[index] == key {
            ht.values[index] = value
            return
        }
    }
}

func (ht *DoubleHashTable) Get(key int) int {
    h1 := ht.hash1(key)
    h2 := ht.hash2(key)
    
    for i := 0; i < ht.capacity; i++ {
        index := (h1 + i*h2) % ht.capacity
        
        if ht.keys[index] == key && !ht.deleted[index] {
            return ht.values[index]
        }
        if ht.keys[index] == -1 && !ht.deleted[index] {
            return -1
        }
    }
    return -1
}
Rust
pub struct DoubleHashTable {
    keys: Vec<i32>,
    values: Vec<i32>,
    deleted: Vec<bool>,
    size: usize,
    capacity: usize,
}

impl DoubleHashTable {
    pub fn new(capacity: usize) -> Self {
        DoubleHashTable {
            keys: vec![-1; capacity],
            values: vec![0; capacity],
            deleted: vec![false; capacity],
            size: 0,
            capacity,
        }
    }
    
    fn hash1(&self, key: i32) -> usize {
        let cap = self.capacity as i32;
        (((key % cap) + cap) % cap) as usize
    }
    
    fn hash2(&self, key: i32) -> usize {
        let cap = self.capacity as i32;
        let h = ((key % (cap - 1)) + cap - 1) % (cap - 1);
        (h + 1) as usize
    }
    
    pub fn insert(&mut self, key: i32, value: i32) {
        let h1 = self.hash1(key);
        let h2 = self.hash2(key);
        
        for i in 0..self.capacity {
            let index = (h1 + i * h2) % self.capacity;
            
            if self.keys[index] == -1 || self.deleted[index] {
                self.keys[index] = key;
                self.values[index] = value;
                self.deleted[index] = false;
                self.size += 1;
                return;
            }
            
            if self.keys[index] == key {
                self.values[index] = value;
                return;
            }
        }
    }
    
    pub fn get(&self, key: i32) -> Option<i32> {
        let h1 = self.hash1(key);
        let h2 = self.hash2(key);
        
        for i in 0..self.capacity {
            let index = (h1 + i * h2) % self.capacity;
            
            if self.keys[index] == key && !self.deleted[index] {
                return Some(self.values[index]);
            }
            if self.keys[index] == -1 && !self.deleted[index] {
                return None;
            }
        }
        None
    }
}

一致性哈希

一致性哈希将哈希值空间组织成环状结构,实现负载均衡和数据分布。

C
typedef struct {
    int node;
    int position;
} VirtualNode;

typedef struct {
    VirtualNode *nodes;
    int count;
    int capacity;
} ConsistentHash;

int compareVirtualNodes(const void *a, const void *b) {
    return ((VirtualNode*)a)->position - ((VirtualNode*)b)->position;
}

ConsistentHash* createConsistentHash(int *realNodes, int realCount, int virtualCount) {
    ConsistentHash *ch = (ConsistentHash*)malloc(sizeof(ConsistentHash));
    ch->capacity = realCount * virtualCount;
    ch->count = ch->capacity;
    ch->nodes = (VirtualNode*)malloc(sizeof(VirtualNode) * ch->capacity);
    
    int index = 0;
    for (int i = 0; i < realCount; i++) {
        for (int j = 0; j < virtualCount; j++) {
            int virtualKey = realNodes[i] * 1000 + j;
            ch->nodes[index].node = realNodes[i];
            ch->nodes[index].position = hash(virtualKey, 1000000);
            index++;
        }
    }
    
    qsort(ch->nodes, ch->count, sizeof(VirtualNode), compareVirtualNodes);
    return ch;
}

int getNode(ConsistentHash *ch, int key) {
    int position = hash(key, 1000000);
    
    int left = 0, right = ch->count;
    while (left < right) {
        int mid = (left + right) / 2;
        if (ch->nodes[mid].position < position) {
            left = mid + 1;
        } else {
            right = mid;
        }
    }
    
    if (left >= ch->count) left = 0;
    return ch->nodes[left].node;
}
C++
class ConsistentHash {
private:
    struct VirtualNode {
        int node;
        int position;
        bool operator<(const VirtualNode& other) const {
            return position < other.position;
        }
    };
    
    vector<VirtualNode> nodes;
    int hashSpace;
    
    int hash(int key) {
        return ((key % hashSpace) + hashSpace) % hashSpace;
    }
    
public:
    ConsistentHash(int hashSpace = 1000000) : hashSpace(hashSpace) {}
    
    void addNode(int node, int virtualCount) {
        for (int i = 0; i < virtualCount; i++) {
            int virtualKey = node * 1000 + i;
            nodes.push_back({node, hash(virtualKey)});
        }
        sort(nodes.begin(), nodes.end());
    }
    
    int getNode(int key) {
        if (nodes.empty()) return -1;
        
        int position = hash(key);
        auto it = lower_bound(nodes.begin(), nodes.end(), 
                              VirtualNode{-1, position});
        
        if (it == nodes.end()) {
            return nodes[0].node;
        }
        return it->node;
    }
    
    void removeNode(int node) {
        nodes.erase(
            remove_if(nodes.begin(), nodes.end(),
                [node](const VirtualNode& vn) { return vn.node == node; }),
            nodes.end()
        );
    }
};
Python
import bisect

class ConsistentHash:
    def __init__(self, hash_space=1000000):
        self.hash_space = hash_space
        self.nodes = []
        self.node_positions = {}
    
    def _hash(self, key):
        return key % self.hash_space
    
    def add_node(self, node, virtual_count=100):
        for i in range(virtual_count):
            virtual_key = node * 1000 + i
            position = self._hash(virtual_key)
            self.nodes.append((position, node))
        
        self.nodes.sort()
        self.node_positions[node] = virtual_count
    
    def remove_node(self, node):
        self.nodes = [(pos, n) for pos, n in self.nodes if n != node]
        if node in self.node_positions:
            del self.node_positions[node]
    
    def get_node(self, key):
        if not self.nodes:
            return None
        
        position = self._hash(key)
        index = bisect.bisect_left(self.nodes, (position, -1))
        
        if index >= len(self.nodes):
            index = 0
        
        return self.nodes[index][1]
Java
import java.util.*;

public class ConsistentHash {
    private static class VirtualNode implements Comparable<VirtualNode> {
        int node;
        int position;
        
        VirtualNode(int node, int position) {
            this.node = node;
            this.position = position;
        }
        
        @Override
        public int compareTo(VirtualNode other) {
            return Integer.compare(this.position, other.position);
        }
    }
    
    private List<VirtualNode> nodes;
    private int hashSpace;
    
    public ConsistentHash(int hashSpace) {
        this.nodes = new ArrayList<>();
        this.hashSpace = hashSpace;
    }
    
    private int hash(int key) {
        return ((key % hashSpace) + hashSpace) % hashSpace;
    }
    
    public void addNode(int node, int virtualCount) {
        for (int i = 0; i < virtualCount; i++) {
            int virtualKey = node * 1000 + i;
            nodes.add(new VirtualNode(node, hash(virtualKey)));
        }
        Collections.sort(nodes);
    }
    
    public int getNode(int key) {
        if (nodes.isEmpty()) return -1;
        
        int position = hash(key);
        int index = Collections.binarySearch(
            nodes, new VirtualNode(-1, position));
        
        if (index < 0) {
            index = -index - 1;
        }
        
        if (index >= nodes.size()) {
            index = 0;
        }
        
        return nodes.get(index).node;
    }
}
Go
import "sort"

type VirtualNode struct {
    node     int
    position int
}

type ConsistentHash struct {
    nodes     []VirtualNode
    hashSpace int
}

func NewConsistentHash(hashSpace int) *ConsistentHash {
    return &ConsistentHash{
        nodes:     make([]VirtualNode, 0),
        hashSpace: hashSpace,
    }
}

func (ch *ConsistentHash) hash(key int) int {
    return ((key % ch.hashSpace) + ch.hashSpace) % ch.hashSpace
}

func (ch *ConsistentHash) AddNode(node, virtualCount int) {
    for i := 0; i < virtualCount; i++ {
        virtualKey := node*1000 + i
        ch.nodes = append(ch.nodes, VirtualNode{
            node:     node,
            position: ch.hash(virtualKey),
        })
    }
    sort.Slice(ch.nodes, func(i, j int) bool {
        return ch.nodes[i].position < ch.nodes[j].position
    })
}

func (ch *ConsistentHash) GetNode(key int) int {
    if len(ch.nodes) == 0 {
        return -1
    }
    
    position := ch.hash(key)
    index := sort.Search(len(ch.nodes), func(i int) bool {
        return ch.nodes[i].position >= position
    })
    
    if index >= len(ch.nodes) {
        index = 0
    }
    
    return ch.nodes[index].node
}
Rust
use std::cmp::Ordering;

#[derive(Debug, Clone)]
struct VirtualNode {
    node: i32,
    position: i32,
}

impl PartialEq for VirtualNode {
    fn eq(&self, other: &Self) -> bool {
        self.position == other.position
    }
}

impl Eq for VirtualNode {}

impl PartialOrd for VirtualNode {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        Some(self.position.cmp(&other.position))
    }
}

impl Ord for VirtualNode {
    fn cmp(&self, other: &Self) -> Ordering {
        self.position.cmp(&other.position)
    }
}

pub struct ConsistentHash {
    nodes: Vec<VirtualNode>,
    hash_space: i32,
}

impl ConsistentHash {
    pub fn new(hash_space: i32) -> Self {
        ConsistentHash {
            nodes: Vec::new(),
            hash_space,
        }
    }
    
    fn hash(&self, key: i32) -> i32 {
        ((key % self.hash_space) + self.hash_space) % self.hash_space
    }
    
    pub fn add_node(&mut self, node: i32, virtual_count: usize) {
        for i in 0..virtual_count {
            let virtual_key = node * 1000 + i as i32;
            self.nodes.push(VirtualNode {
                node,
                position: self.hash(virtual_key),
            });
        }
        self.nodes.sort();
    }
    
    pub fn get_node(&self, key: i32) -> Option<i32> {
        if self.nodes.is_empty() {
            return None;
        }
        
        let position = self.hash(key);
        let index = self.nodes.binary_search_by(|node| {
            node.position.cmp(&position)
        });
        
        let idx = match index {
            Ok(i) => i,
            Err(i) => {
                if i >= self.nodes.len() {
                    0
                } else {
                    i
                }
            }
        };
        
        Some(self.nodes[idx].node)
    }
}

再哈希

再哈希在负载因子过高时重新分配所有元素到更大的哈希表中。

C
typedef struct {
    int **tables;
    int *sizes;
    int *capacities;
    int tableCount;
} RehashTable;

void rehash(RehashTable *rt, int tableIndex) {
    int oldCapacity = rt->capacities[tableIndex];
    int newCapacity = oldCapacity * 2;
    
    int *newKeys = (int*)calloc(newCapacity, sizeof(int));
    int *newValues = (int*)calloc(newCapacity, sizeof(int));
    
    for (int i = 0; i < newCapacity; i++) {
        newKeys[i] = -1;
    }
    
    for (int i = 0; i < oldCapacity; i++) {
        if (rt->tables[tableIndex][i] != -1) {
            int key = rt->tables[tableIndex][i];
            int newIndex = hash(key, newCapacity);
            while (newKeys[newIndex] != -1) {
                newIndex = (newIndex + 1) % newCapacity;
            }
            newKeys[newIndex] = key;
        }
    }
    
    free(rt->tables[tableIndex]);
    rt->tables[tableIndex] = newKeys;
    rt->capacities[tableIndex] = newCapacity;
}
C++
class RehashTable {
private:
    vector<vector<int>> tables;
    vector<int> sizes;
    double maxLoadFactor;
    
    int hash(int key, int capacity) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    void rehash(int tableIndex) {
        int oldCapacity = tables[tableIndex].size();
        int newCapacity = oldCapacity * 2;
        
        vector<int> newTable(newCapacity, -1);
        
        for (int key : tables[tableIndex]) {
            if (key != -1) {
                int newIndex = hash(key, newCapacity);
                while (newTable[newIndex] != -1) {
                    newIndex = (newIndex + 1) % newCapacity;
                }
                newTable[newIndex] = key;
            }
        }
        
        tables[tableIndex] = move(newTable);
    }
    
public:
    RehashTable(int tableCount, int initialCapacity, double maxLF = 0.75)
        : maxLoadFactor(maxLF) {
        tables.resize(tableCount, vector<int>(initialCapacity, -1));
        sizes.resize(tableCount, 0);
    }
    
    void insert(int tableIndex, int key) {
        int capacity = tables[tableIndex].size();
        if ((double)(sizes[tableIndex] + 1) / capacity > maxLoadFactor) {
            rehash(tableIndex);
        }
        
        capacity = tables[tableIndex].size();
        int index = hash(key, capacity);
        
        while (tables[tableIndex][index] != -1 && 
               tables[tableIndex][index] != key) {
            index = (index + 1) % capacity;
        }
        
        if (tables[tableIndex][index] == -1) {
            tables[tableIndex][index] = key;
            sizes[tableIndex]++;
        }
    }
};
Python
class RehashTable:
    def __init__(self, table_count, initial_capacity, max_load_factor=0.75):
        self.tables = [[-1] * initial_capacity for _ in range(table_count)]
        self.sizes = [0] * table_count
        self.max_load_factor = max_load_factor
    
    def _hash(self, key, capacity):
        return key % capacity
    
    def _rehash(self, table_index):
        old_table = self.tables[table_index]
        new_capacity = len(old_table) * 2
        new_table = [-1] * new_capacity
        
        for key in old_table:
            if key != -1:
                index = self._hash(key, new_capacity)
                while new_table[index] != -1:
                    index = (index + 1) % new_capacity
                new_table[index] = key
        
        self.tables[table_index] = new_table
    
    def insert(self, table_index, key):
        capacity = len(self.tables[table_index])
        load_factor = (self.sizes[table_index] + 1) / capacity
        
        if load_factor > self.max_load_factor:
            self._rehash(table_index)
        
        capacity = len(self.tables[table_index])
        index = self._hash(key, capacity)
        
        while self.tables[table_index][index] != -1 and \
              self.tables[table_index][index] != key:
            index = (index + 1) % capacity
        
        if self.tables[table_index][index] == -1:
            self.tables[table_index][index] = key
            self.sizes[table_index] += 1
Java
public class RehashTable {
    private int[][] tables;
    private int[] sizes;
    private double maxLoadFactor;
    
    public RehashTable(int tableCount, int initialCapacity, double maxLF) {
        this.tables = new int[tableCount][initialCapacity];
        this.sizes = new int[tableCount];
        this.maxLoadFactor = maxLF;
        
        for (int i = 0; i < tableCount; i++) {
            Arrays.fill(tables[i], -1);
        }
    }
    
    private int hash(int key, int capacity) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    private void rehash(int tableIndex) {
        int[] oldTable = tables[tableIndex];
        int newCapacity = oldTable.length * 2;
        int[] newTable = new int[newCapacity];
        Arrays.fill(newTable, -1);
        
        for (int key : oldTable) {
            if (key != -1) {
                int index = hash(key, newCapacity);
                while (newTable[index] != -1) {
                    index = (index + 1) % newCapacity;
                }
                newTable[index] = key;
            }
        }
        
        tables[tableIndex] = newTable;
    }
    
    public void insert(int tableIndex, int key) {
        int capacity = tables[tableIndex].length;
        double loadFactor = (double)(sizes[tableIndex] + 1) / capacity;
        
        if (loadFactor > maxLoadFactor) {
            rehash(tableIndex);
        }
        
        capacity = tables[tableIndex].length;
        int index = hash(key, capacity);
        
        while (tables[tableIndex][index] != -1 && 
               tables[tableIndex][index] != key) {
            index = (index + 1) % capacity;
        }
        
        if (tables[tableIndex][index] == -1) {
            tables[tableIndex][index] = key;
            sizes[tableIndex]++;
        }
    }
}
Go
type RehashTable struct {
    tables        [][]int
    sizes         []int
    maxLoadFactor float64
}

func NewRehashTable(tableCount, initialCapacity int, maxLF float64) *RehashTable {
    tables := make([][]int, tableCount)
    for i := range tables {
        tables[i] = make([]int, initialCapacity)
        for j := range tables[i] {
            tables[i][j] = -1
        }
    }
    
    return &RehashTable{
        tables:        tables,
        sizes:         make([]int, tableCount),
        maxLoadFactor: maxLF,
    }
}

func (rt *RehashTable) hash(key, capacity int) int {
    return ((key % capacity) + capacity) % capacity
}

func (rt *RehashTable) rehash(tableIndex int) {
    oldTable := rt.tables[tableIndex]
    newCapacity := len(oldTable) * 2
    newTable := make([]int, newCapacity)
    for i := range newTable {
        newTable[i] = -1
    }
    
    for _, key := range oldTable {
        if key != -1 {
            index := rt.hash(key, newCapacity)
            for newTable[index] != -1 {
                index = (index + 1) % newCapacity
            }
            newTable[index] = key
        }
    }
    
    rt.tables[tableIndex] = newTable
}

func (rt *RehashTable) Insert(tableIndex, key int) {
    capacity := len(rt.tables[tableIndex])
    loadFactor := float64(rt.sizes[tableIndex]+1) / float64(capacity)
    
    if loadFactor > rt.maxLoadFactor {
        rt.rehash(tableIndex)
    }
    
    capacity = len(rt.tables[tableIndex])
    index := rt.hash(key, capacity)
    
    for rt.tables[tableIndex][index] != -1 && 
        rt.tables[tableIndex][index] != key {
        index = (index + 1) % capacity
    }
    
    if rt.tables[tableIndex][index] == -1 {
        rt.tables[tableIndex][index] = key
        rt.sizes[tableIndex]++
    }
}
Rust
pub struct RehashTable {
    tables: Vec<Vec<i32>>,
    sizes: Vec<usize>,
    max_load_factor: f64,
}

impl RehashTable {
    pub fn new(table_count: usize, initial_capacity: usize, max_lf: f64) -> Self {
        let tables = vec![vec![-1; initial_capacity]; table_count];
        RehashTable {
            tables,
            sizes: vec![0; table_count],
            max_load_factor: max_lf,
        }
    }
    
    fn hash(&self, key: i32, capacity: usize) -> usize {
        let cap = capacity as i32;
        (((key % cap) + cap) % cap) as usize
    }
    
    fn rehash(&mut self, table_index: usize) {
        let old_table = self.tables[table_index].clone();
        let new_capacity = old_table.len() * 2;
        let mut new_table = vec![-1; new_capacity];
        
        for key in old_table {
            if key != -1 {
                let mut index = self.hash(key, new_capacity);
                while new_table[index] != -1 {
                    index = (index + 1) % new_capacity;
                }
                new_table[index] = key;
            }
        }
        
        self.tables[table_index] = new_table;
    }
    
    pub fn insert(&mut self, table_index: usize, key: i32) {
        let capacity = self.tables[table_index].len();
        let load_factor = (self.sizes[table_index] + 1) as f64 / capacity as f64;
        
        if load_factor > self.max_load_factor {
            self.rehash(table_index);
        }
        
        let capacity = self.tables[table_index].len();
        let mut index = self.hash(key, capacity);
        
        while self.tables[table_index][index] != -1 && 
              self.tables[table_index][index] != key {
            index = (index + 1) % capacity;
        }
        
        if self.tables[table_index][index] == -1 {
            self.tables[table_index][index] = key;
            self.sizes[table_index] += 1;
        }
    }
}

Cuckoo哈希

Cuckoo哈希使用多个哈希函数,保证最坏情况下O(1)的查找时间。

C
typedef struct {
    int *table1;
    int *table2;
    int capacity;
    int size;
} CuckooHashTable;

int cuckooHash1(int key, int capacity) {
    return (key % capacity + capacity) % capacity;
}

int cuckooHash2(int key, int capacity) {
    return ((key / capacity) % capacity + capacity) % capacity;
}

void cuckooInsert(CuckooHashTable *cht, int key, int maxLoop) {
    int currKey = key;
    int table = 0;
    
    for (int i = 0; i < maxLoop; i++) {
        int *currTable = (table == 0) ? cht->table1 : cht->table2;
        int index = (table == 0) ? cuckooHash1(currKey, cht->capacity) 
                                 : cuckooHash2(currKey, cht->capacity);
        
        if (currTable[index] == -1) {
            currTable[index] = currKey;
            cht->size++;
            return;
        }
        
        int temp = currTable[index];
        currTable[index] = currKey;
        currKey = temp;
        table = 1 - table;
    }
}
C++
class CuckooHashTable {
private:
    vector<int> table1;
    vector<int> table2;
    int capacity;
    int size;
    
    int hash1(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    int hash2(int key) {
        return (((key / capacity) % capacity) + capacity) % capacity;
    }
    
public:
    CuckooHashTable(int cap) : capacity(cap), size(0) {
        table1.resize(capacity, -1);
        table2.resize(capacity, -1);
    }
    
    bool insert(int key, int maxLoop = 100) {
        int currKey = key;
        int table = 0;
        
        for (int i = 0; i < maxLoop; i++) {
            int index = (table == 0) ? hash1(currKey) : hash2(currKey);
            vector<int>& currTable = (table == 0) ? table1 : table2;
            
            if (currTable[index] == -1) {
                currTable[index] = currKey;
                size++;
                return true;
            }
            
            swap(currTable[index], currKey);
            table = 1 - table;
        }
        
        return false;
    }
    
    bool contains(int key) {
        int idx1 = hash1(key);
        int idx2 = hash2(key);
        return table1[idx1] == key || table2[idx2] == key;
    }
    
    void remove(int key) {
        int idx1 = hash1(key);
        int idx2 = hash2(key);
        
        if (table1[idx1] == key) {
            table1[idx1] = -1;
            size--;
        } else if (table2[idx2] == key) {
            table2[idx2] = -1;
            size--;
        }
    }
};
Python
class CuckooHashTable:
    def __init__(self, capacity):
        self.capacity = capacity
        self.size = 0
        self.table1 = [-1] * capacity
        self.table2 = [-1] * capacity
    
    def _hash1(self, key):
        return key % self.capacity
    
    def _hash2(self, key):
        return (key // self.capacity) % self.capacity
    
    def insert(self, key, max_loop=100):
        curr_key = key
        table = 0
        
        for _ in range(max_loop):
            if table == 0:
                index = self._hash1(curr_key)
                curr_table = self.table1
            else:
                index = self._hash2(curr_key)
                curr_table = self.table2
            
            if curr_table[index] == -1:
                curr_table[index] = curr_key
                self.size += 1
                return True
            
            curr_table[index], curr_key = curr_key, curr_table[index]
            table = 1 - table
        
        return False
    
    def contains(self, key):
        return (self.table1[self._hash1(key)] == key or 
                self.table2[self._hash2(key)] == key)
    
    def remove(self, key):
        idx1 = self._hash1(key)
        idx2 = self._hash2(key)
        
        if self.table1[idx1] == key:
            self.table1[idx1] = -1
            self.size -= 1
        elif self.table2[idx2] == key:
            self.table2[idx2] = -1
            self.size -= 1
Java
public class CuckooHashTable {
    private int[] table1;
    private int[] table2;
    private int capacity;
    private int size;
    
    public CuckooHashTable(int capacity) {
        this.capacity = capacity;
        this.size = 0;
        this.table1 = new int[capacity];
        this.table2 = new int[capacity];
        Arrays.fill(table1, -1);
        Arrays.fill(table2, -1);
    }
    
    private int hash1(int key) {
        return ((key % capacity) + capacity) % capacity;
    }
    
    private int hash2(int key) {
        return (((key / capacity) % capacity) + capacity) % capacity;
    }
    
    public boolean insert(int key, int maxLoop) {
        int currKey = key;
        int table = 0;
        
        for (int i = 0; i < maxLoop; i++) {
            int[] currTable = (table == 0) ? table1 : table2;
            int index = (table == 0) ? hash1(currKey) : hash2(currKey);
            
            if (currTable[index] == -1) {
                currTable[index] = currKey;
                size++;
                return true;
            }
            
            int temp = currTable[index];
            currTable[index] = currKey;
            currKey = temp;
            table = 1 - table;
        }
        
        return false;
    }
    
    public boolean contains(int key) {
        return table1[hash1(key)] == key || table2[hash2(key)] == key;
    }
    
    public void remove(int key) {
        int idx1 = hash1(key);
        int idx2 = hash2(key);
        
        if (table1[idx1] == key) {
            table1[idx1] = -1;
            size--;
        } else if (table2[idx2] == key) {
            table2[idx2] = -1;
            size--;
        }
    }
}
Go
type CuckooHashTable struct {
    table1   []int
    table2   []int
    capacity int
    size     int
}

func NewCuckooHashTable(capacity int) *CuckooHashTable {
    return &CuckooHashTable{
        table1:   make([]int, capacity),
        table2:   make([]int, capacity),
        capacity: capacity,
        size:     0,
    }
}

func (cht *CuckooHashTable) hash1(key int) int {
    return ((key % cht.capacity) + cht.capacity) % cht.capacity
}

func (cht *CuckooHashTable) hash2(key int) int {
    return (((key / cht.capacity) % cht.capacity) + cht.capacity) % cht.capacity
}

func (cht *CuckooHashTable) Insert(key int, maxLoop int) bool {
    currKey := key
    table := 0

    for i := 0; i < maxLoop; i++ {
        var currTable []int
        var index int
        if table == 0 {
            currTable = cht.table1
            index = cht.hash1(currKey)
        } else {
            currTable = cht.table2
            index = cht.hash2(currKey)
        }

        if currTable[index] == -1 {
            currTable[index] = currKey
            cht.size++
            return true
        }

        currTable[index], currKey = currKey, currTable[index]
        table = 1 - table
    }

    return false
}

func (cht *CuckooHashTable) Contains(key int) bool {
    return cht.table1[cht.hash1(key)] == key || cht.table2[cht.hash2(key)] == key
}

func (cht *CuckooHashTable) Remove(key int) {
    idx1 := cht.hash1(key)
    idx2 := cht.hash2(key)

    if cht.table1[idx1] == key {
        cht.table1[idx1] = -1
        cht.size--
    } else if cht.table2[idx2] == key {
        cht.table2[idx2] = -1
        cht.size--
    }
}
Rust
pub struct CuckooHashTable {
    table1: Vec<i32>,
    table2: Vec<i32>,
    capacity: usize,
    size: usize,
}

impl CuckooHashTable {
    pub fn new(capacity: usize) -> Self {
        CuckooHashTable {
            table1: vec![-1; capacity],
            table2: vec![-1; capacity],
            capacity,
            size: 0,
        }
    }
    
    fn hash1(&self, key: i32) -> usize {
        let cap = self.capacity as i32;
        (((key % cap) + cap) % cap) as usize
    }
    
    fn hash2(&self, key: i32) -> usize {
        let cap = self.capacity as i32;
        (((key / cap) % cap + cap) % cap) as usize
    }
    
    pub fn insert(&mut self, key: i32, max_loop: usize) -> bool {
        let mut curr_key = key;
        let mut table = 0;
        
        for _ in 0..max_loop {
            let index = if table == 0 {
                self.hash1(curr_key)
            } else {
                self.hash2(curr_key)
            };
            
            let curr_table = if table == 0 {
                &mut self.table1
            } else {
                &mut self.table2
            };
            
            if curr_table[index] == -1 {
                curr_table[index] = curr_key;
                self.size += 1;
                return true;
            }
            
            let temp = curr_table[index];
            curr_table[index] = curr_key;
            curr_key = temp;
            table = 1 - table;
        }
        
        false
    }
    
    pub fn contains(&self, key: i32) -> bool {
        self.table1[self.hash1(key)] == key || self.table2[self.hash2(key)] == key
    }
    
    pub fn remove(&mut self, key: i32) {
        let idx1 = self.hash1(key);
        let idx2 = self.hash2(key);
        
        if self.table1[idx1] == key {
            self.table1[idx1] = -1;
            self.size -= 1;
        } else if self.table2[idx2] == key {
            self.table2[idx2] = -1;
            self.size -= 1;
        }
    }
}

哈希函数选择

哈希函数 特点 应用
取模法 简单 整数
乘数法 分布均匀 整数
FNV 快速 字符串
MurmurHash 高质量 通用
CityHash 高性能 字符串

冲突解决方法比较

方法 查找 插入 删除 空间
链地址法 O(1+α) O(1) O(1) O(n)
线性探测 O(1/(1-α)) O(1/(1-α)) 复杂 O(n)
二次探测 O(1/(1-α)) O(1/(1-α)) 复杂 O(n)
双重哈希 O(1/(1-α)) O(1/(1-α)) 复杂 O(n)

应用场景

  1. 数据库索引:哈希索引
  2. 缓存系统:键值存储
  3. 分布式系统:一致性哈希
  4. 负载均衡:请求分发

参考资料

  • 《算法导论》第11章
  • 《数据结构与算法分析》第5章