跳转至

哈希查找

概述

哈希查找是基于哈希表的数据查找方法,通过哈希函数将关键字映射到存储位置,实现O(1)平均时间复杂度的查找操作。

核心思想

哈希查找的核心思想是直接寻址:通过哈希函数计算关键字对应的存储位置,直接访问目标数据。

哈希查找特点

  • 查找速度极快:平均O(1)时间复杂度
  • 需要额外空间:存储哈希表
  • 存在哈希冲突:需要冲突解决策略
  • 不支持范围查询:只适合精确查找

哈希函数设计

常用哈希函数

C
// 除法哈希
unsigned int hashDiv(int key, int tableSize) {
    return key % tableSize;
}

// 乘法哈希
unsigned int hashMul(int key, int tableSize) {
    const double A = 0.6180339887;
    double frac = key * A - (int)(key * A);
    return (unsigned int)(tableSize * frac);
}

// 字符串哈希
unsigned int hashString(char *str, int tableSize) {
    unsigned int hash = 0;
    while (*str) {
        hash = hash * 31 + *str++;
    }
    return hash % tableSize;
}

// FNV哈希
unsigned int hashFNV(char *str, int tableSize) {
    unsigned int hash = 2166136261u;
    while (*str) {
        hash ^= *str++;
        hash *= 16777619;
    }
    return hash % tableSize;
}

// MurmurHash3
unsigned int hashMurmur3(int key, int tableSize) {
    key ^= key >> 16;
    key *= 0x85ebca6b;
    key ^= key >> 13;
    key *= 0xc2b2ae35;
    key ^= key >> 16;
    return key % tableSize;
}
C++
#include <string>
#include <functional>
using namespace std;

// 除法哈希
size_t hashDiv(int key, size_t tableSize) {
    return key % tableSize;
}

// 乘法哈希
size_t hashMul(int key, size_t tableSize) {
    const double A = 0.6180339887;
    double frac = key * A - static_cast<int>(key * A);
    return static_cast<size_t>(tableSize * frac);
}

// 字符串哈希
size_t hashString(const string& str, size_t tableSize) {
    size_t hash = 0;
    for (char c : str) {
        hash = hash * 31 + c;
    }
    return hash % tableSize;
}

// 使用标准库哈希
size_t hashStandard(const string& key, size_t tableSize) {
    return hash<string>{}(key) % tableSize;
}
Python
def hash_div(key, table_size):
    """除法哈希"""
    return key % table_size

def hash_mul(key, table_size):
    """乘法哈希"""
    A = 0.6180339887
    frac = (key * A) % 1
    return int(table_size * frac)

def hash_string(s, table_size):
    """字符串哈希"""
    hash_val = 0
    for c in s:
        hash_val = hash_val * 31 + ord(c)
    return hash_val % table_size

def hash_fnv(s, table_size):
    """FNV哈希"""
    hash_val = 2166136261
    for c in s:
        hash_val ^= ord(c)
        hash_val *= 16777619
        hash_val &= 0xffffffff  # 保持32位
    return hash_val % table_size

# 使用内置hash函数
def hash_builtin(key, table_size):
    return hash(key) % table_size
Java
public class HashFunctions {
    // 除法哈希
    public static int hashDiv(int key, int tableSize) {
        return Math.abs(key % tableSize);
    }
    
    // 乘法哈希
    public static int hashMul(int key, int tableSize) {
        final double A = 0.6180339887;
        double frac = key * A - (int)(key * A);
        return (int)(tableSize * frac);
    }
    
    // 字符串哈希
    public static int hashString(String str, int tableSize) {
        int hash = 0;
        for (int i = 0; i < str.length(); i++) {
            hash = hash * 31 + str.charAt(i);
        }
        return Math.abs(hash % tableSize);
    }
    
    // 使用Object.hashCode()
    public static int hashObject(Object key, int tableSize) {
        return Math.abs(key.hashCode() % tableSize);
    }
}
Go
import "hash/fnv"

// 除法哈希
func hashDiv(key int, tableSize int) int {
    return key % tableSize
}

// 乘法哈希
func hashMul(key int, tableSize int) int {
    A := 0.6180339887
    frac := float64(key)*A - float64(int(float64(key)*A))
    return int(float64(tableSize) * frac)
}

// 字符串哈希
func hashString(s string, tableSize int) int {
    hash := 0
    for _, c := range s {
        hash = hash*31 + int(c)
    }
    if hash < 0 {
        hash = -hash
    }
    return hash % tableSize
}

// 使用FNV哈希
func hashFNV(s string, tableSize int) int {
    h := fnv.New32a()
    h.Write([]byte(s))
    return int(h.Sum32()) % tableSize
}
Rust
use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};

// 除法哈希
fn hash_div(key: i32, table_size: usize) -> usize {
    (key.abs() as usize) % table_size
}

// 乘法哈希
fn hash_mul(key: i32, table_size: usize) -> usize {
    const A: f64 = 0.6180339887;
    let frac = (key as f64 * A).fract();
    (table_size as f64 * frac) as usize
}

// 字符串哈希
fn hash_string(s: &str, table_size: usize) -> usize {
    let mut hash: usize = 0;
    for c in s.chars() {
        hash = hash.wrapping_mul(31).wrapping_add(c as usize);
    }
    hash % table_size
}

// 使用标准库哈希
fn hash_standard<T: Hash>(key: &T, table_size: usize) -> usize {
    let mut hasher = DefaultHasher::new();
    key.hash(&mut hasher);
    (hasher.finish() as usize) % table_size
}

冲突解决方法

1. 开放定址法

线性探测

C
#define DELETED -1
#define EMPTY 0
#define OCCUPIED 1

typedef struct {
    int key;
    int value;
    int state;
} HashEntry;

typedef struct {
    HashEntry *table;
    int size;
    int count;
} LinearProbingHashTable;

LinearProbingHashTable* createLPHashTable(int size) {
    LinearProbingHashTable *ht = (LinearProbingHashTable*)malloc(sizeof(LinearProbingHashTable));
    ht->table = (HashEntry*)calloc(size, sizeof(HashEntry));
    ht->size = size;
    ht->count = 0;
    
    for (int i = 0; i < size; i++) {
        ht->table[i].state = EMPTY;
    }
    
    return ht;
}

int lpHashSearch(LinearProbingHashTable *ht, int key) {
    unsigned int pos = key % ht->size;
    unsigned int startPos = pos;
    
    do {
        if (ht->table[pos].state == EMPTY) {
            return -1;
        }
        
        if (ht->table[pos].state == OCCUPIED && ht->table[pos].key == key) {
            return pos;
        }
        
        pos = (pos + 1) % ht->size;
    } while (pos != startPos);
    
    return -1;
}

int lpHashInsert(LinearProbingHashTable *ht, int key, int value) {
    if (ht->count >= ht->size * 0.75) {
        return -1;
    }
    
    unsigned int pos = key % ht->size;
    
    while (ht->table[pos].state == OCCUPIED) {
        if (ht->table[pos].key == key) {
            ht->table[pos].value = value;
            return pos;
        }
        pos = (pos + 1) % ht->size;
    }
    
    ht->table[pos].key = key;
    ht->table[pos].value = value;
    ht->table[pos].state = OCCUPIED;
    ht->count++;
    
    return pos;
}

void lpHashDelete(LinearProbingHashTable *ht, int key) {
    int pos = lpHashSearch(ht, key);
    if (pos != -1) {
        ht->table[pos].state = DELETED;
        ht->count--;
    }
}
C++
#include <vector>
using namespace std;

enum State { EMPTY, OCCUPIED, DELETED };

struct HashEntry {
    int key;
    int value;
    State state;
};

class LinearProbingHashTable {
private:
    vector<HashEntry> table;
    size_t count;
    
public:
    LinearProbingHashTable(size_t size) : table(size), count(0) {
        for (auto& entry : table) {
            entry.state = EMPTY;
        }
    }
    
    int search(int key) {
        size_t pos = key % table.size();
        size_t startPos = pos;
        
        do {
            if (table[pos].state == EMPTY) return -1;
            if (table[pos].state == OCCUPIED && table[pos].key == key) {
                return pos;
            }
            pos = (pos + 1) % table.size();
        } while (pos != startPos);
        
        return -1;
    }
    
    bool insert(int key, int value) {
        if (count >= table.size() * 0.75) return false;
        
        size_t pos = key % table.size();
        while (table[pos].state == OCCUPIED) {
            if (table[pos].key == key) {
                table[pos].value = value;
                return true;
            }
            pos = (pos + 1) % table.size();
        }
        
        table[pos] = {key, value, OCCUPIED};
        count++;
        return true;
    }
};
Python
class LinearProbingHashTable:
    def __init__(self, size):
        self.size = size
        self.keys = [None] * size
        self.values = [None] * size
        self.count = 0
    
    def search(self, key):
        pos = key % self.size
        start_pos = pos
        
        while self.keys[pos] is not None:
            if self.keys[pos] == key:
                return pos
            pos = (pos + 1) % self.size
            if pos == start_pos:
                break
        
        return -1
    
    def insert(self, key, value):
        if self.count >= self.size * 0.75:
            return False
        
        pos = key % self.size
        while self.keys[pos] is not None:
            if self.keys[pos] == key:
                self.values[pos] = value
                return True
            pos = (pos + 1) % self.size
        
        self.keys[pos] = key
        self.values[pos] = value
        self.count += 1
        return True
    
    def delete(self, key):
        pos = self.search(key)
        if pos != -1:
            self.keys[pos] = None  # 标记为删除
            self.count -= 1
            return True
        return False
Java
public class LinearProbingHashTable {
    private Integer[] keys;
    private Integer[] values;
    private int size;
    private int count;
    
    public LinearProbingHashTable(int size) {
        this.size = size;
        this.keys = new Integer[size];
        this.values = new Integer[size];
        this.count = 0;
    }
    
    public int search(int key) {
        int pos = key % size;
        int startPos = pos;
        
        do {
            if (keys[pos] == null) return -1;
            if (keys[pos] == key) return pos;
            pos = (pos + 1) % size;
        } while (pos != startPos);
        
        return -1;
    }
    
    public boolean insert(int key, int value) {
        if (count >= size * 0.75) return false;
        
        int pos = key % size;
        while (keys[pos] != null) {
            if (keys[pos] == key) {
                values[pos] = value;
                return true;
            }
            pos = (pos + 1) % size;
        }
        
        keys[pos] = key;
        values[pos] = value;
        count++;
        return true;
    }
}
Go
type LinearProbingHashTable struct {
    keys   []int
    values []int
    size   int
    count  int
}

func NewLinearProbingHashTable(size int) *LinearProbingHashTable {
    return &LinearProbingHashTable{
        keys:   make([]int, size),
        values: make([]int, size),
        size:   size,
        count:  0,
    }
}

func (ht *LinearProbingHashTable) Search(key int) int {
    pos := key % ht.size
    startPos := pos
    
    for ht.keys[pos] != 0 {
        if ht.keys[pos] == key {
            return pos
        }
        pos = (pos + 1) % ht.size
        if pos == startPos {
            break
        }
    }
    
    return -1
}

func (ht *LinearProbingHashTable) Insert(key, value int) bool {
    if ht.count >= int(float64(ht.size)*0.75) {
        return false
    }
    
    pos := key % ht.size
    for ht.keys[pos] != 0 {
        if ht.keys[pos] == key {
            ht.values[pos] = value
            return true
        }
        pos = (pos + 1) % ht.size
    }
    
    ht.keys[pos] = key
    ht.values[pos] = value
    ht.count++
    return true
}
Rust
use std::collections::HashMap;

// Rust 标准库已提供高效的 HashMap 实现
// 这里展示简单的线性探测实现
struct LinearProbingHashTable {
    keys: Vec<Option<i32>>,
    values: Vec<Option<i32>>,
    size: usize,
    count: usize,
}

impl LinearProbingHashTable {
    fn new(size: usize) -> Self {
        Self {
            keys: vec![None; size],
            values: vec![None; size],
            size,
            count: 0,
        }
    }
    
    fn search(&self, key: i32) -> Option<usize> {
        let mut pos = (key.abs() as usize) % self.size;
        let start_pos = pos;
        
        while let Some(k) = self.keys[pos] {
            if k == key {
                return Some(pos);
            }
            pos = (pos + 1) % self.size;
            if pos == start_pos {
                break;
            }
        }
        
        None
    }
    
    fn insert(&mut self, key: i32, value: i32) -> bool {
        if self.count >= (self.size as f64 * 0.75) as usize {
            return false;
        }
        
        let mut pos = (key.abs() as usize) % self.size;
        while self.keys[pos].is_some() {
            if self.keys[pos] == Some(key) {
                self.values[pos] = Some(value);
                return true;
            }
            pos = (pos + 1) % self.size;
        }
        
        self.keys[pos] = Some(key);
        self.values[pos] = Some(value);
        self.count += 1;
        true
    }
}

2. 链地址法

C
typedef struct HashNode {
    int key;
    int value;
    struct HashNode *next;
} HashNode;

typedef struct {
    HashNode **buckets;
    int size;
    int count;
} ChainHashTable;

ChainHashTable* createChainHashTable(int size) {
    ChainHashTable *ht = (ChainHashTable*)malloc(sizeof(ChainHashTable));
    ht->buckets = (HashNode**)calloc(size, sizeof(HashNode*));
    ht->size = size;
    ht->count = 0;
    return ht;
}

HashNode* chainHashSearch(ChainHashTable *ht, int key) {
    unsigned int pos = key % ht->size;
    HashNode *curr = ht->buckets[pos];
    
    while (curr) {
        if (curr->key == key) {
            return curr;
        }
        curr = curr->next;
    }
    
    return NULL;
}

void chainHashInsert(ChainHashTable *ht, int key, int value) {
    HashNode *node = chainHashSearch(ht, key);
    
    if (node) {
        node->value = value;
        return;
    }
    
    unsigned int pos = key % ht->size;
    HashNode *newNode = (HashNode*)malloc(sizeof(HashNode));
    newNode->key = key;
    newNode->value = value;
    newNode->next = ht->buckets[pos];
    ht->buckets[pos] = newNode;
    ht->count++;
}

void chainHashDelete(ChainHashTable *ht, int key) {
    unsigned int pos = key % ht->size;
    HashNode *curr = ht->buckets[pos];
    HashNode *prev = NULL;
    
    while (curr) {
        if (curr->key == key) {
            if (prev) {
                prev->next = curr->next;
            } else {
                ht->buckets[pos] = curr->next;
            }
            free(curr);
            ht->count--;
            return;
        }
        prev = curr;
        curr = curr->next;
    }
}
C++
#include <vector>
#include <list>
using namespace std;

class ChainHashTable {
private:
    vector<list<pair<int, int>>> buckets;
    size_t count;
    
public:
    ChainHashTable(size_t size) : buckets(size), count(0) {}
    
    int* search(int key) {
        size_t pos = key % buckets.size();
        
        for (auto& p : buckets[pos]) {
            if (p.first == key) {
                return &p.second;
            }
        }
        
        return nullptr;
    }
    
    void insert(int key, int value) {
        size_t pos = key % buckets.size();
        
        for (auto& p : buckets[pos]) {
            if (p.first == key) {
                p.second = value;
                return;
            }
        }
        
        buckets[pos].push_back({key, value});
        count++;
    }
    
    bool erase(int key) {
        size_t pos = key % buckets.size();
        
        for (auto it = buckets[pos].begin(); it != buckets[pos].end(); ++it) {
            if (it->first == key) {
                buckets[pos].erase(it);
                count--;
                return true;
            }
        }
        
        return false;
    }
};
Python
class ChainHashTable:
    def __init__(self, size):
        self.size = size
        self.buckets = [[] for _ in range(size)]
        self.count = 0
    
    def search(self, key):
        pos = key % self.size
        
        for k, v in self.buckets[pos]:
            if k == key:
                return v
        
        return None
    
    def insert(self, key, value):
        pos = key % self.size
        
        for i, (k, v) in enumerate(self.buckets[pos]):
            if k == key:
                self.buckets[pos][i] = (key, value)
                return
        
        self.buckets[pos].append((key, value))
        self.count += 1
    
    def delete(self, key):
        pos = key % self.size
        
        for i, (k, v) in enumerate(self.buckets[pos]):
            if k == key:
                del self.buckets[pos][i]
                self.count -= 1
                return True
        
        return False
    
    def __getitem__(self, key):
        result = self.search(key)
        if result is None:
            raise KeyError(key)
        return result
    
    def __setitem__(self, key, value):
        self.insert(key, value)
Java
import java.util.LinkedList;

public class ChainHashTable {
    private LinkedList<Pair>[] buckets;
    private int size;
    private int count;
    
    private static class Pair {
        int key, value;
        Pair(int k, int v) { key = k; value = v; }
    }
    
    @SuppressWarnings("unchecked")
    public ChainHashTable(int size) {
        this.size = size;
        this.buckets = new LinkedList[size];
        for (int i = 0; i < size; i++) {
            buckets[i] = new LinkedList<>();
        }
        this.count = 0;
    }
    
    public Integer search(int key) {
        int pos = key % size;
        
        for (Pair p : buckets[pos]) {
            if (p.key == key) {
                return p.value;
            }
        }
        
        return null;
    }
    
    public void insert(int key, int value) {
        int pos = key % size;
        
        for (Pair p : buckets[pos]) {
            if (p.key == key) {
                p.value = value;
                return;
            }
        }
        
        buckets[pos].add(new Pair(key, value));
        count++;
    }
}
Go
type chainNode struct {
    key   int
    value int
    next  *chainNode
}

type ChainHashTable struct {
    buckets []*chainNode
    size    int
    count   int
}

func NewChainHashTable(size int) *ChainHashTable {
    return &ChainHashTable{
        buckets: make([]*chainNode, size),
        size:    size,
        count:   0,
    }
}

func (ht *ChainHashTable) Search(key int) (int, bool) {
    pos := key % ht.size
    curr := ht.buckets[pos]
    
    for curr != nil {
        if curr.key == key {
            return curr.value, true
        }
        curr = curr.next
    }
    
    return 0, false
}

func (ht *ChainHashTable) Insert(key, value int) {
    pos := key % ht.size
    curr := ht.buckets[pos]
    
    // 检查是否已存在
    for curr != nil {
        if curr.key == key {
            curr.value = value
            return
        }
        curr = curr.next
    }
    
    // 插入新节点
    newNode := &chainNode{key: key, value: value, next: ht.buckets[pos]}
    ht.buckets[pos] = newNode
    ht.count++
}
Rust
use std::collections::HashMap;

// Rust 标准库的 HashMap 使用链地址法
// 直接使用标准库实现
fn demonstrate_hashmap() {
    let mut map: HashMap<i32, i32> = HashMap::new();
    
    // 插入
    map.insert(1, 100);
    map.insert(2, 200);
    
    // 查找
    if let Some(&value) = map.get(&1) {
        println!("Found: {}", value);
    }
    
    // 删除
    map.remove(&1);
}

// 自定义链地址法哈希表
struct ChainHashTable {
    buckets: Vec<Vec<(i32, i32)>>,
    count: usize,
}

impl ChainHashTable {
    fn new(size: usize) -> Self {
        Self {
            buckets: vec![Vec::new(); size],
            count: 0,
        }
    }
    
    fn search(&self, key: i32) -> Option<i32> {
        let pos = (key.abs() as usize) % self.buckets.len();
        
        for &(k, v) in &self.buckets[pos] {
            if k == key {
                return Some(v);
            }
        }
        
        None
    }
    
    fn insert(&mut self, key: i32, value: i32) {
        let pos = (key.abs() as usize) % self.buckets.len();
        
        for pair in &mut self.buckets[pos] {
            if pair.0 == key {
                pair.1 = value;
                return;
            }
        }
        
        self.buckets[pos].push((key, value));
        self.count += 1;
    }
}

时间复杂度分析

操作 平均时间复杂度 最坏时间复杂度 说明
查找 O(1) O(n) 最坏情况所有键冲突
插入 O(1) O(n) 可能需要rehash
删除 O(1) O(n) 最坏情况链表很长
Rehash O(n) O(n) 重建整个表

性能影响因素

  • 哈希函数质量:分布越均匀越好
  • 装载因子:α = n/m,建议0.7-0.8
  • 冲突解决策略:链地址法更稳定
  • 表大小:选择质数减少冲突

空间复杂度

  • 开放定址法:O(m)(m为表大小)
  • 链地址法:O(n + m)(n为元素数量)

装载因子与性能

装载因子α 链地址法查找 开放定址查找
0.5 1.5次 2次
0.75 2.5次 4次
0.9 5.5次 10次
1.0 发散

应用场景

  1. 数据库索引:内存索引、缓存
  2. 编译器:符号表、关键字查找
  3. 缓存系统:Redis、Memcached
  4. 集合操作:去重、判重
  5. 字符串匹配:Rabin-Karp算法

优缺点

优点

  1. 查找速度快:平均O(1)时间复杂度
  2. 实现简单:核心逻辑易于理解
  3. 增删高效:插入和删除同样快速
  4. 灵活性强:可调整装载因子

缺点

  1. 不支持范围查询:只能精确查找
  2. 空间开销较大:需要预留空间
  3. 最坏性能差:所有键冲突时退化为链表
  4. 无序性:不能保持插入顺序

各语言标准库实现

C++
#include <unordered_map>
#include <unordered_set>

// 哈希表
std::unordered_map<std::string, int> hashMap;
hashMap["key"] = 100;
int value = hashMap["key"];

// 哈希集合
std::unordered_set<int> hashSet;
hashSet.insert(1);
bool found = hashSet.count(1);
Python
1
2
3
4
5
6
7
8
9
# 字典(dict)就是哈希表
hash_map = {}
hash_map["key"] = 100
value = hash_map.get("key")

# 集合(set)也是哈希表
hash_set = set()
hash_set.add(1)
found = 1 in hash_set
Java
import java.util.HashMap;
import java.util.HashSet;

// HashMap
HashMap<String, Integer> hashMap = new HashMap<>();
hashMap.put("key", 100);
int value = hashMap.get("key");

// HashSet
HashSet<Integer> hashSet = new HashSet<>();
hashSet.add(1);
boolean found = hashSet.contains(1);
Go
1
2
3
4
5
6
7
8
9
// map 就是哈希表
hashMap := make(map[string]int)
hashMap["key"] = 100
value := hashMap["key"]

// 检查键是否存在
if val, ok := hashMap["key"]; ok {
    fmt.Println(val)
}
Rust
use std::collections::{HashMap, HashSet};

// HashMap
let mut hash_map: HashMap<&str, i32> = HashMap::new();
hash_map.insert("key", 100);
let value = hash_map.get("key");

// HashSet
let mut hash_set: HashSet<i32> = HashSet::new();
hash_set.insert(1);
let found = hash_set.contains(&1);

参考资料