哈希查找
概述
哈希查找是基于哈希表的数据查找方法,通过哈希函数将关键字映射到存储位置,实现O(1)平均时间复杂度的查找操作。
核心思想
哈希查找的核心思想是直接寻址:通过哈希函数计算关键字对应的存储位置,直接访问目标数据。
哈希查找特点
- 查找速度极快:平均O(1)时间复杂度
- 需要额外空间:存储哈希表
- 存在哈希冲突:需要冲突解决策略
- 不支持范围查询:只适合精确查找
哈希函数设计
常用哈希函数
| C |
|---|
| // 除法哈希
unsigned int hashDiv(int key, int tableSize) {
return key % tableSize;
}
// 乘法哈希
unsigned int hashMul(int key, int tableSize) {
const double A = 0.6180339887;
double frac = key * A - (int)(key * A);
return (unsigned int)(tableSize * frac);
}
// 字符串哈希
unsigned int hashString(char *str, int tableSize) {
unsigned int hash = 0;
while (*str) {
hash = hash * 31 + *str++;
}
return hash % tableSize;
}
// FNV哈希
unsigned int hashFNV(char *str, int tableSize) {
unsigned int hash = 2166136261u;
while (*str) {
hash ^= *str++;
hash *= 16777619;
}
return hash % tableSize;
}
// MurmurHash3
unsigned int hashMurmur3(int key, int tableSize) {
key ^= key >> 16;
key *= 0x85ebca6b;
key ^= key >> 13;
key *= 0xc2b2ae35;
key ^= key >> 16;
return key % tableSize;
}
|
| C++ |
|---|
| #include <string>
#include <functional>
using namespace std;
// 除法哈希
size_t hashDiv(int key, size_t tableSize) {
return key % tableSize;
}
// 乘法哈希
size_t hashMul(int key, size_t tableSize) {
const double A = 0.6180339887;
double frac = key * A - static_cast<int>(key * A);
return static_cast<size_t>(tableSize * frac);
}
// 字符串哈希
size_t hashString(const string& str, size_t tableSize) {
size_t hash = 0;
for (char c : str) {
hash = hash * 31 + c;
}
return hash % tableSize;
}
// 使用标准库哈希
size_t hashStandard(const string& key, size_t tableSize) {
return hash<string>{}(key) % tableSize;
}
|
| Python |
|---|
| def hash_div(key, table_size):
"""除法哈希"""
return key % table_size
def hash_mul(key, table_size):
"""乘法哈希"""
A = 0.6180339887
frac = (key * A) % 1
return int(table_size * frac)
def hash_string(s, table_size):
"""字符串哈希"""
hash_val = 0
for c in s:
hash_val = hash_val * 31 + ord(c)
return hash_val % table_size
def hash_fnv(s, table_size):
"""FNV哈希"""
hash_val = 2166136261
for c in s:
hash_val ^= ord(c)
hash_val *= 16777619
hash_val &= 0xffffffff # 保持32位
return hash_val % table_size
# 使用内置hash函数
def hash_builtin(key, table_size):
return hash(key) % table_size
|
| Java |
|---|
| public class HashFunctions {
// 除法哈希
public static int hashDiv(int key, int tableSize) {
return Math.abs(key % tableSize);
}
// 乘法哈希
public static int hashMul(int key, int tableSize) {
final double A = 0.6180339887;
double frac = key * A - (int)(key * A);
return (int)(tableSize * frac);
}
// 字符串哈希
public static int hashString(String str, int tableSize) {
int hash = 0;
for (int i = 0; i < str.length(); i++) {
hash = hash * 31 + str.charAt(i);
}
return Math.abs(hash % tableSize);
}
// 使用Object.hashCode()
public static int hashObject(Object key, int tableSize) {
return Math.abs(key.hashCode() % tableSize);
}
}
|
| Go |
|---|
| import "hash/fnv"
// 除法哈希
func hashDiv(key int, tableSize int) int {
return key % tableSize
}
// 乘法哈希
func hashMul(key int, tableSize int) int {
A := 0.6180339887
frac := float64(key)*A - float64(int(float64(key)*A))
return int(float64(tableSize) * frac)
}
// 字符串哈希
func hashString(s string, tableSize int) int {
hash := 0
for _, c := range s {
hash = hash*31 + int(c)
}
if hash < 0 {
hash = -hash
}
return hash % tableSize
}
// 使用FNV哈希
func hashFNV(s string, tableSize int) int {
h := fnv.New32a()
h.Write([]byte(s))
return int(h.Sum32()) % tableSize
}
|
| Rust |
|---|
| use std::collections::hash_map::DefaultHasher;
use std::hash::{Hash, Hasher};
// 除法哈希
fn hash_div(key: i32, table_size: usize) -> usize {
(key.abs() as usize) % table_size
}
// 乘法哈希
fn hash_mul(key: i32, table_size: usize) -> usize {
const A: f64 = 0.6180339887;
let frac = (key as f64 * A).fract();
(table_size as f64 * frac) as usize
}
// 字符串哈希
fn hash_string(s: &str, table_size: usize) -> usize {
let mut hash: usize = 0;
for c in s.chars() {
hash = hash.wrapping_mul(31).wrapping_add(c as usize);
}
hash % table_size
}
// 使用标准库哈希
fn hash_standard<T: Hash>(key: &T, table_size: usize) -> usize {
let mut hasher = DefaultHasher::new();
key.hash(&mut hasher);
(hasher.finish() as usize) % table_size
}
|
冲突解决方法
1. 开放定址法
线性探测
| C |
|---|
| #define DELETED -1
#define EMPTY 0
#define OCCUPIED 1
typedef struct {
int key;
int value;
int state;
} HashEntry;
typedef struct {
HashEntry *table;
int size;
int count;
} LinearProbingHashTable;
LinearProbingHashTable* createLPHashTable(int size) {
LinearProbingHashTable *ht = (LinearProbingHashTable*)malloc(sizeof(LinearProbingHashTable));
ht->table = (HashEntry*)calloc(size, sizeof(HashEntry));
ht->size = size;
ht->count = 0;
for (int i = 0; i < size; i++) {
ht->table[i].state = EMPTY;
}
return ht;
}
int lpHashSearch(LinearProbingHashTable *ht, int key) {
unsigned int pos = key % ht->size;
unsigned int startPos = pos;
do {
if (ht->table[pos].state == EMPTY) {
return -1;
}
if (ht->table[pos].state == OCCUPIED && ht->table[pos].key == key) {
return pos;
}
pos = (pos + 1) % ht->size;
} while (pos != startPos);
return -1;
}
int lpHashInsert(LinearProbingHashTable *ht, int key, int value) {
if (ht->count >= ht->size * 0.75) {
return -1;
}
unsigned int pos = key % ht->size;
while (ht->table[pos].state == OCCUPIED) {
if (ht->table[pos].key == key) {
ht->table[pos].value = value;
return pos;
}
pos = (pos + 1) % ht->size;
}
ht->table[pos].key = key;
ht->table[pos].value = value;
ht->table[pos].state = OCCUPIED;
ht->count++;
return pos;
}
void lpHashDelete(LinearProbingHashTable *ht, int key) {
int pos = lpHashSearch(ht, key);
if (pos != -1) {
ht->table[pos].state = DELETED;
ht->count--;
}
}
|
| C++ |
|---|
| #include <vector>
using namespace std;
enum State { EMPTY, OCCUPIED, DELETED };
struct HashEntry {
int key;
int value;
State state;
};
class LinearProbingHashTable {
private:
vector<HashEntry> table;
size_t count;
public:
LinearProbingHashTable(size_t size) : table(size), count(0) {
for (auto& entry : table) {
entry.state = EMPTY;
}
}
int search(int key) {
size_t pos = key % table.size();
size_t startPos = pos;
do {
if (table[pos].state == EMPTY) return -1;
if (table[pos].state == OCCUPIED && table[pos].key == key) {
return pos;
}
pos = (pos + 1) % table.size();
} while (pos != startPos);
return -1;
}
bool insert(int key, int value) {
if (count >= table.size() * 0.75) return false;
size_t pos = key % table.size();
while (table[pos].state == OCCUPIED) {
if (table[pos].key == key) {
table[pos].value = value;
return true;
}
pos = (pos + 1) % table.size();
}
table[pos] = {key, value, OCCUPIED};
count++;
return true;
}
};
|
| Python |
|---|
| class LinearProbingHashTable:
def __init__(self, size):
self.size = size
self.keys = [None] * size
self.values = [None] * size
self.count = 0
def search(self, key):
pos = key % self.size
start_pos = pos
while self.keys[pos] is not None:
if self.keys[pos] == key:
return pos
pos = (pos + 1) % self.size
if pos == start_pos:
break
return -1
def insert(self, key, value):
if self.count >= self.size * 0.75:
return False
pos = key % self.size
while self.keys[pos] is not None:
if self.keys[pos] == key:
self.values[pos] = value
return True
pos = (pos + 1) % self.size
self.keys[pos] = key
self.values[pos] = value
self.count += 1
return True
def delete(self, key):
pos = self.search(key)
if pos != -1:
self.keys[pos] = None # 标记为删除
self.count -= 1
return True
return False
|
| Java |
|---|
| public class LinearProbingHashTable {
private Integer[] keys;
private Integer[] values;
private int size;
private int count;
public LinearProbingHashTable(int size) {
this.size = size;
this.keys = new Integer[size];
this.values = new Integer[size];
this.count = 0;
}
public int search(int key) {
int pos = key % size;
int startPos = pos;
do {
if (keys[pos] == null) return -1;
if (keys[pos] == key) return pos;
pos = (pos + 1) % size;
} while (pos != startPos);
return -1;
}
public boolean insert(int key, int value) {
if (count >= size * 0.75) return false;
int pos = key % size;
while (keys[pos] != null) {
if (keys[pos] == key) {
values[pos] = value;
return true;
}
pos = (pos + 1) % size;
}
keys[pos] = key;
values[pos] = value;
count++;
return true;
}
}
|
| Go |
|---|
| type LinearProbingHashTable struct {
keys []int
values []int
size int
count int
}
func NewLinearProbingHashTable(size int) *LinearProbingHashTable {
return &LinearProbingHashTable{
keys: make([]int, size),
values: make([]int, size),
size: size,
count: 0,
}
}
func (ht *LinearProbingHashTable) Search(key int) int {
pos := key % ht.size
startPos := pos
for ht.keys[pos] != 0 {
if ht.keys[pos] == key {
return pos
}
pos = (pos + 1) % ht.size
if pos == startPos {
break
}
}
return -1
}
func (ht *LinearProbingHashTable) Insert(key, value int) bool {
if ht.count >= int(float64(ht.size)*0.75) {
return false
}
pos := key % ht.size
for ht.keys[pos] != 0 {
if ht.keys[pos] == key {
ht.values[pos] = value
return true
}
pos = (pos + 1) % ht.size
}
ht.keys[pos] = key
ht.values[pos] = value
ht.count++
return true
}
|
| Rust |
|---|
| use std::collections::HashMap;
// Rust 标准库已提供高效的 HashMap 实现
// 这里展示简单的线性探测实现
struct LinearProbingHashTable {
keys: Vec<Option<i32>>,
values: Vec<Option<i32>>,
size: usize,
count: usize,
}
impl LinearProbingHashTable {
fn new(size: usize) -> Self {
Self {
keys: vec![None; size],
values: vec![None; size],
size,
count: 0,
}
}
fn search(&self, key: i32) -> Option<usize> {
let mut pos = (key.abs() as usize) % self.size;
let start_pos = pos;
while let Some(k) = self.keys[pos] {
if k == key {
return Some(pos);
}
pos = (pos + 1) % self.size;
if pos == start_pos {
break;
}
}
None
}
fn insert(&mut self, key: i32, value: i32) -> bool {
if self.count >= (self.size as f64 * 0.75) as usize {
return false;
}
let mut pos = (key.abs() as usize) % self.size;
while self.keys[pos].is_some() {
if self.keys[pos] == Some(key) {
self.values[pos] = Some(value);
return true;
}
pos = (pos + 1) % self.size;
}
self.keys[pos] = Some(key);
self.values[pos] = Some(value);
self.count += 1;
true
}
}
|
2. 链地址法
| C |
|---|
| typedef struct HashNode {
int key;
int value;
struct HashNode *next;
} HashNode;
typedef struct {
HashNode **buckets;
int size;
int count;
} ChainHashTable;
ChainHashTable* createChainHashTable(int size) {
ChainHashTable *ht = (ChainHashTable*)malloc(sizeof(ChainHashTable));
ht->buckets = (HashNode**)calloc(size, sizeof(HashNode*));
ht->size = size;
ht->count = 0;
return ht;
}
HashNode* chainHashSearch(ChainHashTable *ht, int key) {
unsigned int pos = key % ht->size;
HashNode *curr = ht->buckets[pos];
while (curr) {
if (curr->key == key) {
return curr;
}
curr = curr->next;
}
return NULL;
}
void chainHashInsert(ChainHashTable *ht, int key, int value) {
HashNode *node = chainHashSearch(ht, key);
if (node) {
node->value = value;
return;
}
unsigned int pos = key % ht->size;
HashNode *newNode = (HashNode*)malloc(sizeof(HashNode));
newNode->key = key;
newNode->value = value;
newNode->next = ht->buckets[pos];
ht->buckets[pos] = newNode;
ht->count++;
}
void chainHashDelete(ChainHashTable *ht, int key) {
unsigned int pos = key % ht->size;
HashNode *curr = ht->buckets[pos];
HashNode *prev = NULL;
while (curr) {
if (curr->key == key) {
if (prev) {
prev->next = curr->next;
} else {
ht->buckets[pos] = curr->next;
}
free(curr);
ht->count--;
return;
}
prev = curr;
curr = curr->next;
}
}
|
| C++ |
|---|
| #include <vector>
#include <list>
using namespace std;
class ChainHashTable {
private:
vector<list<pair<int, int>>> buckets;
size_t count;
public:
ChainHashTable(size_t size) : buckets(size), count(0) {}
int* search(int key) {
size_t pos = key % buckets.size();
for (auto& p : buckets[pos]) {
if (p.first == key) {
return &p.second;
}
}
return nullptr;
}
void insert(int key, int value) {
size_t pos = key % buckets.size();
for (auto& p : buckets[pos]) {
if (p.first == key) {
p.second = value;
return;
}
}
buckets[pos].push_back({key, value});
count++;
}
bool erase(int key) {
size_t pos = key % buckets.size();
for (auto it = buckets[pos].begin(); it != buckets[pos].end(); ++it) {
if (it->first == key) {
buckets[pos].erase(it);
count--;
return true;
}
}
return false;
}
};
|
| Python |
|---|
| class ChainHashTable:
def __init__(self, size):
self.size = size
self.buckets = [[] for _ in range(size)]
self.count = 0
def search(self, key):
pos = key % self.size
for k, v in self.buckets[pos]:
if k == key:
return v
return None
def insert(self, key, value):
pos = key % self.size
for i, (k, v) in enumerate(self.buckets[pos]):
if k == key:
self.buckets[pos][i] = (key, value)
return
self.buckets[pos].append((key, value))
self.count += 1
def delete(self, key):
pos = key % self.size
for i, (k, v) in enumerate(self.buckets[pos]):
if k == key:
del self.buckets[pos][i]
self.count -= 1
return True
return False
def __getitem__(self, key):
result = self.search(key)
if result is None:
raise KeyError(key)
return result
def __setitem__(self, key, value):
self.insert(key, value)
|
| Java |
|---|
| import java.util.LinkedList;
public class ChainHashTable {
private LinkedList<Pair>[] buckets;
private int size;
private int count;
private static class Pair {
int key, value;
Pair(int k, int v) { key = k; value = v; }
}
@SuppressWarnings("unchecked")
public ChainHashTable(int size) {
this.size = size;
this.buckets = new LinkedList[size];
for (int i = 0; i < size; i++) {
buckets[i] = new LinkedList<>();
}
this.count = 0;
}
public Integer search(int key) {
int pos = key % size;
for (Pair p : buckets[pos]) {
if (p.key == key) {
return p.value;
}
}
return null;
}
public void insert(int key, int value) {
int pos = key % size;
for (Pair p : buckets[pos]) {
if (p.key == key) {
p.value = value;
return;
}
}
buckets[pos].add(new Pair(key, value));
count++;
}
}
|
| Go |
|---|
| type chainNode struct {
key int
value int
next *chainNode
}
type ChainHashTable struct {
buckets []*chainNode
size int
count int
}
func NewChainHashTable(size int) *ChainHashTable {
return &ChainHashTable{
buckets: make([]*chainNode, size),
size: size,
count: 0,
}
}
func (ht *ChainHashTable) Search(key int) (int, bool) {
pos := key % ht.size
curr := ht.buckets[pos]
for curr != nil {
if curr.key == key {
return curr.value, true
}
curr = curr.next
}
return 0, false
}
func (ht *ChainHashTable) Insert(key, value int) {
pos := key % ht.size
curr := ht.buckets[pos]
// 检查是否已存在
for curr != nil {
if curr.key == key {
curr.value = value
return
}
curr = curr.next
}
// 插入新节点
newNode := &chainNode{key: key, value: value, next: ht.buckets[pos]}
ht.buckets[pos] = newNode
ht.count++
}
|
| Rust |
|---|
| use std::collections::HashMap;
// Rust 标准库的 HashMap 使用链地址法
// 直接使用标准库实现
fn demonstrate_hashmap() {
let mut map: HashMap<i32, i32> = HashMap::new();
// 插入
map.insert(1, 100);
map.insert(2, 200);
// 查找
if let Some(&value) = map.get(&1) {
println!("Found: {}", value);
}
// 删除
map.remove(&1);
}
// 自定义链地址法哈希表
struct ChainHashTable {
buckets: Vec<Vec<(i32, i32)>>,
count: usize,
}
impl ChainHashTable {
fn new(size: usize) -> Self {
Self {
buckets: vec![Vec::new(); size],
count: 0,
}
}
fn search(&self, key: i32) -> Option<i32> {
let pos = (key.abs() as usize) % self.buckets.len();
for &(k, v) in &self.buckets[pos] {
if k == key {
return Some(v);
}
}
None
}
fn insert(&mut self, key: i32, value: i32) {
let pos = (key.abs() as usize) % self.buckets.len();
for pair in &mut self.buckets[pos] {
if pair.0 == key {
pair.1 = value;
return;
}
}
self.buckets[pos].push((key, value));
self.count += 1;
}
}
|
时间复杂度分析
| 操作 |
平均时间复杂度 |
最坏时间复杂度 |
说明 |
| 查找 |
O(1) |
O(n) |
最坏情况所有键冲突 |
| 插入 |
O(1) |
O(n) |
可能需要rehash |
| 删除 |
O(1) |
O(n) |
最坏情况链表很长 |
| Rehash |
O(n) |
O(n) |
重建整个表 |
性能影响因素
- 哈希函数质量:分布越均匀越好
- 装载因子:α = n/m,建议0.7-0.8
- 冲突解决策略:链地址法更稳定
- 表大小:选择质数减少冲突
空间复杂度
- 开放定址法:O(m)(m为表大小)
- 链地址法:O(n + m)(n为元素数量)
装载因子与性能
| 装载因子α |
链地址法查找 |
开放定址查找 |
| 0.5 |
1.5次 |
2次 |
| 0.75 |
2.5次 |
4次 |
| 0.9 |
5.5次 |
10次 |
| 1.0 |
∞ |
发散 |
应用场景
- 数据库索引:内存索引、缓存
- 编译器:符号表、关键字查找
- 缓存系统:Redis、Memcached
- 集合操作:去重、判重
- 字符串匹配:Rabin-Karp算法
优缺点
优点
- 查找速度快:平均O(1)时间复杂度
- 实现简单:核心逻辑易于理解
- 增删高效:插入和删除同样快速
- 灵活性强:可调整装载因子
缺点
- 不支持范围查询:只能精确查找
- 空间开销较大:需要预留空间
- 最坏性能差:所有键冲突时退化为链表
- 无序性:不能保持插入顺序
各语言标准库实现
参考资料