替罪羊树
概述
替罪羊树(Scapegoat Tree)是一种自平衡二叉搜索树,通过检测不平衡节点并重建子树来维持平衡。当某个子树的不平衡程度超过阈值时,将该子树重建为完全平衡状态。
核心思想
替罪羊树的核心思想是"部分重建":当插入或删除操作导致某子树失衡时,找到第一个失衡的祖先节点(替罪羊),对该子树进行完全重建。
替罪羊树特点
- 不需要旋转操作
- 使用α参数控制平衡度(0.5 < α < 1)
- 重建操作时间复杂度O(n),但分摊复杂度良好
- 实现简单,适合学习和实际应用
平衡条件
设α为平衡参数(通常取0.7),对于任意节点x:
size(left(x)) ≤ α × size(x)
size(right(x)) ≤ α × size(x)
当违反上述条件时,需要对以x为根的子树进行重建。
数据结构定义
| C |
|---|
| typedef struct ScapegoatNode {
int key;
struct ScapegoatNode *left;
struct ScapegoatNode *right;
int size;
} ScapegoatNode;
typedef struct {
ScapegoatNode *root;
double alpha;
int maxSize;
int size;
} ScapegoatTree;
|
核心操作实现
初始化
| C |
|---|
| ScapegoatTree* createScapegoatTree(double alpha) {
ScapegoatTree *tree = (ScapegoatTree*)malloc(sizeof(ScapegoatTree));
tree->root = NULL;
tree->alpha = alpha;
tree->maxSize = 0;
tree->size = 0;
return tree;
}
int getSize(ScapegoatNode *node) {
return node ? node->size : 0;
}
void updateSize(ScapegoatNode *node) {
if (node) {
node->size = 1 + getSize(node->left) + getSize(node->right);
}
}
|
查找操作
| C |
|---|
| ScapegoatNode* search(ScapegoatNode *root, int key) {
if (!root || root->key == key) {
return root;
}
if (key < root->key) {
return search(root->left, key);
}
return search(root->right, key);
}
|
中序遍历(用于重建)
| C |
|---|
| void inorderTraversal(ScapegoatNode *node, ScapegoatNode **arr, int *index) {
if (!node) return;
inorderTraversal(node->left, arr, index);
arr[(*index)++] = node;
inorderTraversal(node->right, arr, index);
}
ScapegoatNode* buildBalanced(ScapegoatNode **arr, int start, int end) {
if (start > end) return NULL;
int mid = (start + end) / 2;
ScapegoatNode *root = arr[mid];
root->left = buildBalanced(arr, start, mid - 1);
root->right = buildBalanced(arr, mid + 1, end);
updateSize(root);
return root;
}
|
重建子树
| C |
|---|
| ScapegoatNode* rebuild(ScapegoatNode *root) {
int size = getSize(root);
ScapegoatNode **arr = (ScapegoatNode**)malloc(size * sizeof(ScapegoatNode*));
int index = 0;
inorderTraversal(root, arr, &index);
ScapegoatNode *newRoot = buildBalanced(arr, 0, size - 1);
free(arr);
return newRoot;
}
|
检查是否需要重建
| C |
|---|
| int needRebuild(ScapegoatNode *node, double alpha) {
if (!node) return 0;
int leftSize = getSize(node->left);
int rightSize = getSize(node->right);
int totalSize = getSize(node);
return (leftSize > alpha * totalSize) || (rightSize > alpha * totalSize);
}
|
插入操作
| C |
|---|
| typedef struct {
ScapegoatNode *node;
int depth;
int rebuilt;
} InsertResult;
InsertResult insertHelper(ScapegoatNode *root, int key, double alpha, int depth) {
InsertResult result = {NULL, depth, 0};
if (!root) {
ScapegoatNode *newNode = (ScapegoatNode*)malloc(sizeof(ScapegoatNode));
newNode->key = key;
newNode->left = NULL;
newNode->right = NULL;
newNode->size = 1;
result.node = newNode;
return result;
}
if (key < root->key) {
InsertResult leftResult = insertHelper(root->left, key, alpha, depth + 1);
root->left = leftResult.node;
updateSize(root);
result.node = root;
result.rebuilt = leftResult.rebuilt;
if (!leftResult.rebuilt && needRebuild(root, alpha)) {
result.node = rebuild(root);
result.rebuilt = 1;
}
} else if (key > root->key) {
InsertResult rightResult = insertHelper(root->right, key, alpha, depth + 1);
root->right = rightResult.node;
updateSize(root);
result.node = root;
result.rebuilt = rightResult.rebuilt;
if (!rightResult.rebuilt && needRebuild(root, alpha)) {
result.node = rebuild(root);
result.rebuilt = 1;
}
}
return result;
}
void insert(ScapegoatTree *tree, int key) {
InsertResult result = insertHelper(tree->root, key, tree->alpha, 0);
tree->root = result.node;
tree->size++;
tree->maxSize = (tree->size > tree->maxSize) ? tree->size : tree->maxSize;
}
|
删除操作
| C |
|---|
| ScapegoatNode* findMin(ScapegoatNode *node) {
while (node && node->left) {
node = node->left;
}
return node;
}
ScapegoatNode* deleteHelper(ScapegoatNode *root, int key) {
if (!root) return NULL;
if (key < root->key) {
root->left = deleteHelper(root->left, key);
} else if (key > root->key) {
root->right = deleteHelper(root->right, key);
} else {
if (!root->left) {
ScapegoatNode *temp = root->right;
free(root);
return temp;
}
if (!root->right) {
ScapegoatNode *temp = root->left;
free(root);
return temp;
}
ScapegoatNode *minNode = findMin(root->right);
root->key = minNode->key;
root->right = deleteHelper(root->right, minNode->key);
}
updateSize(root);
return root;
}
void delete(ScapegoatTree *tree, int key) {
tree->root = deleteHelper(tree->root, key);
tree->size--;
if (tree->size < tree->alpha * tree->maxSize) {
tree->root = rebuild(tree->root);
tree->maxSize = tree->size;
}
}
|
C++ 实现
| C++ |
|---|
| template<typename T>
class ScapegoatTree {
private:
struct Node {
T key;
Node *left, *right;
int size;
Node(T k) : key(k), left(nullptr), right(nullptr), size(1) {}
};
Node *root;
double alpha;
int maxSize;
int size;
int getSize(Node *node) {
return node ? node->size : 0;
}
void updateSize(Node *node) {
if (node) {
node->size = 1 + getSize(node->left) + getSize(node->right);
}
}
bool needRebuild(Node *node) {
if (!node) return false;
int leftSize = getSize(node->left);
int rightSize = getSize(node->right);
int totalSize = getSize(node);
return (leftSize > alpha * totalSize) || (rightSize > alpha * totalSize);
}
void inorderTraversal(Node *node, vector<Node*>& arr) {
if (!node) return;
inorderTraversal(node->left, arr);
arr.push_back(node);
inorderTraversal(node->right, arr);
}
Node* buildBalanced(vector<Node*>& arr, int start, int end) {
if (start > end) return nullptr;
int mid = (start + end) / 2;
Node *root = arr[mid];
root->left = buildBalanced(arr, start, mid - 1);
root->right = buildBalanced(arr, mid + 1, end);
updateSize(root);
return root;
}
Node* rebuild(Node *node) {
vector<Node*> arr;
inorderTraversal(node, arr);
return buildBalanced(arr, 0, arr.size() - 1);
}
public:
ScapegoatTree(double a = 0.7) : root(nullptr), alpha(a), maxSize(0), size(0) {}
Node* search(T key) {
Node *curr = root;
while (curr && curr->key != key) {
curr = (key < curr->key) ? curr->left : curr->right;
}
return curr;
}
void insert(T key);
void remove(T key);
};
|
时间复杂度分析
| 操作 |
平均时间复杂度 |
最坏时间复杂度 |
空间复杂度 |
| 查找 |
O(log n) |
O(log n) |
O(1) |
| 插入 |
O(log n) |
O(n) |
O(log n) |
| 删除 |
O(log n) |
O(n) |
O(log n) |
| 重建 |
O(n) |
O(n) |
O(n) |
复杂度说明
- 虽然单次插入/删除可能触发O(n)的重建
- 但分摊分析表明,连续n次操作的总复杂度为O(n log n)
- 因此分摊每次操作的复杂度为O(log n)
空间复杂度
- 单节点空间:O(1)
- 总空间:O(n)
- 重建临时空间:O(n)
应用场景
- 数据库索引:适合频繁更新的索引结构
- 内存数据库:实现简单,内存效率高
- 缓存系统:需要自平衡但不需要频繁重建的场景
- 教学示例:理解自平衡树的原理
与其他自平衡树比较
| 特性 |
替罪羊树 |
AVL树 |
红黑树 |
B树 |
| 平衡方式 |
重建子树 |
旋转 |
旋转 |
分裂合并 |
| 实现难度 |
简单 |
中等 |
较难 |
中等 |
| 查找效率 |
O(log n) |
O(log n) |
O(log n) |
O(log n) |
| 空间开销 |
O(n) |
O(n) |
O(n) |
O(n) |
| 批量插入 |
高效 |
一般 |
一般 |
高效 |
优点与缺点
优点
- 实现简单:不需要复杂的旋转操作
- 无递归栈溢出风险:可以迭代实现
- 批量操作友好:重建操作适合批量插入
- 可调参数:通过α参数平衡时间和空间
缺点
- 单次操作可能较慢:重建需要O(n)时间
- 额外空间:重建需要临时数组
- 非确定性:重建时机依赖于操作序列
- 不适合实时系统:重建操作不可预测
参考资料