哈希映射函数
/* BKDR Hash Function */
unsigned int BKDR_hash(char *str)
{
unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
unsigned int hash = 0;
while (*str)
{
hash = hash * seed + (*str++);
}
return (hash & 0x7FFFFFFF);
}
哈希解决冲突
开放定址法
- 线性探测再散列:顺序查看表中下一单元,直到找出一个空单元或查遍全表。
- 二次探测再散列:冲突发生时,在表的左右进行跳跃式探测,比较灵活
- 伪随机探测再散列:建立一个伪随机数发生器,(如i=(i+p) % m),并给定一个随机数做起点。
- 实例:已知哈希表长度m=11,哈希函数为:H(key)= key % 11,则H(47)=3,H(26)=4,H(60)=5,假设下一个关键字为69,则H(69)=3,与47冲突。1)如果用线性探测再散列处理冲突,下一个哈希地址为H1=(3 + 1)% 11 = 4,仍然冲突,再找下一个哈希地址为H2=(3 + 2)% 11 = 5,还是冲突,继续找下一个哈希地址为H3=(3 + 3)% 11 = 6,此时不再冲突,将69填入5号单元。 2)如果用二次探测再散列处理冲突,下一个哈希地址为H1=(3 + 12)% 11 = 4,仍然冲突,再找下一个哈希地址为H2=(3 - 12)% 11 = 2,此时不再冲突,将69填入2号单元。3)如果用伪随机探测再散列处理冲突,且伪随机数序列为:2,5,9,……..,则下一个哈希地址为H1=(3 + 2)% 11 = 5,仍然冲突,再找下一个哈希地址为H2=(3 + 5)% 11 = 8,此时不再冲突,将69填入8号单元。
再哈希法
- 同时构造多个不同的哈希函数,这种方法不易产生聚集,但增加了计算时间。
链地址法(拉链法)
- 基本思想是将所有哈希地址为i的元素构成一个称为同义词链的单链表,并将单链表的头指针存在哈希表的第i个单元中,因而查找、插入和删除主要在同义词链中进行。链地址法适用于经常进行插入和删除的情况。
建立公共溢出区
- 将哈希表分为基本表和溢出表两部分,凡是和基本表发生冲突的元素,一律填入溢出表。
hash表代码
c++
/*
因为类的申明和定义分开来分别在.h和.cpp文件,我这边编译错误,所以放在了一起。
*/
//--------simpleHashTable.h-------
#pragma once
#include <iostream>
using namespace std;
typedef int KeyType;
#define NULLKEY -1
struct Entry{
KeyType _key;
int _value;
Entry(KeyType key=NULLKEY, int value=0):_key(key),_value(value){}
};
class hashTable{
public:
hashTable();
//hashTable(int tableSize);
~hashTable();
bool find(const Entry& e);
bool insert(const Entry& e);
bool remove(const Entry& e);
void clear();
Entry& operator[](KeyType key);//重载下标操作;当找不到key对应的Entry时,插入Entry(key,0)
int size();
void display();
protected:
int hashFunction(KeyType key);//将键值映射到对应地址
void rehash();//调整hashTable大小
bool find(const KeyType& k);//按键值查找
int nextPrime();//p(n) = n^2 - n + 41, n<41, p<1681
private:
Entry *_pTable;
int _pos;//当前访问元素的位置
int _size;
int _capacity;
int primeIndex;
};
hashTable::hashTable()
{
_capacity = 3;//初始化hashTable容量为3,便于观察rehash过程
_pTable = new Entry[_capacity];
_size = 0;
primeIndex = 1;
}
//hashTable::hashTable(int tableSize)
//{
//
//}
hashTable::~hashTable()
{
clear();
}
int hashTable::nextPrime()
{
int p = std::pow(static_cast<float>(primeIndex),2) - primeIndex + 41;
primeIndex = primeIndex << 1;
if(primeIndex >= 41){
cout << "Max capacity reached. exit!" << endl;
exit(-1);
}
return p;
}
bool hashTable::find(const Entry& e)
{
return(find(e._key));
}
bool hashTable::find(const KeyType& k)
{
_pos = hashFunction(k);
if(_pTable[_pos]._key==NULLKEY)
return false;
int lastPos = _pos;
while(_pTable[_pos]._key!=k){
if(++_pos%_capacity == lastPos)
return false;
}
return true;
}
bool hashTable::insert(const Entry& e)
{
if((_size*1.0)/_capacity>0.75)
rehash();//[OK]插入操作前,需要判断hash table是否需要扩容
if(find(e))
return false;
_pTable[_pos] = e;
++_size;
return true;
}
bool hashTable::remove(const Entry& e)
{
if(!find(e))
return false;
_pTable[_pos]._key = NULLKEY;
--_size;
//rehash();//移除操作后,需要判断hash table是否需要缩容
return true;
}
void hashTable::clear()
{
delete []_pTable;
_size = _capacity = 0;
}
Entry& hashTable::operator[](KeyType key)
{
if(!find(key))
insert(Entry(key,0));
return _pTable[_pos];
}
int hashTable::size()
{
return _size;
}
int hashTable::hashFunction(KeyType key)
{
return key%_capacity;
}
void hashTable::display()
{
cout << "capacity = " << _capacity << ", size = " << _size << endl;
for(int i=0; i<_capacity; i++){
if(_pTable[i]._key!=NULLKEY)
cout << "key=" << _pTable[i]._key << ",value=" << _pTable[i]._value << endl;
}
}
void hashTable::rehash()
{
cout << "begin rehash..." << endl;
Entry *p = new Entry[_size];//用来暂存原哈希表
for(int i=0; i<_capacity; i++){//i<_size不对;元素散列在容量为_capacity的hashTable中
if(_pTable[i]._key != NULLKEY)
*(p+i) = _pTable[i];
}
delete []_pTable;
int lastSize = _size;
_size = 0;
_capacity = nextPrime();
_pTable = new Entry[_capacity];
for(int i=0; i<lastSize; i++)
insert(*(p+i));
delete []p;
}
//------testbench.cpp------
#include <iostream>
#include "simpleHashTable.h"
using namespace std;
int main()
{
hashTable *pTable = new hashTable;
cout << "insert Entry(1,11)..." << endl;
pTable->insert(Entry(1,11));
pTable->display();
cout << "insert Entry(2,22)..." << endl;
pTable->insert(Entry(2,22));
pTable->display();
cout << "insert Entry(3,33)..." << endl;
pTable->insert(Entry(3,33));
pTable->display();
cout << "insert Entry(4,44)..." << endl;
//pTable->insert(Entry(4,44));
(*pTable)[4]._value = 44;//下标操作,返回值充当左值
pTable->display();
cout << endl << "delete Entry(1,11)..." << endl;
pTable->remove(Entry(1,11));
pTable->display();
cout << "delete Entry(2,22)..." << endl;
pTable->remove(Entry(2,22));
pTable->display();
cout << "delete Entry(3,33)..." << endl;
pTable->remove(Entry(3,33));
pTable->display();
cout << "delete Entry(3,33)..." << endl;
pTable->remove(Entry(3,33));
pTable->display();
delete pTable;
getchar();
return 0;
}
c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
/*=================hash table start=========================================*/
#define HASH_TABLE_MAX_SIZE 10000
typedef struct HashNode_Struct HashNode;
struct HashNode_Struct
{
char* sKey;
int nValue;
HashNode* pNext;
};
HashNode* hashTable[HASH_TABLE_MAX_SIZE]; //hash table data strcutrue
int hash_table_size; //the number of key-value pairs in the hash table!
//initialize hash table
void hash_table_init()
{
hash_table_size = 0;
memset(hashTable, 0, sizeof(HashNode*) * HASH_TABLE_MAX_SIZE);
}
//string hash function
unsigned int hash_table_hash_str(const char* skey)
{
const signed char *p = (const signed char*)skey;
unsigned int h = *p;
if(h)
{
for(p += 1; *p != '\0'; ++p)
h = (h << 5) - h + *p; //h = h*31+*p
}
return h;
}
//insert key-value into hash table
void hash_table_insert(const char* skey, int nvalue)
{
if(hash_table_size >= HASH_TABLE_MAX_SIZE)
{
printf("out of hash table memory!\n");
return;
}
unsigned int pos = hash_table_hash_str(skey) % HASH_TABLE_MAX_SIZE;
HashNode* pHead = hashTable[pos];
while(pHead)
{
if(strcmp(pHead->sKey, skey) == 0) //strcmp(str1, str2):1)相等返回0;2)str1<str2返回负数;3)str1>str2返回正数
{
printf("%s already exists!\n", skey);
return ;
}
pHead = pHead->pNext;
}
HashNode* pNewNode = (HashNode*)malloc(sizeof(HashNode));
memset(pNewNode, 0, sizeof(HashNode));
pNewNode->sKey = (char*)malloc(sizeof(char) * (strlen(skey) + 1));
strcpy(pNewNode->sKey, skey);
pNewNode->nValue = nvalue;
pNewNode->pNext = hashTable[pos];
hashTable[pos] = pNewNode;
hash_table_size++;
}
//remove key-value from the hash table
void hash_table_remove(const char* skey)
{
unsigned int pos = hash_table_hash_str(skey) % HASH_TABLE_MAX_SIZE;
if(hashTable[pos])
{
HashNode* pHead = hashTable[pos];
HashNode* pLast = NULL;
HashNode* pRemove = NULL;
while(pHead)
{
if(strcmp(skey, pHead->sKey) == 0)
{
pRemove = pHead;
break;
}
pLast = pHead;
pHead = pHead->pNext;
}
if(pRemove)
{
if(pLast)
pLast->pNext = pRemove->pNext;
else
hashTable[pos] = NULL;
free(pRemove->sKey);
free(pRemove);
}
}
}
//lookup a key in the hash table
HashNode* hash_table_lookup(const char* skey)
{
unsigned int pos = hash_table_hash_str(skey) % HASH_TABLE_MAX_SIZE;
if(hashTable[pos])
{
HashNode* pHead = hashTable[pos];
while(pHead)
{
if(strcmp(skey, pHead->sKey) == 0)
return pHead;
pHead = pHead->pNext;
}
}
return NULL;
}
//print the content in the hash table
void hash_table_print()
{
printf("===========content of hash table=================\n");
int i;
for(i = 0; i < HASH_TABLE_MAX_SIZE; ++i)
if(hashTable[i])
{
HashNode* pHead = hashTable[i];
printf("%d=>", i);
while(pHead)
{
printf("%s:%d ", pHead->sKey, pHead->nValue);
pHead = pHead->pNext;
}
printf("\n");
}
}
//free the memory of the hash table
void hash_table_release()
{
int i;
for(i = 0; i < HASH_TABLE_MAX_SIZE; ++i)
{
if(hashTable[i])
{
HashNode* pHead = hashTable[i];
while(pHead)
{
HashNode* pTemp = pHead;
pHead = pHead->pNext;
if(pTemp)
{
free(pTemp->sKey);
free(pTemp);
}
}
}
}
}
/* ===============================hash table end=========================*/
/* ============================test function ============================*/
#define MAX_STR_LEN 20
#define MIN_STR_LEN 10
void rand_str(char r[])
{
int i;
int len = MIN_STR_LEN + rand() % (MAX_STR_LEN - MIN_STR_LEN);
for(i = 0; i < len - 1; ++i)
r[i] = 'a' + rand() % ( 'z' - 'a');
r[len - 1] = '\0';
}
int main(int argc, char** argv)
{
srand(time(NULL));
hash_table_init();
printf("insert testing.........\n");
int n = 10;
const char *key1 = "aaammd";
const char *key2 = "xzzyym";
const char *key3 = "cdcded";
hash_table_insert(key1, 110);
hash_table_insert(key2, 220);
hash_table_insert(key3, 330);
char str[MAX_STR_LEN + 1];
while(n--)
{
rand_str(str);
hash_table_insert(str, n);
}
hash_table_print();
printf("\nlookup testing..........\n");
HashNode* pNode = hash_table_lookup(key1);
printf("lookup result:%d\n", pNode->nValue);
pNode = hash_table_lookup(key2);
printf("lookup result:%d\n", pNode->nValue);
printf("\nremove testing..........\n");
printf("before remove %s:\n", key3);
hash_table_print();
hash_table_remove(key3);
printf("after remove:\n");
hash_table_print();
hash_table_release();
system("pause");
return 0;
}