hash表

哈希映射函数

/* BKDR Hash Function */
unsigned int BKDR_hash(char *str)
{
    unsigned int seed = 131; // 31 131 1313 13131 131313 etc..
    unsigned int hash = 0;
    while (*str)
    {
        hash = hash * seed + (*str++);
    }
    return (hash & 0x7FFFFFFF);
}

哈希解决冲突

开放定址法

  • 线性探测再散列:顺序查看表中下一单元,直到找出一个空单元或查遍全表。
  • 二次探测再散列:冲突发生时,在表的左右进行跳跃式探测,比较灵活
  • 伪随机探测再散列:建立一个伪随机数发生器,(如i=(i+p) % m),并给定一个随机数做起点。
  • 实例:已知哈希表长度m=11,哈希函数为:H(key)= key % 11,则H(47)=3,H(26)=4,H(60)=5,假设下一个关键字为69,则H(69)=3,与47冲突。1)如果用线性探测再散列处理冲突,下一个哈希地址为H1=(3 + 1)% 11 = 4,仍然冲突,再找下一个哈希地址为H2=(3 + 2)% 11 = 5,还是冲突,继续找下一个哈希地址为H3=(3 + 3)% 11 = 6,此时不再冲突,将69填入5号单元。 2)如果用二次探测再散列处理冲突,下一个哈希地址为H1=(3 + 12)% 11 = 4,仍然冲突,再找下一个哈希地址为H2=(3 - 12)% 11 = 2,此时不再冲突,将69填入2号单元。3)如果用伪随机探测再散列处理冲突,且伪随机数序列为:2,5,9,……..,则下一个哈希地址为H1=(3 + 2)% 11 = 5,仍然冲突,再找下一个哈希地址为H2=(3 + 5)% 11 = 8,此时不再冲突,将69填入8号单元。

再哈希法

  • 同时构造多个不同的哈希函数,这种方法不易产生聚集,但增加了计算时间。

链地址法(拉链法)

  • 基本思想是将所有哈希地址为i的元素构成一个称为同义词链的单链表,并将单链表的头指针存在哈希表的第i个单元中,因而查找、插入和删除主要在同义词链中进行。链地址法适用于经常进行插入和删除的情况。

建立公共溢出区

  • 将哈希表分为基本表和溢出表两部分,凡是和基本表发生冲突的元素,一律填入溢出表。

hash表代码

c++

/*
    因为类的申明和定义分开来分别在.h和.cpp文件,我这边编译错误,所以放在了一起。
*/
//--------simpleHashTable.h-------
#pragma once
#include <iostream>
using namespace std;

typedef int KeyType;
#define NULLKEY -1

struct Entry{
    KeyType _key;
    int        _value;
    Entry(KeyType key=NULLKEY, int value=0):_key(key),_value(value){}
};

class hashTable{
public:
    hashTable();
    //hashTable(int tableSize);
    ~hashTable();

    bool find(const Entry&  e);
    bool insert(const Entry& e);
    bool remove(const Entry& e);
    void clear();
    Entry& operator[](KeyType key);//重载下标操作;当找不到key对应的Entry时,插入Entry(key,0)
    int size();
    void display();

protected:
    int hashFunction(KeyType key);//将键值映射到对应地址
    void rehash();//调整hashTable大小
    bool find(const KeyType& k);//按键值查找
    int nextPrime();//p(n) = n^2 - n + 41, n<41, p<1681

private:
    Entry *_pTable;
    int _pos;//当前访问元素的位置
    int _size;
    int _capacity;
    int primeIndex;
};

hashTable::hashTable()
{
    _capacity = 3;//初始化hashTable容量为3,便于观察rehash过程
    _pTable = new Entry[_capacity];
    _size = 0;
    primeIndex = 1;
}

//hashTable::hashTable(int tableSize)
//{
//
//}

hashTable::~hashTable()
{
    clear();
}

int hashTable::nextPrime()
{
    int p = std::pow(static_cast<float>(primeIndex),2) - primeIndex + 41;
    primeIndex = primeIndex << 1;
    if(primeIndex >= 41){
        cout << "Max capacity reached. exit!" << endl;
        exit(-1);
    }
    return p;
}

bool hashTable::find(const Entry&  e)
{
    return(find(e._key));
}

bool hashTable::find(const KeyType& k)
{
    _pos = hashFunction(k);
    if(_pTable[_pos]._key==NULLKEY)
        return false;
    int lastPos = _pos;
    while(_pTable[_pos]._key!=k){
        if(++_pos%_capacity == lastPos)
            return false;
    }
    return true;
}

bool hashTable::insert(const Entry& e)
{
    if((_size*1.0)/_capacity>0.75)
        rehash();//[OK]插入操作前,需要判断hash table是否需要扩容
    if(find(e))
        return false;
    _pTable[_pos] = e;
    ++_size;
    return true;
}

bool hashTable::remove(const Entry& e)
{
    if(!find(e))
        return false;
    _pTable[_pos]._key = NULLKEY;
    --_size;
    //rehash();//移除操作后,需要判断hash table是否需要缩容
    return true;
}

void hashTable::clear()
{
    delete []_pTable;
    _size = _capacity = 0;
}

Entry& hashTable::operator[](KeyType key)
{
    if(!find(key))
        insert(Entry(key,0));
    return _pTable[_pos];
}

int hashTable::size()
{
    return _size;
}

int hashTable::hashFunction(KeyType key)
{
    return key%_capacity;
}

void hashTable::display()
{
    cout << "capacity = " << _capacity << ", size = " << _size << endl;
    for(int i=0; i<_capacity; i++){
        if(_pTable[i]._key!=NULLKEY)
            cout << "key=" << _pTable[i]._key << ",value=" << _pTable[i]._value << endl;
    }
}

void hashTable::rehash()
{    
    cout << "begin rehash..." << endl;
    Entry *p = new Entry[_size];//用来暂存原哈希表
    for(int i=0; i<_capacity; i++){//i<_size不对;元素散列在容量为_capacity的hashTable中
        if(_pTable[i]._key != NULLKEY)
            *(p+i) = _pTable[i];
    }
    delete []_pTable;
    int lastSize = _size;
    _size = 0;    

    _capacity = nextPrime();
    _pTable = new Entry[_capacity];
    for(int i=0; i<lastSize; i++)
        insert(*(p+i));
    delete []p;        
}

//------testbench.cpp------
#include <iostream>
#include "simpleHashTable.h"

using namespace std;

int main()
{
    hashTable *pTable = new hashTable;
    cout << "insert Entry(1,11)..." << endl;
    pTable->insert(Entry(1,11));
    pTable->display();
    cout << "insert Entry(2,22)..." << endl;
    pTable->insert(Entry(2,22));
    pTable->display();
    cout << "insert Entry(3,33)..." << endl;
    pTable->insert(Entry(3,33));
    pTable->display();
    cout << "insert Entry(4,44)..." << endl;
    //pTable->insert(Entry(4,44));
    (*pTable)[4]._value = 44;//下标操作,返回值充当左值
    pTable->display();

    cout << endl << "delete Entry(1,11)..." << endl;
    pTable->remove(Entry(1,11));
    pTable->display();
    cout << "delete Entry(2,22)..." << endl;
    pTable->remove(Entry(2,22));
    pTable->display();
    cout << "delete Entry(3,33)..." << endl;
    pTable->remove(Entry(3,33));
    pTable->display();

    cout << "delete Entry(3,33)..." << endl;
    pTable->remove(Entry(3,33));
    pTable->display();

    delete pTable;
    getchar();
    return 0;
}

c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>

/*=================hash table start=========================================*/

#define HASH_TABLE_MAX_SIZE 10000
typedef struct HashNode_Struct HashNode;

struct HashNode_Struct
{
    char* sKey;
    int nValue;
    HashNode* pNext;
};

HashNode* hashTable[HASH_TABLE_MAX_SIZE]; //hash table data strcutrue
int hash_table_size;  //the number of key-value pairs in the hash table!

//initialize hash table
void hash_table_init()
{
    hash_table_size = 0;
    memset(hashTable, 0, sizeof(HashNode*) * HASH_TABLE_MAX_SIZE);
}


//string hash function
unsigned int hash_table_hash_str(const char* skey)
{
    const signed char *p = (const signed char*)skey;
    unsigned int h = *p;
    if(h)
    {
        for(p += 1; *p != '\0'; ++p)
            h = (h << 5) - h + *p;    //h = h*31+*p
    }
    return h;
}

//insert key-value into hash table
void hash_table_insert(const char* skey, int nvalue)
{
    if(hash_table_size >= HASH_TABLE_MAX_SIZE)
    {
        printf("out of hash table memory!\n");
        return;
    }

    unsigned int pos = hash_table_hash_str(skey) % HASH_TABLE_MAX_SIZE;

    HashNode* pHead =  hashTable[pos];
    while(pHead)
    {
        if(strcmp(pHead->sKey, skey) == 0) //strcmp(str1, str2):1)相等返回0;2)str1<str2返回负数;3)str1>str2返回正数
        {
            printf("%s already exists!\n", skey);
            return ;
        }
        pHead = pHead->pNext;
    }

    HashNode* pNewNode = (HashNode*)malloc(sizeof(HashNode));
    memset(pNewNode, 0, sizeof(HashNode));
    pNewNode->sKey = (char*)malloc(sizeof(char) * (strlen(skey) + 1));
    strcpy(pNewNode->sKey, skey);
    pNewNode->nValue = nvalue;

    pNewNode->pNext = hashTable[pos];
    hashTable[pos] = pNewNode;


    hash_table_size++;
}
//remove key-value from the hash table
void hash_table_remove(const char* skey)
{
    unsigned int pos = hash_table_hash_str(skey) % HASH_TABLE_MAX_SIZE;
    if(hashTable[pos])
    {
        HashNode* pHead = hashTable[pos];
        HashNode* pLast = NULL;
        HashNode* pRemove = NULL;
        while(pHead)
        {
            if(strcmp(skey, pHead->sKey) == 0)
            {
                pRemove = pHead;
                break;
            }
            pLast = pHead;
            pHead = pHead->pNext;
        }
        if(pRemove)
        {
            if(pLast)
                pLast->pNext = pRemove->pNext;
            else
                hashTable[pos] = NULL;

            free(pRemove->sKey);
            free(pRemove);
        }
    }
}

//lookup a key in the hash table
HashNode* hash_table_lookup(const char* skey)
{
    unsigned int pos = hash_table_hash_str(skey) % HASH_TABLE_MAX_SIZE;
    if(hashTable[pos])
    {
        HashNode* pHead = hashTable[pos];
        while(pHead)
        {
            if(strcmp(skey, pHead->sKey) == 0)
                return pHead;
            pHead = pHead->pNext;
        }
    }
    return NULL;
}

//print the content in the hash table
void hash_table_print()
{
    printf("===========content of hash table=================\n");
    int i;
    for(i = 0; i < HASH_TABLE_MAX_SIZE; ++i)
        if(hashTable[i])
        {
            HashNode* pHead = hashTable[i];
            printf("%d=>", i);
            while(pHead)
            {
                printf("%s:%d  ", pHead->sKey, pHead->nValue);
                pHead = pHead->pNext;
            }
            printf("\n");
        }
}

//free the memory of the hash table
void hash_table_release()
{
    int i;
    for(i = 0; i < HASH_TABLE_MAX_SIZE; ++i)
    {
        if(hashTable[i])
        {
            HashNode* pHead = hashTable[i];
            while(pHead)
            {
                HashNode* pTemp = pHead;
                pHead = pHead->pNext;
                if(pTemp)
                {
                    free(pTemp->sKey);
                    free(pTemp);
                }

            }
        }
    }
}

/* ===============================hash table end=========================*/


/* ============================test function ============================*/
#define MAX_STR_LEN 20
#define MIN_STR_LEN 10
void rand_str(char r[])
{
    int i;
    int len = MIN_STR_LEN + rand() % (MAX_STR_LEN - MIN_STR_LEN);
    for(i = 0; i < len - 1; ++i)
        r[i] = 'a' + rand() % ( 'z' - 'a');
    r[len - 1] = '\0';
}

int main(int argc, char** argv)
{
    srand(time(NULL));
    hash_table_init();
    printf("insert testing.........\n");
    int n = 10;
    const char *key1 = "aaammd";
    const char *key2 = "xzzyym";
    const char *key3 = "cdcded";

    hash_table_insert(key1, 110);
    hash_table_insert(key2, 220);
    hash_table_insert(key3, 330);
    char str[MAX_STR_LEN + 1];
    while(n--)
    {
        rand_str(str);
        hash_table_insert(str, n);
    }
    hash_table_print();

    printf("\nlookup testing..........\n");
    HashNode* pNode = hash_table_lookup(key1);
    printf("lookup result:%d\n", pNode->nValue);
    pNode = hash_table_lookup(key2);
    printf("lookup result:%d\n", pNode->nValue);

    printf("\nremove testing..........\n");
    printf("before remove %s:\n", key3);
    hash_table_print();
    hash_table_remove(key3);
    printf("after remove:\n");
    hash_table_print();
    hash_table_release();

    system("pause");
    return 0;
}

参考链接