【数据结构】哈希桶

  在上一篇博客中,我们简单地介绍了哈希表,以及解决哈希冲突的办法;今天我们介绍解决哈希冲突的另一种方法——开散列法。开散列法又叫链地址法,首先对关键码集合用散列函数计算散列地址,具有相同地址的关键码归于同一子集合,每一个子集合称为一个桶,各个桶中的元素通过一个单链表链接起来,各链表的头结点组成一个向量。说起来可能很复杂,其实很简单,看一个图就明白了:假设哈希函数为Hash(key)=key%10
这里写图片描述

  • 假如哈希函数被破解了,可能会对哈希桶进行攻击,从而将哈希桶退化成单链表,此时查找效率将会大大降低,我们可以把单链表换成红黑树,提高查找效率。
  • 当哈希桶的个数与插入的结点个数相等时,我们就增加容量。

以下是我实现的哈希桶:

// 获得一个适合的素数
const int _PrimeSize = 28;//素数表
static const unsigned long _PrimeList[_PrimeSize] =
{
    53ul, 97ul, 193ul, 389ul, 769ul,
    1543ul, 3079ul, 6151ul, 12289ul, 24593ul,
    49157ul, 98317ul, 196613ul, 393241ul, 786433ul,
    1572869ul, 3145739ul, 6291469ul, 12582917ul, 25165843ul,
    50331653ul, 100663319ul, 201326611ul, 402653189ul, 805306457ul,
    1610612741ul, 3221225473ul, 4294967291ul
};
size_t GetNextPrime(size_t num)
{
    for (size_t i = 0; i < _PrimeSize; i++)
    {
        if (_PrimeList[i]>num)
            return _PrimeList[i];
    }
    return _PrimeList[_PrimeSize - 1];
}

template <class K>
class KeyToIntDef
{
public:
    size_t operator()(const K& key)
    {
        return key;
    }
};
// 将字符串转换成整型
static size_t BKDRHash(const char * str)
{
    unsigned int seed = 131; // 31 131 1313 13131 131313
    unsigned int hash = 0;
    while (*str)
    {
        hash = hash * seed + (*str++);
    }
    size_t ret = (hash & 0x7FFFFFFF);
    return ret;
}
class StringToInt
{
public:
    size_t operator()(const string& key)
    {
        return BKDRHash(key.c_str());
    }
};


#include <iostream>
#include <vector>
#include <string>
using namespace std;
template <class K, class V>
struct HashNode//结点
{
    pair<K, V> _kv;
    HashNode<K, V>* _pNext;
    HashNode(const pair<K, V>& kv)
        : _kv(kv)
        , _pNext(NULL)
    {}
};

template <class K, class V, class KeyToInt>
class HashTable;//前置声明
//迭代器
template <class K, class V, class KeyToInt = KeyToIntDef<K>>
struct HashTableIterator
{
    typedef HashNode<K, V> Node;
    typedef Node* PNode;
    typedef HashTableIterator<K, V, KeyToInt> Self;

    PNode _pCur;
    HashTable<K, V, KeyToInt>* _ht;
public:
    HashTableIterator()
        : _pCur(NULL)
        , _ht(NULL)
    {}

    HashTableIterator(const PNode pCur, HashTable<K, V, KeyToInt>* ht)
        : _pCur(pCur)
        , _ht(ht)
    {}

    HashTableIterator(Self& s)
        : _pCur(s._pCur)
        , _ht(s._ht)
    {}

    Self& operator++()
    {
        Next();
        return *this;
    }

    Self operator++(int)
    {
        HashTableIterator temp(*this);
        Next();
        return temp;
    }

    pair<K, V>& operator*()
    {
        return _pCur->_kv;
    }

    pair<K, V>* operator->()
    {
        return &(operator*());
    }

    bool operator==(const Self& s)
    {
        return _pCur == s._pCur;
    }

    bool operator!=(const Self& s)
    {
        return _pCur != s._pCur;
    }
private:
    void Next()
    {
        if (_pCur->_pNext)
            _pCur = _pCur->_pNext;
        else
        {//找下一个非空桶
            size_t bucketNo = _ht->HashFunc(_pCur->_kv.first) + 1;
            for (; bucketNo < _ht->_hashTable.capacity(); bucketNo++)
            {
                if (_ht->_hashTable[bucketNo])
                {
                    _pCur = _ht->_hashTable[bucketNo];
                    return;
                }
            }
            _pCur = NULL;//没有找到
        }
        return;
    }
};
//哈希桶
template <class K, class V, class KeyToInt = KeyToIntDef<K>>
class HashTable
{
    typedef HashNode<K, V> Node;
    typedef Node* PNode;
    friend HashTableIterator<K, V, KeyToInt>;
public:
    typedef HashTableIterator<K, V, KeyToInt> Iterator;
public:
    HashTable(size_t capacity = 10)
    {
        capacity = GetNextPrimer(capacity);
        _hashTable.resize(capacity);
        _size = 0;
    }

    Iterator Begin()
    {
        for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
        {
            if (_hashTable[bucketNo])
                return Iterator(_hashTable[bucketNo], this);
        }
        return Iterator(NULL, this);
    }

    Iterator End()
    {
        return Iterator(NULL, this);
    }
//////////////////////////插入,查找及删除/////////////////////////////////
    pair<Iterator, bool> InsertEqual(const pair<K, V>& kv)//允许重复
    {
        CheckCapacity();
        size_t bucketNo = HashFunc(kv.first);
        PNode pNewNode = new Node(kv);
        pNewNode->_pNext = _hashTable[bucketNo];//头插
        _hashTable[bucketNo] = pNewNode;
        _size++;
        return make_pair(Iterator(pNewNode, this), true);
    }

    pair<Iterator, bool> InsertUnique(const pair<K, V>& kv)//key值唯一
    {
        CheckCapacity();
        size_t bucketNo = HashFunc(kv.first);
        PNode pCur = _hashTable[bucketNo];
        while (pCur)
        {
            if (kv.first == pCur->_kv.first)
                return make_pair(Iterator(pCur, this), false);
            pCur = pCur->_pNext;
        }
        pCur = new Node(kv);
        pCur->_pNext = _hashTable[bucketNo];
        _hashTable[bucketNo] = pCur;
        _size++;
        return make_pair(Iterator(pCur, this), true);
    }

    Iterator Find(const K& key)
    {
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];

        while (pCur)
        {
            if (pCur->_kv.first == key)
                return Iterator(pCur, this);
            pCur = pCur->_pNext;
        }
        return Iterator(NULL, this);
    }

    Iterator Erase(Iterator pos)//key值唯一
    {
        if (pos._pCur == NULL)
            return Iterator(NULL, this);
        size_t key = pos._pCur->_kv.first;
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];
        PNode pPre = NULL;
        while (pCur)
        {
            if (pCur->_kv.first == key)
            {
                if (_hashTable[bucketNo] == pCur)//pCur是首元素结点
                    _hashTable[bucketNo] = pCur->_pNext;
                else
                    pPre->_pNext = pCur->_pNext;
                delete pCur;
                pCur = NULL;
                _size--;
                return Iterator(pPre, this);
            }
            else
            {
                pPre = pCur;
                pCur = pCur->_pNext;
            }
        }
        return Iterator(NULL, this);
    }

    size_t Erase(const K& key)//key值可以重复
    {
        size_t oldsize = _size;
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];
        PNode pPre = NULL;
        while (pCur)
        {
            if (pCur->_kv.first == key)
            {
                if (pCur == _hashTable[bucketNo])
                {
                    _hashTable[bucketNo] = pCur->_pNext;
                    delete pCur;
                    pCur = _hashTable[bucketNo];
                }
                else
                {
                    pPre->_pNext = pCur->_pNext;
                    delete pCur;
                    pCur = pPre->_pNext;
                }
                _size--;
            }
            else
            {
                pPre = pCur;
                pCur = pPre->_pNext;
            }
        }
        if (oldsize == _size)
            return 0;
        else
            return oldsize - _size;
    }
///////////////////////////其它常用函数///////////////////////////////
    size_t Count(const K key)//值为key的元素个数
    {
        size_t count = 0;
        size_t bucketNo = HashFunc(key);
        PNode pCur = _hashTable[bucketNo];
        while (pCur)
        {
            if (pCur->_kv.first == key)
                count++;
            pCur = pCur->_pNext;
        }
        return count;
    }

    size_t BucketCount()const//桶的个数
    {
        return _hashTable.capacity();
    }

    size_t BucketSize(size_t bucketNo)const//桶内元素个数
    {
        size_t count = 0;
        PNode pCur = _hashTable[bucketNo];
        while (pCur)
        {
            count++;
            pCur = pCur->_pNext;
        }
        return count;
    }

    V& FindORInsert(const K& key)//查找值为key,如果找到了,返回对应的value,
                                 //如果没有找到插入该结点,返回缺省的value
    {
        Iterator ret = InsertUnique(make_pair(key, V())).first;
        return (*ret).second;
    }

    bool Empty()const//是否为空
    {
        return _size == 0;
    }

    size_t Size()const//插入的总数
    {
        return _size;
    }

    void Clear()//清空
    {
        for (size_t bucketNo = 0; bucketNo < _hashTable.capacity(); bucketNo++)
        {
            PNode pCur = _hashTable[bucketNo];
            while (pCur)
            {
                _hashTable[bucketNo] = pCur->_pNext;
                delete pCur;
                pCur = _hashTable[bucketNo];
                _size--;
            }
        }
    }

    ~HashTable()
    {
        Clear();
    }
private:
    size_t HashFunc(const K& key)//哈希函数
    {
        return KeyToInt()(key) % _hashTable.capacity();
    }

    void CheckCapacity()//扩容
    {
        size_t capacity = _hashTable.capacity();
        if (_size == capacity)
        {
            HashTable<K, V, KeyToInt> newHt(GetNextPrime(capacity));
            for (size_t bucketNo = 0; bucketNo < capacity; bucketNo++)
            {
                PNode pCur = _hashTable[bucketNo];
                while (pCur)
                {
                    newHt.InsertEqual(pCur->_kv);
                    pCur = pCur->_pNext;
                }
            }
            _hashTable.swap(newHt._hashTable);
        }
    }
private:
    vector<PNode> _hashTable;
    size_t _size;
};

void test()
{
    HashTable<int, int> ht;
    ht.InsertEqual(make_pair(1, 1));
    ht.InsertEqual(make_pair(5, 5));
    ht.InsertEqual(make_pair(15, 15));
    ht.InsertEqual(make_pair(15, 15));
    ht.InsertEqual(make_pair(35, 35));
    ht.InsertEqual(make_pair(9, 9));
    HashTable<int, int>::Iterator it = ht.Begin();
    if (!ht.Empty())
    {
        while (it != ht.End())
        {
            cout << it->first << " " ;
            cout << (*it).second << endl;
            it++;
        }
        cout << endl;
        cout << ht.BucketSize(5) << endl;
        cout << ht.BucketCount() << endl;
        cout << ht.Count(15) << endl;
    }
    it = ht.Begin();
    cout << ht.Erase(15) << endl;
    HashTable<int, int>::Iterator ret = ht.Find(1);
    ht.Erase(ret);
    cout << ht.Size() << endl;
    ht.Clear();

    HashTable<string, string, StringToInt> ht1;
    ht1.InsertUnique(make_pair("111", "111"));
    ht1.InsertUnique(make_pair("111", "111"));
    cout << ht1.FindORInsert("111") << endl;
}
相关文章
相关标签/搜索