//////////////////////////////////////////////////////////////////////////////// // Author: Andy Rushton // Copyright: (c) Southampton University 1999-2004 // (c) Andy Rushton 2004 onwards // License: BSD License, see ../docs/license.html //////////////////////////////////////////////////////////////////////////////// #include namespace stlplus { //////////////////////////////////////////////////////////////////////////////// // the element stored in the hash template class hash_element { public: master_iterator, hash_element > m_master; std::pair m_value; hash_element* m_next; unsigned m_hash; hash_element(const hash* owner, const K& key, const T& data, unsigned hash) : m_master(owner,this), m_value(key,data), m_next(0), m_hash(hash) { } hash_element(const hash* owner, const std::pair& value, unsigned hash) : m_master(owner,this), m_value(value), m_next(0), m_hash(hash) { } ~hash_element(void) { m_next = 0; m_hash = 0; } const hash* owner(void) const { return m_master.owner(); } // generate the bin number from the hash value and the owner's number of bins unsigned bin(void) const { return m_hash % (owner()->m_bins); } }; //////////////////////////////////////////////////////////////////////////////// // iterator // null constructor template hash_iterator::hash_iterator(void) { } // non-null constructor used from within the hash to construct a valid iterator template hash_iterator::hash_iterator(hash_element* element) : safe_iterator,hash_element >(element->m_master) { } // constructor used to create an end iterator template hash_iterator::hash_iterator(const hash* owner) : safe_iterator,hash_element >(owner) { } template hash_iterator::hash_iterator(const safe_iterator, hash_element >& iterator) : safe_iterator,hash_element >(iterator) { } // destructor template hash_iterator::~hash_iterator(void) { } // mode conversions template typename hash_iterator::const_iterator hash_iterator::constify(void) const { return hash_iterator >(*this); } template typename hash_iterator::iterator hash_iterator::deconstify(void) const { return hash_iterator >(*this); } // increment operator looks for the next element in the table // if there isn't one, then this becomes an end() iterator with m_bin = m_bins template typename hash_iterator::this_iterator& hash_iterator::operator ++ (void) { this->assert_valid(); if (this->node()->m_next) this->set(this->node()->m_next->m_master); else { // failing that, subsequent hash values are tried until either an element is found or there are no more bins // in which case it becomes an end() iterator hash_element* element = 0; unsigned current_bin = this->node()->bin(); for(current_bin++; !element && (current_bin < this->owner()->m_bins); current_bin++) element = this->owner()->m_values[current_bin]; if (element) this->set(element->m_master); else this->set_end(); } return *this; } // post-increment is defined in terms of pre-increment template typename hash_iterator::this_iterator hash_iterator::operator ++ (int) { hash_iterator old(*this); ++(*this); return old; } // two iterators are equal if they point to the same element // both iterators must be non-null and belong to the same table template bool hash_iterator::operator == (const hash_iterator& r) const { return this->equal(r); } template bool hash_iterator::operator != (const hash_iterator& r) const { return !operator==(r); } template bool hash_iterator::operator < (const hash_iterator& r) const { return this->compare(r) < 0; } // iterator dereferencing is only legal on a non-null iterator template V& hash_iterator::operator*(void) const { this->assert_valid(); return this->node()->m_value; } template V* hash_iterator::operator->(void) const { return &(operator*()); } //////////////////////////////////////////////////////////////////////////////// // hash // totally arbitrary initial size used for auto-rehashed tables static unsigned hash_default_bins = 127; // constructor // tests whether the user wants auto-rehash // sets the rehash point to be a loading of 1.0 by setting it to the number of bins // uses the user's size unless this is zero, in which case implement the default template hash::hash(unsigned bins) : m_rehash(bins), m_bins(bins > 0 ? bins : hash_default_bins), m_size(0), m_values(0) { m_values = new hash_element*[m_bins]; for (unsigned i = 0; i < m_bins; i++) m_values[i] = 0; } template hash::~hash(void) { // delete all the elements clear(); // and delete the data structure delete[] m_values; m_values = 0; } // as usual, implement the copy constructor i.t.o. the assignment operator template hash::hash(const hash& right) : m_rehash(right.m_rehash), m_bins(right.m_bins), m_size(0), m_values(0) { m_values = new hash_element*[right.m_bins]; // copy the rehash behaviour as well as the size for (unsigned i = 0; i < m_bins; i++) m_values[i] = 0; *this = right; } // assignment operator // this is done by copying the elements // the source and target hashes can be different sizes // the hash is self-copy safe, i.e. it is legal to say x = x; template hash& hash::operator = (const hash& r) { // make self-copy safe if (&r == this) return *this; // remove all the existing elements clear(); // copy the elements across - remember that this is rehashing because the two // tables can be different sizes so there is no quick way of doing this by // copying the lists for (hash_iterator > i = r.begin(); i != r.end(); ++i) insert(i->first, i->second); return *this; } // number of values in the hash template bool hash::empty(void) const { return m_size == 0; } template unsigned hash::size(void) const { return m_size; } // equality template bool hash::operator == (const hash& right) const { // this table is the same as the right table if they are the same table! if (&right == this) return true; // they must be the same size to be equal if (m_size != right.m_size) return false; // now every key in this must be in right and have the same data for (hash_iterator > i = begin(); i != end(); i++) { hash_element* found = right._find_element(i->first); if (found == 0) return false; if (!(i->second == found->m_value.second)) return false; // hash_iterator > found = right.find(i->first); // if (found == right.end()) return false; // if (!(i->second == found->second)) return false; } return true; } // set up the hash to auto-rehash at a specific size // setting the rehash size to 0 forces manual rehashing template void hash::auto_rehash(void) { m_rehash = m_bins; } template void hash::manual_rehash(void) { m_rehash = 0; } // the rehash function // builds a new hash table and moves the elements (without copying) from the old to the new // I store the un-modulused hash value in the element for more efficient rehashing // passing 0 to the bins parameter does auto-rehashing // passing any other value forces the number of bins template void hash::rehash(unsigned bins) { // user specified size: just take the user's value // auto calculate: if the load is high, increase the size; else do nothing unsigned new_bins = bins ? bins : m_bins; if (bins == 0 && m_size > 0) { // these numbers are pretty arbitrary // TODO - make them user-customisable? float load = loading(); if (load > 2.0) new_bins = (unsigned)(m_bins * load); else if (load > 1.0) new_bins = m_bins * 2; } if (new_bins == m_bins) return; // set the new rehashing point if auto-rehashing is on if (m_rehash) m_rehash = new_bins; // move aside the old structure hash_element** old_values = m_values; unsigned old_bins = m_bins; // create a replacement structure m_values = new hash_element*[new_bins]; for (unsigned i = 0; i < new_bins; i++) m_values[i] = 0; m_bins = new_bins; // move all the old elements across, rehashing each one for (unsigned j = 0; j < old_bins; j++) { while(old_values[j]) { // unhook from the old structure hash_element* current = old_values[j]; old_values[j] = current->m_next; // rehash using the stored hash value unsigned bin = current->bin(); // hook it into the new structure current->m_next = m_values[bin]; m_values[bin] = current; } } // now delete the old structure delete[] old_values; } // the loading is the average number of elements per bin // this simplifies to the total elements divided by the number of bins template float hash::loading(void) const { return (float)m_size / (float)m_bins; } // remove all elements from the table template void hash::erase(void) { // unhook the list elements and destroy them for (unsigned i = 0; i < m_bins; i++) { hash_element* current = m_values[i]; while(current) { hash_element* next = current->m_next; delete current; current = next; } m_values[i] = 0; } m_size = 0; } // test for whether a key is present in the table template bool hash::present(const K& key) const { return _find_element(key) != 0; } template typename hash::size_type hash::count(const K& key) const { return present() ? 1 : 0; } // add a key and data element to the table - defined in terms of the general-purpose pair insert function template typename hash::iterator hash::insert(const K& key, const T& data) { return insert(std::pair(key,data)).first; } // insert a key/data pair into the table // this removes any old value with the same key since there is no multihash functionality template std::pair::iterator, bool> hash::insert(const std::pair& value) { // if auto-rehash is enabled, implement the auto-rehash before inserting the new value // the table is rehashed if this insertion makes the loading exceed 1.0 if (m_rehash && (m_size >= m_rehash)) rehash(); // calculate the new hash value unsigned hash_value_full = H()(value.first); unsigned bin = hash_value_full % m_bins; bool inserted = true; // unhook any previous value with this key // this has been inlined from erase(key) so that the hash value is not calculated twice hash_element* previous = 0; for (hash_element* current = m_values[bin]; current; previous = current, current = current->m_next) { // first check the full stored hash value if (current->m_hash != hash_value_full) continue; // next try the equality operator if (!E()(current->m_value.first, value.first)) continue; // unhook this value and destroy it if (previous) previous->m_next = current->m_next; else m_values[bin] = current->m_next; delete current; m_size--; // we've overwritten a previous value inserted = false; // assume there can only be one match so we can give up now break; } // now hook in a new list element at the start of the list for this hash value hash_element* new_item = new hash_element(this, value, hash_value_full); new_item->m_next = m_values[bin]; m_values[bin] = new_item; // increment the size count m_size++; // construct an iterator from the list node, and return whether inserted return std::make_pair(hash_iterator >(new_item), inserted); } // insert a key with an empty data field ready to be filled in later template typename hash::iterator hash::insert(const K& key) { return insert(key,T()); } // remove a key from the table - return true if the key was found and removed, false if it wasn't present template unsigned hash::erase(const K& key) { unsigned hash_value_full = H()(key); unsigned bin = hash_value_full % m_bins; // scan the list for an element with this key // need to keep a previous pointer because the lists are single-linked hash_element* previous = 0; for (hash_element* current = m_values[bin]; current; previous = current, current = current->m_next) { // first check the full stored hash value if (current->m_hash != hash_value_full) continue; // next try the equality operator if (!E()(current->m_value.first, key)) continue; // found this key, so unhook the element from the list if (previous) previous->m_next = current->m_next; else m_values[bin] = current->m_next; // destroy it delete current; // remember to maintain the size count m_size--; return 1; } return 0; } // remove an element from the hash table using an iterator (std::map equivalent) template typename hash::iterator hash::erase(typename hash::iterator it) { // work out what the next iterator is in order to return it later typename hash::iterator next(it); ++next; // we now need to find where this item is - made difficult by the use of // single-linked lists which means I have to search through the bin from // the top in order to unlink from the list. unsigned hash_value_full = it.node()->m_hash; unsigned bin = hash_value_full % m_bins; // scan the list for this element // need to keep a previous pointer because the lists are single-linked hash_element* previous = 0; for (hash_element* current = m_values[bin]; current; previous = current, current = current->m_next) { // direct test on the address of the element if (current != it.node()) continue; // found this iterator, so unhook the element from the list if (previous) previous->m_next = current->m_next; else m_values[bin] = current->m_next; // destroy it delete current; current = 0; // remember to maintain the size count m_size--; break; } return next; } template void hash::clear(void) { erase(); } // search for a key in the table and return an iterator to it // if the search fails, returns an end() iterator // Note that ALL hash functions that use iterators are **NOT** thread safe!!! // This is due to the usage of a reference counted master iterator. template typename hash::const_iterator hash::find(const K& key) const { hash_element* found = _find_element(key); return found ? hash_iterator >(found) : end(); } template typename hash::iterator hash::find(const K& key) { hash_element* found = _find_element(key); return found ? hash_iterator >(found) : end(); } // table lookup by key using the index operator[], returning a reference to the data field, not an iterator // this is rather like the std::map's [] operator // the difference is that I have a const and non-const version // the const version will not create the element if not present already, but the non-const version will // the non-const version is compatible with the behaviour of the map template const T& hash::operator[] (const K& key) const { // this const version cannot change the hash, so has to raise an exception if the key is missing hash_element* found = _find_element(key); if (!found) throw std::out_of_range("key not found in stlplus::hash::operator[]"); return found->m_value.second; } template T& hash::operator[] (const K& key) { // this non-const version can change the hash, so creates a new element if the key is missing hash_element* found = _find_element(key); return found ? found->m_value.second : insert(key)->second; } // Thread-safe (doesn't use iterators), fast const access to the hash value at a given key. template const T& hash::at(const K& key) const { // this const version cannot change the hash, so has to raise an exception if the key is missing hash_element* found = _find_element(key); if (!found) throw std::out_of_range("key not found in stlplus::hash::at"); return found->m_value.second; } // Thread-safe (doesn't use iterators), fast const pointer access to the hash value at a given key. // This will not throw, instead returning a null pointer if the value is not found. template const T* hash::at_pointer(const K& key) const { hash_element* found = _find_element(key); return found ? &(found->m_value.second) : 0; } // iterators template typename hash::const_iterator hash::begin(void) const { // find the first element for (unsigned bin = 0; bin < m_bins; bin++) if (m_values[bin]) return hash_iterator >(m_values[bin]); // if the hash is empty, return the end iterator return end(); } template typename hash::iterator hash::begin(void) { // find the first element for (unsigned bin = 0; bin < m_bins; bin++) if (m_values[bin]) return hash_iterator >(m_values[bin]); // if the hash is empty, return the end iterator return end(); } template typename hash::const_iterator hash::end(void) const { return hash_iterator >(this); } template typename hash::iterator hash::end(void) { return hash_iterator >(this); } template void hash::debug_report(std::ostream& str) const { // calculate some stats first unsigned occupied = 0; unsigned min_in_bin = m_size; unsigned max_in_bin = 0; for (unsigned i = 0; i < m_bins; i++) { if (m_values[i]) occupied++; unsigned count = 0; for (hash_element* item = m_values[i]; item; item = item->m_next) count++; if (count > max_in_bin) max_in_bin = count; if (count < min_in_bin) min_in_bin = count; } // now print the table str << "------------------------------------------------------------------------" << std::endl; str << "| size: " << m_size << std::endl; str << "| bins: " << m_bins << std::endl; str << "| loading: " << loading() << " "; if (m_rehash) str << "auto-rehash at " << m_rehash << std::endl; else str << "manual rehash" << std::endl; str << "| occupied: " << occupied << std::fixed << " (" << (100.0*(float)occupied/(float)m_bins) << "%)" << std::scientific << ", min = " << min_in_bin << ", max = " << max_in_bin << std::endl; str << "|-----------------------------------------------------------------------" << std::endl; str << "| bin 0 1 2 3 4 5 6 7 8 9" << std::endl; str << "| ---------------------------------------------------------------"; for (unsigned j = 0; j < m_bins; j++) { if (j % 10 == 0) { str << std::endl; str << "| " << std::setw(6) << std::right << (j/10*10) << std::left << " |"; } unsigned count = 0; for (hash_element* item = m_values[j]; item; item = item->m_next) count++; if (!count) str << " ."; else str << std::setw(6) << std::right << count << std::left; } str << std::endl; str << "------------------------------------------------------------------------" << std::endl; } // find a key and return the element pointer // zero is returned if the find fails // this is used internally where iterator usage may not be required (after profiling by DJDM) template hash_element* hash::_find_element(const K& key) const { // scan the list for this key's hash value for the element with a matching key unsigned hash_value_full = H()(key); unsigned bin = hash_value_full % m_bins; for (hash_element* current = m_values[bin]; current; current = current->m_next) { if (current->m_hash == hash_value_full && E()(current->m_value.first, key)) return current; } return 0; } //////////////////////////////////////////////////////////////////////////////// } // end namespace stlplus