00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00025 #ifndef __AbstractHashTable_h
00026 #define __AbstractHashTable_h
00027
00028 #ifdef __GNUC__
00029 #pragma interface
00030 #endif
00031
00032 #ifdef NON_ANSI
00033 #include <iostream.h>
00034 #else
00035 #include <iostream>
00036 using namespace std;
00037 #endif
00038
00039 #ifndef ANSI_HDRS
00040 #include <string.h>
00041 #include <stdio.h>
00042 #else
00043 #include <cstring>
00044 #include <cstdio>
00045 #endif
00046
00047 #include "_math.h"
00048 #include "LinkedList.h"
00049 #include "BasicArray.h"
00050 #include "hash_fn.h"
00051 #include "compare.h"
00052
00053
00054
00055
00056 extern int utilib_num_primes;
00057 extern unsigned long utilib_prime_list[];
00058
00059
00060 template <class T, class KEY>
00061 class AbstractHashTable
00062 {
00063 public:
00065 typedef size_t size_type;
00067 typedef ListItem<T*>* hash_element;
00068
00069 public:
00070
00072 explicit AbstractHashTable(const char* nameBuff = "Unnamed");
00074 explicit AbstractHashTable(size_type initial_size,
00075 const char* nameBuff = "Unnamed");
00077 virtual ~AbstractHashTable()
00078 {
00079 clear();
00080 ListItem<T*>::delete_unused();
00081 }
00082
00084 bool empty() const {return (data.len() == 0);}
00086 operator bool() const {return (data.len() != 0);}
00088 int size() const {return data.len();}
00089
00095 virtual T* find(KEY& key) const;
00096
00098 virtual T* first() const
00099 {
00100 if (data.len() == 0) return ((T*)0);
00101 return data.head()->data();
00102 }
00103
00105 virtual void add(KEY& key)
00106 {insert(&key);}
00111 virtual void add(KEY& key, T*& item)
00112 {item = insert(&key);}
00118 virtual void remove(KEY& key, bool& status)
00119 {
00120 T* tmp = find(key);
00121 if (tmp) extract(tmp,status);
00122 else status = false;
00123 }
00129 virtual void remove(T*& item, bool& status)
00130 {
00131 if (!item)
00132 status = false;
00133 else {
00134 extract(item,status);
00135 item = NULL;
00136 }
00137 }
00143 virtual void remove(T*& item, KEY& key, bool& status)
00144 {
00145 if (!item)
00146 status = false;
00147 else {
00148 key = item->key();
00149 extract(item,status);
00150 item = NULL;
00151 }
00152 }
00153
00155 virtual T* next(T* item);
00156
00158 int write(ostream& os) const;
00160 int read(istream& is);
00162 void clear();
00163
00165 void statistics(ostream& os);
00166
00168 bool& ignore_duplicates()
00169 {return ignore_duplicates_flag;}
00170
00172 bool using_prime_ht;
00174 void set_hash_fn(size_type (*hashfn)(const KEY&,size_type))
00175 {curr_hashfn = hashfn;}
00177 size_type (*curr_hashfn)(const KEY&,size_type);
00178
00179 protected:
00180
00182 virtual size_type hash(KEY& key, size_type table_size) const
00183 {
00184 if (curr_hashfn)
00185 return (*curr_hashfn)(key,table_size);
00186 else
00187 return hash_fn(key,table_size);
00188 }
00190 virtual T* insert(KEY* key);
00192 void insert(T* item, const bool resize_OK=true);
00193
00199 virtual void extract(T* item, bool& status);
00201 void resize(const int newsize=-1);
00202
00204 LinkedList<T*> data;
00206 BasicArray<ListItem<T*>*> table;
00208 BasicArray<int> bucket_size;
00209
00211 double max_load;
00213 bool ignore_duplicates_flag;
00214
00215 };
00216
00217
00218
00219 template <class T, class KEY>
00220 inline ostream& operator<<(ostream& output, const AbstractHashTable<T,KEY>&
00221 table)
00222 { table.write(output); return(output); }
00223
00224
00225
00226 template <class T, class KEY>
00227 inline istream& operator>>(istream& input, AbstractHashTable<T,KEY>& table)
00228 { table.read(input); return(input); }
00229
00230
00231
00232 template <class T, class KEY>
00233 AbstractHashTable<T,KEY>::AbstractHashTable(const char* ):
00234 using_prime_ht(true),
00235 curr_hashfn(0),
00236 table(utilib_prime_list[0]),
00237 bucket_size(utilib_prime_list[0]),
00238 max_load(3.0),
00239 ignore_duplicates_flag(true)
00240 { bucket_size=0; table = (ListItem<T*>*)NULL; }
00241
00242
00243
00244 template <class T, class KEY>
00245 AbstractHashTable<T,KEY>::AbstractHashTable(size_type initial_size,
00246 const char* ) :
00247 using_prime_ht(true),
00248 curr_hashfn(0),
00249 table(initial_size),
00250 bucket_size(initial_size),
00251 max_load(3.0),
00252 ignore_duplicates_flag(true)
00253 { bucket_size=0; table = (ListItem<T*>*)NULL; }
00254
00255
00256
00257 template <class T, class KEY>
00258 T* AbstractHashTable<T,KEY>::find(KEY& key) const
00259 {
00260 size_type index = hash(key, table.size());
00261 if (bucket_size[index] == 0)
00262 return ((T*)NULL);
00263
00264 hash_element curr = table[index];
00265
00266 int i=0;
00267 for (; curr && (i<bucket_size[index]); i++) {
00268 if (curr->data()->compare(key) == 0)
00269 break;
00270 curr = data.next(curr);
00271 }
00272
00273 if ((curr == NULL) || (i == bucket_size[index]))
00274 return NULL;
00275 return curr->data();
00276 }
00277
00278
00279
00280
00281
00282
00283
00284 template <class T, class KEY>
00285 T* AbstractHashTable<T,KEY>::next(T* item)
00286 {
00287 size_type index = hash(item->key(),table.size());
00288 if (bucket_size[index] == 0)
00289 return ((T*)NULL);
00290
00291 hash_element curr = table[index];
00292
00293 int i=0;
00294 for (; i<bucket_size[index]; i++) {
00295 if (item->compare(curr->data()->key()) == 0)
00296 break;
00297 curr = data.next(curr);
00298 }
00299
00300 curr = data.next(curr);
00301 if (curr == NULL)
00302 return NULL;
00303 return curr->data();
00304 }
00305
00306
00307
00308 template <class T, class KEY>
00309 T* AbstractHashTable<T,KEY>::insert(KEY* key)
00310 {
00311 T* item = new T(key);
00312 if (!item)
00313 ErrAbort("AbstractHashTable<T,KEY>::insert - memory error.");
00314 insert(item);
00315 return item;
00316 }
00317
00318
00319 template <class T, class KEY>
00320 void AbstractHashTable<T,KEY>::insert(T* item, const bool resize_OK)
00321 {
00322 bool resize_flag=false;
00323 size_type index;
00324 if ( !(ignore_duplicates_flag && (find(item->key()) != NULL)) ) {
00325 index = hash(item->key(),table.size());
00326 table[index] = data.insert(item, table[index]);
00327 bucket_size[index]++;
00328 }
00329 if (resize_OK == false) return;
00330 if ( (resize_flag == true) ||
00331 (max_load < (((double)data.size())/((double)table.size()))) )
00332 resize();
00333 }
00334
00335
00336
00337 template <class T, class KEY>
00338 void AbstractHashTable<T,KEY>::extract(T* item, bool& status)
00339 {
00340 hash_element curr;
00341 status = false;
00342
00343 if ((curr = data.find(item)) != NULL) {
00344 size_type index = hash(item->key(),table.size());
00345 if (curr == table[index]) {
00346 if (bucket_size[index] > 1)
00347 table[index] = data.next(table[index]);
00348 else
00349 table[index] = NULL;
00350 }
00351 data.remove(curr);
00352 bucket_size[index]--;
00353 status = true;
00354 delete item;
00355 }
00356 }
00357
00358
00359
00360 template <class T, class KEY>
00361 void AbstractHashTable<T,KEY>::statistics(ostream& os)
00362 {
00363 os << "Hash table size is " << table.size() << endl;
00364 os << "Total number of objects is " << data.size() << endl;
00365 os << "Table load is " << (((double)data.size())/table.size()) << endl;
00366 os << "Largest bucket contains " << max(bucket_size) << " objects" << endl;
00367 os << "Largest bucket is #" << argmax(bucket_size) << endl;
00368 os << "Hash table histogram:" << endl;
00369 char str[128];
00370 sprintf(str,"%10s %10s %10s %10s", "size", "buckets", "objects", "sum-pct");
00371 os << str << endl;
00372
00373 size_type buckets = table.len();
00374 int tnbuckets=0;
00375 for (int j=0; j<16; j++) {
00376 int nbuckets=0;
00377 int nobjs=0;
00378 for (size_type i=0; i < buckets; i++) {
00379 if (bucket_size[i] == j) {
00380 nbuckets++;
00381 nobjs += j;
00382 }
00383 }
00384 tnbuckets+=nobjs;
00385 sprintf(str,"%10d %10d %10d %10d", j, nbuckets, nobjs, (int)((100.0*tnbuckets)/data.len()) );
00386 os << str << endl;
00387 }
00388 {
00389 int nbuckets=0;
00390 int nobjs=0;
00391 for (size_type i=0; i < buckets; i++) {
00392 if (bucket_size[i] >= 16) {
00393 nbuckets++;
00394 nobjs += bucket_size[i];
00395 }
00396 }
00397 tnbuckets+=nobjs;
00398 sprintf(str,"%10s %10d %10d %10d", ">15", nbuckets, nobjs, (int)((100.0*tnbuckets)/data.len()) );
00399 os << str << endl;
00400 }
00401 }
00402
00403
00404
00405 template <class T, class KEY>
00406 void AbstractHashTable<T,KEY>::clear()
00407 {
00408 bucket_size = 0;
00409 while (data) {
00410 T* tmp = data.head()->data();
00411 delete tmp;
00412 data.remove(data.head());
00413 }
00414 table = (ListItem<T*>*)NULL;
00415 }
00416
00417
00418 template <class T, class KEY>
00419 void AbstractHashTable<T,KEY>::resize(const int newsize)
00420 {
00421 int tmp = newsize;
00422 bool flag=true;
00423
00424 while (flag) {
00425 if (tmp == -1) {
00426 if (using_prime_ht) {
00427 int i=0;
00428 while (i < utilib_num_primes) {
00429 if (utilib_prime_list[i] > table.size())
00430 break;
00431 i++;
00432 }
00433 if (utilib_prime_list[i] <= table.size())
00434 ErrAbort("AbstractHashTable<T,KEY>::resize -- Cannot automatically resize the hashtable any larger.");
00435 tmp = utilib_prime_list[i];
00436 }
00437 else {
00438 tmp = table.size()*2;
00439 }
00440 }
00441
00442 table.resize(tmp);
00443 table = (ListItem<T*>*)NULL;
00444 bucket_size.resize(tmp);
00445 bucket_size = 0;
00446
00447 int ndata = data.size();
00448 for (int i=0; i<ndata; i++) {
00449 T* tmp_item=NULL;
00450 data.remove(tmp_item);
00451 if (data.size() != (ndata-1))
00452 ErrAbort("Bug here - First.");
00453 insert(tmp_item,false);
00454 if (data.size() != ndata)
00455 ErrAbort("Bug here - Second.");
00456 }
00457
00458
00459
00460
00461
00462 flag = false;
00463 }
00464 }
00465
00466
00467 template <class T, class KEY>
00468 int AbstractHashTable<T,KEY>::write(ostream& os) const
00469 {
00470 os << data.size() << endl;
00471
00472 hash_element curr = data.head();
00473 while (curr) {
00474 curr->data()->write(os);
00475 os << endl;
00476 curr = data.next(curr);
00477 }
00478
00479 return OK;
00480 }
00481
00482
00483
00484
00485
00486
00487 template <class T, class KEY>
00488 int AbstractHashTable<T,KEY>::read(istream& is)
00489 {
00490 clear();
00491 size_type Size;
00492 is >> Size;
00493 for (size_type i=0; i<Size; i++) {
00494 T* item;
00495 item->read(is);
00496 insert(item);
00497 }
00498
00499 return OK;
00500 }
00501
00502
00503 #endif