stx/btree.h

Go to the documentation of this file.
00001 // $Id: btree.h 72 2008-01-25 12:47:26Z tb $
00006 /*
00007  * STX B+ Tree Template Classes v0.8.1
00008  * Copyright (C) 2008 Timo Bingmann
00009  *
00010  * This library is free software; you can redistribute it and/or modify it
00011  * under the terms of the GNU Lesser General Public License as published by the
00012  * Free Software Foundation; either version 2.1 of the License, or (at your
00013  * option) any later version.
00014  *
00015  * This library is distributed in the hope that it will be useful, but WITHOUT
00016  * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
00017  * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU Lesser General Public License
00018  * for more details.
00019  *
00020  * You should have received a copy of the GNU Lesser General Public License
00021  * along with this library; if not, write to the Free Software Foundation,
00022  * Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
00023  */
00024 
00025 #ifndef _STX_BTREE_H_
00026 #define _STX_BTREE_H_
00027 
00028 // *** Required Headers from the STL
00029 
00030 #include <algorithm>
00031 #include <functional>
00032 #include <istream>
00033 #include <ostream>
00034 #include <assert.h>
00035 
00036 // *** Debugging Macros
00037 
00038 #ifdef BTREE_DEBUG
00039 
00040 #include <iostream>
00041 
00043 #define BTREE_PRINT(x)          do { if (debug) (std::cout << x); } while(0)
00044 
00046 #define BTREE_ASSERT(x)         do { assert(x); } while(0)
00047 
00048 #else
00049 
00051 #define BTREE_PRINT(x)          do { } while(0)
00052 
00054 #define BTREE_ASSERT(x)         do { } while(0)
00055 
00056 #endif
00057 
00059 #define BTREE_MAX(a,b)          ((a) < (b) ? (b) : (a))
00060 
00061 #ifndef BTREE_FRIENDS
00065 #define BTREE_FRIENDS           friend class btree_friend;
00066 #endif
00067 
00069 namespace stx {
00070 
00073 template <typename _Key>
00074 struct btree_default_set_traits
00075 {
00078     static const bool   selfverify = false;
00079 
00084     static const bool   debug = false;
00085 
00088     static const int    leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key)) );
00089 
00092     static const int    innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) );
00093 };
00094 
00097 template <typename _Key, typename _Data>
00098 struct btree_default_map_traits
00099 {
00102     static const bool   selfverify = false;
00103 
00108     static const bool   debug = false;
00109 
00112     static const int    leafslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(_Data)) );
00113 
00116     static const int    innerslots = BTREE_MAX( 8, 256 / (sizeof(_Key) + sizeof(void*)) );
00117 };
00118 
00132 template <typename _Key, typename _Data,
00133           typename _Value = std::pair<_Key, _Data>,
00134           typename _Compare = std::less<_Key>,
00135           typename _Traits = btree_default_map_traits<_Key, _Data>,
00136           bool _Duplicates = false>
00137 class btree
00138 {
00139 public:
00140     // *** Template Parameter Types
00141 
00144     typedef _Key                        key_type;
00145 
00148     typedef _Data                       data_type;
00149 
00154     typedef _Value                      value_type;
00155 
00157     typedef _Compare                    key_compare;
00158 
00161     typedef _Traits                     traits;
00162 
00165     static const bool                   allow_duplicates = _Duplicates;
00166 
00167     // The macro BTREE_FRIENDS can be used by outside class to access the B+
00168     // tree internals. This was added for wxBTreeDemo to be able to draw the
00169     // tree.
00170     BTREE_FRIENDS
00171 
00172 public:
00173     // *** Constructed Types
00174 
00176     typedef btree<key_type, data_type, value_type,
00177                   key_compare, traits, allow_duplicates>        btree_self;
00178 
00180     typedef size_t                              size_type;
00181 
00183     typedef std::pair<key_type, data_type>      pair_type;
00184 
00185 public:
00186     // *** Static Constant Options and Values of the B+ Tree
00187 
00189     static const unsigned short         leafslotmax =  traits::leafslots;
00190 
00193     static const unsigned short         innerslotmax =  traits::innerslots;
00194 
00198     static const unsigned short minleafslots = (leafslotmax / 2);
00199 
00203     static const unsigned short mininnerslots = (innerslotmax / 2);
00204 
00207     static const bool                   selfverify = traits::selfverify;
00208 
00212     static const bool                   debug = traits::debug;
00213 
00214 private:
00215     // *** Node Classes for In-Memory Nodes
00216 
00219     struct node
00220     {
00222         unsigned short  level;
00223 
00226         unsigned short  slotuse;
00227 
00229         inline void initialize(const unsigned short l)
00230         {
00231             level = l;
00232             slotuse = 0;
00233         }
00234 
00236         inline bool isleafnode() const
00237         {
00238             return (level == 0);
00239         }
00240     };
00241 
00244     struct inner_node : public node
00245     {
00247         key_type        slotkey[innerslotmax];
00248 
00250         node*           childid[innerslotmax+1];
00251 
00253         inline void initialize(const unsigned short l)
00254         {
00255             node::initialize(l);
00256         }
00257 
00259         inline bool isfull() const
00260         {
00261             return (node::slotuse == innerslotmax);
00262         }
00263 
00265         inline bool isfew() const
00266         {
00267             return (node::slotuse <= mininnerslots);
00268         }
00269 
00271         inline bool isunderflow() const
00272         {
00273             return (node::slotuse < mininnerslots);
00274         }
00275     };
00276 
00280     struct leaf_node : public node
00281     {
00283         leaf_node       *prevleaf;
00284 
00286         leaf_node       *nextleaf;
00287 
00289         key_type        slotkey[leafslotmax];
00290 
00292         data_type       slotdata[leafslotmax];
00293 
00295         inline void initialize()
00296         {
00297             node::initialize(0);
00298             prevleaf = nextleaf = NULL;
00299         }
00300 
00302         inline bool isfull() const
00303         {
00304             return (node::slotuse == leafslotmax);
00305         }
00306 
00308         inline bool isfew() const
00309         {
00310             return (node::slotuse <= minleafslots);
00311         }
00312 
00314         inline bool isunderflow() const
00315         {
00316             return (node::slotuse < minleafslots);
00317         }
00318     };
00319 
00320 private:
00321     // *** Template Magic to Convert a pair or key/data types to a value_type
00322 
00325     template <typename value_type, typename pair_type>
00326     struct btree_pair_to_value
00327     {
00329         inline value_type operator()(pair_type& p) const {
00330             return p.first;
00331         }
00333         inline value_type operator()(const pair_type& p) const {
00334             return p.first;
00335         }
00336     };
00337 
00339     template <typename value_type>
00340     struct btree_pair_to_value<value_type, value_type>
00341     {
00343         inline value_type operator()(pair_type& p) const {
00344             return p;
00345         }
00347         inline value_type operator()(const pair_type& p) const {
00348             return p;
00349         }
00350     };
00351 
00354     typedef btree_pair_to_value<value_type, pair_type> pair_to_value_type;
00355 
00356 public:
00357     // *** Iterators and Reverse Iterators
00358 
00359     class iterator;
00360     class const_iterator;
00361 
00364     class iterator
00365     {
00366     public:
00367         // *** Types
00368 
00370         typedef typename btree::key_type                key_type;
00371 
00373         typedef typename btree::data_type               data_type;
00374 
00376         typedef typename btree::value_type              value_type;
00377 
00379         typedef typename btree::pair_type               pair_type;
00380 
00382         typedef value_type&             reference;
00383 
00385         typedef value_type*             pointer;
00386 
00388         typedef std::bidirectional_iterator_tag iterator_category;
00389 
00391         typedef ptrdiff_t               difference_type;
00392 
00394         typedef iterator                self;
00395 
00396     private:
00397         // *** Members
00398 
00400         typename btree::leaf_node*      currnode;
00401 
00403         unsigned short  currslot;
00404 
00406         friend class btree<key_type, data_type, value_type, key_compare, traits, allow_duplicates>::const_iterator;
00407 
00410         mutable value_type              temp_value;
00411 
00412         // The macro BTREE_FRIENDS can be used by outside class to access the B+
00413         // tree internals. This was added for wxBTreeDemo to be able to draw the
00414         // tree.
00415         BTREE_FRIENDS
00416 
00417     public:
00418         // *** Methods
00419 
00421         inline iterator(typename btree::leaf_node *l, unsigned short s)
00422             : currnode(l), currslot(s)
00423         { }
00424 
00427         inline reference operator*() const
00428         {
00429             temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
00430                                                          currnode->slotdata[currslot]) );
00431             return temp_value;
00432         }
00433 
00437         inline pointer operator->() const
00438         {
00439             temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
00440                                                          currnode->slotdata[currslot]) );
00441             return &temp_value;
00442         }
00443 
00445         inline const key_type& key() const
00446         {
00447             return currnode->slotkey[currslot];
00448         }
00449 
00451         inline data_type& data() const
00452         {
00453             return currnode->slotdata[currslot];
00454         }
00455 
00457         inline self& operator++()
00458         {
00459             if (currslot + 1 < currnode->slotuse) {
00460                 ++currslot;
00461             }
00462             else if (currnode->nextleaf != NULL) {
00463                 currnode = currnode->nextleaf;
00464                 currslot = 0;
00465             }
00466             else {
00467                 // this is end()
00468                 currslot = currnode->slotuse;
00469             }
00470 
00471             return *this;
00472         }
00473 
00475         inline self operator++(int)
00476         {
00477             self tmp = *this;   // copy ourselves
00478 
00479             if (currslot + 1 < currnode->slotuse) {
00480                 ++currslot;
00481             }
00482             else if (currnode->nextleaf != NULL) {
00483                 currnode = currnode->nextleaf;
00484                 currslot = 0;
00485             }
00486             else {
00487                 // this is end()
00488                 currslot = currnode->slotuse;
00489             }
00490 
00491             return tmp;
00492         }
00493 
00495         inline self& operator--()
00496         {
00497             if (currslot > 0) {
00498                 --currslot;
00499             }
00500             else if (currnode->prevleaf != NULL) {
00501                 currnode = currnode->prevleaf;
00502                 currslot = currnode->slotuse - 1;
00503             }
00504             else {
00505                 // this is begin()
00506                 currslot = 0;
00507             }
00508 
00509             return *this;
00510         }
00511 
00513         inline self operator--(int)
00514         {
00515             self tmp = *this;   // copy ourselves
00516 
00517             if (currslot > 0) {
00518                 --currslot;
00519             }
00520             else if (currnode->prevleaf != NULL) {
00521                 currnode = currnode->prevleaf;
00522                 currslot = currnode->slotuse - 1;
00523             }
00524             else {
00525                 // this is begin()
00526                 currslot = 0;
00527             }
00528 
00529             return tmp;
00530         }
00531 
00533         inline bool operator==(const self& x) const
00534         {
00535             return (x.currnode == currnode) && (x.currslot == currslot);
00536         }
00537 
00539         inline bool operator!=(const self& x) const
00540         {
00541             return (x.currnode != currnode) || (x.currslot != currslot);
00542         }
00543     };
00544 
00547     class const_iterator
00548     {
00549     public:
00550         // *** Types
00551 
00553         typedef typename btree::key_type                key_type;
00554 
00556         typedef typename btree::data_type               data_type;
00557 
00559         typedef typename btree::value_type              value_type;
00560 
00562         typedef typename btree::pair_type               pair_type;
00563 
00565         typedef const value_type&       reference;
00566 
00568         typedef const value_type*       pointer;
00569 
00571         typedef std::bidirectional_iterator_tag iterator_category;
00572 
00574         typedef ptrdiff_t       difference_type;
00575 
00577         typedef const_iterator          self;
00578 
00579     private:
00580         // *** Members
00581 
00583         const typename btree::leaf_node*        currnode;
00584 
00586         unsigned short  currslot;
00587 
00590         mutable value_type              temp_value;
00591 
00592         // The macro BTREE_FRIENDS can be used by outside class to access the B+
00593         // tree internals. This was added for wxBTreeDemo to be able to draw the
00594         // tree.
00595         BTREE_FRIENDS
00596 
00597     public:
00598         // *** Methods
00599 
00601         inline const_iterator(const typename btree::leaf_node *l, unsigned short s)
00602             : currnode(l), currslot(s)
00603         { }
00604 
00606         inline const_iterator(const iterator &it)
00607             : currnode(it.currnode), currslot(it.currslot)
00608         { }
00609 
00613         inline reference operator*() const
00614         {
00615             temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
00616                                                          currnode->slotdata[currslot]) );
00617             return temp_value;
00618         }
00619 
00623         inline pointer operator->() const
00624         {
00625             temp_value = pair_to_value_type()( pair_type(currnode->slotkey[currslot],
00626                                                          currnode->slotdata[currslot]) );
00627             return &temp_value;
00628         }
00629 
00631         inline const key_type& key() const
00632         {
00633             return currnode->slotkey[currslot];
00634         }
00635 
00637         inline const data_type& data() const
00638         {
00639             return currnode->slotdata[currslot];
00640         }
00641 
00643         inline self& operator++()
00644         {
00645             if (currslot + 1 < currnode->slotuse) {
00646                 ++currslot;
00647             }
00648             else if (currnode->nextleaf != NULL) {
00649                 currnode = currnode->nextleaf;
00650                 currslot = 0;
00651             }
00652             else {
00653                 // this is end()
00654                 currslot = currnode->slotuse;
00655             }
00656 
00657             return *this;
00658         }
00659 
00661         inline self operator++(int)
00662         {
00663             self tmp = *this;   // copy ourselves
00664 
00665             if (currslot + 1 < currnode->slotuse) {
00666                 ++currslot;
00667             }
00668             else if (currnode->nextleaf != NULL) {
00669                 currnode = currnode->nextleaf;
00670                 currslot = 0;
00671             }
00672             else {
00673                 // this is end()
00674                 currslot = currnode->slotuse;
00675             }
00676 
00677             return tmp;
00678         }
00679 
00681         inline self& operator--()
00682         {
00683             if (currslot > 0) {
00684                 --currslot;
00685             }
00686             else if (currnode->prevleaf != NULL) {
00687                 currnode = currnode->prevleaf;
00688                 currslot = currnode->slotuse - 1;
00689             }
00690             else {
00691                 // this is begin()
00692                 currslot = 0;
00693             }
00694 
00695             return *this;
00696         }
00697 
00699         inline self operator--(int)
00700         {
00701             self tmp = *this;   // copy ourselves
00702 
00703             if (currslot > 0) {
00704                 --currslot;
00705             }
00706             else if (currnode->prevleaf != NULL) {
00707                 currnode = currnode->prevleaf;
00708                 currslot = currnode->slotuse - 1;
00709             }
00710             else {
00711                 // this is begin()
00712                 currslot = 0;
00713             }
00714 
00715             return tmp;
00716         }
00717 
00719         inline bool operator==(const self& x) const
00720         {
00721             return (x.currnode == currnode) && (x.currslot == currslot);
00722         }
00723 
00725         inline bool operator!=(const self& x) const
00726         {
00727             return (x.currnode != currnode) || (x.currslot != currslot);
00728         }
00729     };
00730 
00732     typedef std::reverse_iterator<iterator>       reverse_iterator;
00733 
00735     typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
00736 
00737 public:
00738     // *** Small Statistics Structure
00739 
00742     struct tree_stats
00743     {
00745         size_type       itemcount;
00746 
00748         size_type       leaves;
00749 
00751         size_type       innernodes;
00752 
00754         static const unsigned short     leafslots = btree_self::leafslotmax;
00755 
00757         static const unsigned short     innerslots = btree_self::innerslotmax;
00758 
00760         inline tree_stats()
00761             : itemcount(0),
00762               leaves(0), innernodes(0)
00763         {
00764         }
00765 
00767         inline size_type nodes() const
00768         {
00769             return innernodes + leaves;
00770         }
00771 
00773         inline double avgfill_leaves() const
00774         {
00775             return static_cast<double>(itemcount) / (leaves * leafslots);
00776         }
00777     };
00778 
00779 private:
00780     // *** Tree Object Data Members
00781 
00783     node*       root;
00784 
00786     leaf_node   *headleaf;
00787 
00789     leaf_node   *tailleaf;
00790 
00792     tree_stats  stats;
00793 
00796     key_compare key_less;
00797 
00798 public:
00799     // *** Constructors and Destructor
00800 
00803     inline btree()
00804         : root(NULL), headleaf(NULL), tailleaf(NULL)
00805     {
00806     }
00807 
00810     inline btree(const key_compare &kcf)
00811         : root(NULL), headleaf(NULL), tailleaf(NULL),
00812           key_less(kcf)
00813     {
00814     }
00815 
00817     template <class InputIterator>
00818     inline btree(InputIterator first, InputIterator last)
00819         : root(NULL), headleaf(NULL), tailleaf(NULL)
00820     {
00821         insert(first, last);
00822     }
00823 
00826     template <class InputIterator>
00827     inline btree(InputIterator first, InputIterator last, const key_compare &kcf)
00828         : root(NULL), headleaf(NULL), tailleaf(NULL),
00829           key_less(kcf)
00830     {
00831         insert(first, last);
00832     }
00833 
00835     inline ~btree()
00836     {
00837         clear();
00838     }
00839 
00841     void swap(btree_self& from)
00842     {
00843         std::swap(root, from.root);
00844         std::swap(headleaf, from.headleaf);
00845         std::swap(tailleaf, from.tailleaf);
00846         std::swap(stats, from.stats);
00847         std::swap(key_less, from.key_less);
00848     }
00849 
00850 public:
00851     // *** Key and Value Comparison Function Objects
00852 
00854     class value_compare
00855     {
00856     protected:
00858         key_compare     key_comp;
00859 
00861         inline value_compare(key_compare kc)
00862             : key_comp(kc)
00863         { }
00864 
00866         friend class btree<key_type, data_type, value_type, key_compare, traits, allow_duplicates>;
00867 
00868     public:
00870         inline bool operator()(const value_type& x, const value_type& y) const
00871         {
00872             return key_comp(x.first, y.first);
00873         }
00874     };
00875 
00877     inline key_compare key_comp() const
00878     {
00879         return key_less;
00880     }
00881 
00884     inline value_compare value_comp() const
00885     {
00886         return value_compare(key_less);
00887     }
00888 
00889 private:
00890     // *** Convenient Key Comparison Functions Generated From key_less
00891 
00893     inline bool key_lessequal(const key_type &a, const key_type b) const
00894     {
00895         return !key_less(b, a);
00896     }
00897 
00899     inline bool key_greater(const key_type &a, const key_type &b) const
00900     {
00901         return key_less(b, a);
00902     }
00903 
00905     inline bool key_greaterequal(const key_type &a, const key_type b) const
00906     {
00907         return !key_less(a, b);
00908     }
00909 
00912     inline bool key_equal(const key_type &a, const key_type &b) const
00913     {
00914         return !key_less(a, b) && !key_less(b, a);
00915     }
00916 
00917 private:
00918     // *** Node Object Allocation and Deallocation Functions
00919 
00921     inline leaf_node* allocate_leaf()
00922     {
00923         leaf_node* n = new leaf_node;
00924         n->initialize();
00925         stats.leaves++;
00926         return n;
00927     }
00928 
00930     inline inner_node* allocate_inner(unsigned short l)
00931     {
00932         inner_node* n = new inner_node;
00933         n->initialize(l);
00934         stats.innernodes++;
00935         return n;
00936     }
00937 
00940     inline void free_node(node *n)
00941     {
00942         if (n->isleafnode()) {
00943             delete static_cast<leaf_node*>(n);
00944             stats.leaves--;
00945         }
00946         else {
00947             delete static_cast<inner_node*>(n);
00948             stats.innernodes--;
00949         }
00950     }
00951 
00952 public:
00953     // *** Fast Destruction of the B+ Tree
00954 
00956     void clear()
00957     {
00958         if (root)
00959         {
00960             clear_recursive(root);
00961             free_node(root);
00962 
00963             root = NULL;
00964             headleaf = tailleaf = NULL;
00965 
00966             stats = tree_stats();
00967         }
00968 
00969         BTREE_ASSERT(stats.itemcount == 0);
00970     }
00971 
00972 private:
00974     void clear_recursive(node *n)
00975     {
00976         if (n->isleafnode())
00977         {
00978             leaf_node *leafnode = static_cast<leaf_node*>(n);
00979 
00980             for (unsigned int slot = 0; slot < leafnode->slotuse; ++slot)
00981             {
00982                 // data objects are deleted by leaf_node's destructor
00983             }
00984         }
00985         else
00986         {
00987             inner_node *innernode = static_cast<inner_node*>(n);
00988 
00989             for (unsigned short slot = 0; slot < innernode->slotuse + 1; ++slot)
00990             {
00991                 clear_recursive(innernode->childid[slot]);
00992                 free_node(innernode->childid[slot]);
00993             }
00994         }
00995     }
00996 
00997 public:
00998     // *** STL Iterator Construction Functions
00999 
01002     inline iterator begin()
01003     {
01004         return iterator(headleaf, 0);
01005     }
01006 
01009     inline iterator end()
01010     {
01011         return iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0);
01012     }
01013 
01016     inline const_iterator begin() const
01017     {
01018         return const_iterator(headleaf, 0);
01019     }
01020 
01023     inline const_iterator end() const
01024     {
01025         return const_iterator(tailleaf, tailleaf ? tailleaf->slotuse : 0);
01026     }
01027 
01030     inline reverse_iterator rbegin()
01031     {
01032         return reverse_iterator(end());
01033     }
01034 
01037     inline reverse_iterator rend()
01038     {
01039         return reverse_iterator(begin());
01040     }
01041 
01044     inline const_reverse_iterator rbegin() const
01045