codecvt_specializations.h

Go to the documentation of this file.
00001 // Locale support (codecvt) -*- C++ -*-
00002 
00003 // Copyright (C) 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2009
00004 //  Free Software Foundation, Inc.
00005 //
00006 // This file is part of the GNU ISO C++ Library.  This library is free
00007 // software; you can redistribute it and/or modify it under the
00008 // terms of the GNU General Public License as published by the
00009 // Free Software Foundation; either version 3, or (at your option)
00010 // any later version.
00011 
00012 // This library is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 
00017 // Under Section 7 of GPL version 3, you are granted additional
00018 // permissions described in the GCC Runtime Library Exception, version
00019 // 3.1, as published by the Free Software Foundation.
00020 
00021 // You should have received a copy of the GNU General Public License and
00022 // a copy of the GCC Runtime Library Exception along with this program;
00023 // see the files COPYING3 and COPYING.RUNTIME respectively.  If not, see
00024 // <http://www.gnu.org/licenses/>.
00025 
00026 //
00027 // ISO C++ 14882: 22.2.1.5 Template class codecvt
00028 //
00029 
00030 // Written by Benjamin Kosnik <bkoz@redhat.com>
00031 
00032 /** @file ext/codecvt_specializations.h
00033  *  This file is a GNU extension to the Standard C++ Library.
00034  */
00035 
00036 #ifndef _EXT_CODECVT_SPECIALIZATIONS_H
00037 #define _EXT_CODECVT_SPECIALIZATIONS_H 1
00038 
00039 #include <bits/c++config.h>
00040 #include <locale>
00041 #include <iconv.h>
00042 
00043 _GLIBCXX_BEGIN_NAMESPACE(__gnu_cxx)
00044 
/// Extension to use iconv for dealing with character encodings.
// Bundles the names of an internal and an external character set with
// the two iconv conversion descriptors that translate between them, plus
// optional byte-order markers.  The object encapsulates data that may
// need to be shared between char_traits, codecvt and ctype.
class encoding_state
{
public:
  // Types:
  // NB: A conversion descriptor subsumes and enhances the
  // functionality of a simple state type such as mbstate_t.
  typedef iconv_t descriptor_type;

protected:
  // Name of internal character set encoding.
  std::string       _M_int_enc;

  // Name of external character set encoding.
  std::string       _M_ext_enc;

  // Conversion descriptor from external encoding to internal encoding.
  descriptor_type   _M_in_desc;

  // Conversion descriptor from internal encoding to external encoding.
  descriptor_type   _M_out_desc;

  // The byte-order marker for the external encoding, if necessary.
  int               _M_ext_bom;

  // The byte-order marker for the internal encoding, if necessary.
  int               _M_int_bom;

  // Number of external bytes needed to construct one complete
  // character in the internal encoding.
  // NB: -1 indicates variable, or stateful, encodings.
  int               _M_bytes;

public:
  // Creates a state with no encodings and no open descriptors;
  // good() is false for such an object.
  explicit
  encoding_state()
  : _M_in_desc(0), _M_out_desc(0), _M_ext_bom(0), _M_int_bom(0), _M_bytes(0)
  { }

  // Creates a state converting between the internal encoding __int and
  // the external encoding __ext and opens both descriptors.
  // Throws std::runtime_error (via __throw_runtime_error) when either
  // descriptor cannot be opened; nothing stays open in that case.
  explicit
  encoding_state(const char* __int, const char* __ext,
                 int __ibom = 0, int __ebom = 0, int __bytes = 1)
  : _M_int_enc(__int), _M_ext_enc(__ext), _M_in_desc(0), _M_out_desc(0),
    _M_ext_bom(__ebom), _M_int_bom(__ibom), _M_bytes(__bytes)
  { init(); }

  // 21.1.2 traits typedefs
  // p4
  // typedef STATE_T state_type
  // requires: state_type shall meet the requirements of
  // CopyConstructible types (20.1.3)
  // NB: This does not preserve the actual state of the conversion
  // descriptor member, but it does duplicate the encoding
  // information.
  encoding_state(const encoding_state& __obj) : _M_in_desc(0), _M_out_desc(0)
  { construct(__obj); }

  // Need assignment operator as well.  Like the copy constructor it
  // copies the encoding information and opens fresh descriptors.
  encoding_state&
  operator=(const encoding_state& __obj)
  {
    construct(__obj);
    return *this;
  }

  ~encoding_state()
  { destroy(); }

  // True when both descriptors are open, i.e. neither null nor the
  // iconv error value (iconv_t)(-1).
  bool
  good() const throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __in_ok = _M_in_desc && _M_in_desc != __err;
    const bool __out_ok = _M_out_desc && _M_out_desc != __err;
    return __in_ok && __out_ok;
  }

  int
  character_ratio() const
  { return _M_bytes; }

  const std::string
  internal_encoding() const
  { return _M_int_enc; }

  int
  internal_bom() const
  { return _M_int_bom; }

  const std::string
  external_encoding() const
  { return _M_ext_enc; }

  int
  external_bom() const
  { return _M_ext_bom; }

  const descriptor_type&
  in_descriptor() const
  { return _M_in_desc; }

  const descriptor_type&
  out_descriptor() const
  { return _M_out_desc; }

protected:
  // Opens whichever descriptor is not open yet, provided both encoding
  // names are non-empty.  On failure every descriptor is closed and
  // reset to 0 before the exception is raised, so that
  //  - a throwing constructor does not leak the descriptor that was
  //    already opened (the destructor does not run in that case), and
  //  - the error value (iconv_t)(-1) is never left in a member, which
  //    would stop a later init() from ever opening that descriptor.
  void
  init()
  {
    const descriptor_type __err = (iconv_t)(-1);
    const bool __have_encodings = _M_int_enc.size() && _M_ext_enc.size();
    if (!__have_encodings)
      return;

    if (!_M_in_desc)
      {
        _M_in_desc = iconv_open(_M_int_enc.c_str(), _M_ext_enc.c_str());
        if (_M_in_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                                "creating iconv input descriptor failed"));
          }
      }
    if (!_M_out_desc)
      {
        _M_out_desc = iconv_open(_M_ext_enc.c_str(), _M_int_enc.c_str());
        if (_M_out_desc == __err)
          {
            destroy();
            std::__throw_runtime_error(__N("encoding_state::_M_init "
                              "creating iconv output descriptor failed"));
          }
      }
  }

  // Replaces this object's encoding information with a copy of __obj's
  // and opens new descriptors for it.
  void
  construct(const encoding_state& __obj)
  {
    destroy();
    _M_int_enc = __obj._M_int_enc;
    _M_ext_enc = __obj._M_ext_enc;
    _M_ext_bom = __obj._M_ext_bom;
    _M_int_bom = __obj._M_int_bom;
    _M_bytes = __obj._M_bytes;
    init();
  }

  // Closes any open descriptor and resets both members to 0, including
  // a member that holds the error value (which must not be closed).
  void
  destroy() throw()
  {
    const descriptor_type __err = (iconv_t)(-1);
    if (_M_in_desc && _M_in_desc != __err)
      iconv_close(_M_in_desc);
    _M_in_desc = 0;

    if (_M_out_desc && _M_out_desc != __err)
      iconv_close(_M_out_desc);
    _M_out_desc = 0;
  }
};
00203 
00204   /// encoding_char_traits
00205   // Custom traits type with encoding_state for the state type, and the
00206   // associated fpos<encoding_state> for the position type, all other
00207   // bits equivalent to the required char_traits instantiations.
00208   template<typename _CharT>
00209     struct encoding_char_traits : public std::char_traits<_CharT>
00210     {
00211       typedef encoding_state                state_type;
00212       typedef typename std::fpos<state_type>        pos_type;
00213     };
00214 
00215 _GLIBCXX_END_NAMESPACE
00216 
00217 
00218 _GLIBCXX_BEGIN_NAMESPACE(std)
00219 
00220   using __gnu_cxx::encoding_state;
00221 
00222   /// codecvt<InternT, _ExternT, encoding_state> specialization.
00223   // This partial specialization takes advantage of iconv to provide
00224   // code conversions between a large number of character encodings.
00225   template<typename _InternT, typename _ExternT>
00226     class codecvt<_InternT, _ExternT, encoding_state>
00227     : public __codecvt_abstract_base<_InternT, _ExternT, encoding_state>
00228     {
00229     public:      
00230       // Types:
00231       typedef codecvt_base::result          result;
00232       typedef _InternT                  intern_type;
00233       typedef _ExternT                  extern_type;
00234       typedef __gnu_cxx::encoding_state         state_type;
00235       typedef state_type::descriptor_type       descriptor_type;
00236 
00237       // Data Members:
00238       static locale::id         id;
00239 
00240       explicit 
00241       codecvt(size_t __refs = 0)
00242       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
00243       { }
00244 
00245       explicit 
00246       codecvt(state_type& __enc, size_t __refs = 0)
00247       : __codecvt_abstract_base<intern_type, extern_type, state_type>(__refs)
00248       { }
00249 
00250      protected:
00251       virtual 
00252       ~codecvt() { }
00253 
00254       virtual result
00255       do_out(state_type& __state, const intern_type* __from, 
00256          const intern_type* __from_end, const intern_type*& __from_next,
00257          extern_type* __to, extern_type* __to_end,
00258          extern_type*& __to_next) const;
00259 
00260       virtual result
00261       do_unshift(state_type& __state, extern_type* __to, 
00262          extern_type* __to_end, extern_type*& __to_next) const;
00263 
00264       virtual result
00265       do_in(state_type& __state, const extern_type* __from, 
00266         const extern_type* __from_end, const extern_type*& __from_next,
00267         intern_type* __to, intern_type* __to_end, 
00268         intern_type*& __to_next) const;
00269 
00270       virtual int 
00271       do_encoding() const throw();
00272 
00273       virtual bool 
00274       do_always_noconv() const throw();
00275 
00276       virtual int 
00277       do_length(state_type&, const extern_type* __from, 
00278         const extern_type* __end, size_t __max) const;
00279 
00280       virtual int 
00281       do_max_length() const throw();
00282     };
00283 
00284   template<typename _InternT, typename _ExternT>
00285     locale::id 
00286     codecvt<_InternT, _ExternT, encoding_state>::id;
00287 
// Adaptor hiding the two signatures in use for the second argument of
// iconv(): SUSv2 and others declare it as 'const char**', whereas
// glibc 2.2 uses 'char**', which matches the POSIX 1003.1-2001 standard.
// _Tp is deduced from the function pointer that is passed in, and the
// input buffer pointer is cast to that type before the call is forwarded,
// so g++ does the work for us.
template<typename _Tp>
  inline size_t
  __iconv_adaptor(size_t(*__func)(iconv_t, _Tp, size_t*, char**, size_t*),
                  iconv_t __cd, char** __inbuf, size_t* __inbytes,
                  char** __outbuf, size_t* __outbytes)
  {
    // Only the constness of the pointee may differ between the types.
    _Tp __typed_inbuf = (_Tp)__inbuf;
    return __func(__cd, __typed_inbuf, __inbytes, __outbuf, __outbytes);
  }
00298 
00299   template<typename _InternT, typename _ExternT>
00300     codecvt_base::result
00301     codecvt<_InternT, _ExternT, encoding_state>::
00302     do_out(state_type& __state, const intern_type* __from, 
00303        const intern_type* __from_end, const intern_type*& __from_next,
00304        extern_type* __to, extern_type* __to_end,
00305        extern_type*& __to_next) const
00306     {
00307       result __ret = codecvt_base::error;
00308       if (__state.good())
00309     {
00310       const descriptor_type& __desc = __state.out_descriptor();
00311       const size_t __fmultiple = sizeof(intern_type);
00312       size_t __fbytes = __fmultiple * (__from_end - __from);
00313       const size_t __tmultiple = sizeof(extern_type);
00314       size_t __tbytes = __tmultiple * (__to_end - __to); 
00315       
00316       // Argument list for iconv specifies a byte sequence. Thus,
00317       // all to/from arrays must be brutally casted to char*.
00318       char* __cto = reinterpret_cast<char*>(__to);
00319       char* __cfrom;
00320       size_t __conv;
00321 
00322       // Some encodings need a byte order marker as the first item
00323       // in the byte stream, to designate endian-ness. The default
00324       // value for the byte order marker is NULL, so if this is
00325       // the case, it's not necessary and we can just go on our
00326       // merry way.
00327       int __int_bom = __state.internal_bom();
00328       if (__int_bom)
00329         {     
00330           size_t __size = __from_end - __from;
00331           intern_type* __cfixed = static_cast<intern_type*>
00332         (__builtin_alloca(sizeof(intern_type) * (__size + 1)));
00333           __cfixed[0] = static_cast<intern_type>(__int_bom);
00334           char_traits<intern_type>::copy(__cfixed + 1, __from, __size);
00335           __cfrom = reinterpret_cast<char*>(__cfixed);
00336           __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
00337                                         &__fbytes, &__cto, &__tbytes); 
00338         }
00339       else
00340         {
00341           intern_type* __cfixed = const_cast<intern_type*>(__from);
00342           __cfrom = reinterpret_cast<char*>(__cfixed);
00343           __conv = __iconv_adaptor(iconv, __desc, &__cfrom, &__fbytes, 
00344                        &__cto, &__tbytes); 
00345         }
00346 
00347       if (__conv != size_t(-1))
00348         {
00349           __from_next = reinterpret_cast<const intern_type*>(__cfrom);
00350           __to_next = reinterpret_cast<extern_type*>(__cto);
00351           __ret = codecvt_base::ok;
00352         }
00353       else 
00354         {
00355           if (__fbytes < __fmultiple * (__from_end - __from))
00356         {
00357           __from_next = reinterpret_cast<const intern_type*>(__cfrom);
00358           __to_next = reinterpret_cast<extern_type*>(__cto);
00359           __ret = codecvt_base::partial;
00360         }
00361           else
00362         __ret = codecvt_base::error;
00363         }
00364     }
00365       return __ret; 
00366     }
00367 
00368   template<typename _InternT, typename _ExternT>
00369     codecvt_base::result
00370     codecvt<_InternT, _ExternT, encoding_state>::
00371     do_unshift(state_type& __state, extern_type* __to, 
00372            extern_type* __to_end, extern_type*& __to_next) const
00373     {
00374       result __ret = codecvt_base::error;
00375       if (__state.good())
00376     {
00377       const descriptor_type& __desc = __state.in_descriptor();
00378       const size_t __tmultiple = sizeof(intern_type);
00379       size_t __tlen = __tmultiple * (__to_end - __to); 
00380       
00381       // Argument list for iconv specifies a byte sequence. Thus,
00382       // all to/from arrays must be brutally casted to char*.
00383       char* __cto = reinterpret_cast<char*>(__to);
00384       size_t __conv = __iconv_adaptor(iconv,__desc, NULL, NULL,
00385                                           &__cto, &__tlen); 
00386       
00387       if (__conv != size_t(-1))
00388         {
00389           __to_next = reinterpret_cast<extern_type*>(__cto);
00390           if (__tlen == __tmultiple * (__to_end - __to))
00391         __ret = codecvt_base::noconv;
00392           else if (__tlen == 0)
00393         __ret = codecvt_base::ok;
00394           else
00395         __ret = codecvt_base::partial;
00396         }
00397       else 
00398         __ret = codecvt_base::error;
00399     }
00400       return __ret; 
00401     }
00402    
00403   template<typename _InternT, typename _ExternT>
00404     codecvt_base::result
00405     codecvt<_InternT, _ExternT, encoding_state>::
00406     do_in(state_type& __state, const extern_type* __from, 
00407       const extern_type* __from_end, const extern_type*& __from_next,
00408       intern_type* __to, intern_type* __to_end, 
00409       intern_type*& __to_next) const
00410     { 
00411       result __ret = codecvt_base::error;
00412       if (__state.good())
00413     {
00414       const descriptor_type& __desc = __state.in_descriptor();
00415       const size_t __fmultiple = sizeof(extern_type);
00416       size_t __flen = __fmultiple * (__from_end - __from);
00417       const size_t __tmultiple = sizeof(intern_type);
00418       size_t __tlen = __tmultiple * (__to_end - __to); 
00419       
00420       // Argument list for iconv specifies a byte sequence. Thus,
00421       // all to/from arrays must be brutally casted to char*.
00422       char* __cto = reinterpret_cast<char*>(__to);
00423       char* __cfrom;
00424       size_t __conv;
00425 
00426       // Some encodings need a byte order marker as the first item
00427       // in the byte stream, to designate endian-ness. The default
00428       // value for the byte order marker is NULL, so if this is
00429       // the case, it's not necessary and we can just go on our
00430       // merry way.
00431       int __ext_bom = __state.external_bom();
00432       if (__ext_bom)
00433         {     
00434           size_t __size = __from_end - __from;
00435           extern_type* __cfixed =  static_cast<extern_type*>
00436         (__builtin_alloca(sizeof(extern_type) * (__size + 1)));
00437           __cfixed[0] = static_cast<extern_type>(__ext_bom);
00438           char_traits<extern_type>::copy(__cfixed + 1, __from, __size);
00439           __cfrom = reinterpret_cast<char*>(__cfixed);
00440           __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
00441                                        &__flen, &__cto, &__tlen); 
00442         }
00443       else
00444         {
00445           extern_type* __cfixed = const_cast<extern_type*>(__from);
00446           __cfrom = reinterpret_cast<char*>(__cfixed);
00447           __conv = __iconv_adaptor(iconv, __desc, &__cfrom,
00448                                        &__flen, &__cto, &__tlen); 
00449         }
00450 
00451       
00452       if (__conv != size_t(-1))
00453         {
00454           __from_next = reinterpret_cast<const extern_type*>(__cfrom);
00455           __to_next = reinterpret_cast<intern_type*>(__cto);
00456           __ret = codecvt_base::ok;
00457         }
00458       else 
00459         {
00460           if (__flen < static_cast<size_t>(__from_end - __from))
00461         {
00462           __from_next = reinterpret_cast<const extern_type*>(__cfrom);
00463           __to_next = reinterpret_cast<intern_type*>(__cto);
00464           __ret = codecvt_base::partial;
00465         }
00466           else
00467         __ret = codecvt_base::error;
00468         }
00469     }
00470       return __ret; 
00471     }
00472   
00473   template<typename _InternT, typename _ExternT>
00474     int 
00475     codecvt<_InternT, _ExternT, encoding_state>::
00476     do_encoding() const throw()
00477     {
00478       int __ret = 0;
00479       if (sizeof(_ExternT) <= sizeof(_InternT))
00480     __ret = sizeof(_InternT) / sizeof(_ExternT);
00481       return __ret; 
00482     }
00483   
00484   template<typename _InternT, typename _ExternT>
00485     bool 
00486     codecvt<_InternT, _ExternT, encoding_state>::
00487     do_always_noconv() const throw()
00488     { return false; }
00489   
00490   template<typename _InternT, typename _ExternT>
00491     int 
00492     codecvt<_InternT, _ExternT, encoding_state>::
00493     do_length(state_type&, const extern_type* __from, 
00494           const extern_type* __end, size_t __max) const
00495     { return std::min(__max, static_cast<size_t>(__end - __from)); }
00496 
00497   // _GLIBCXX_RESOLVE_LIB_DEFECTS
00498   // 74.  Garbled text for codecvt::do_max_length
00499   template<typename _InternT, typename _ExternT>
00500     int 
00501     codecvt<_InternT, _ExternT, encoding_state>::
00502     do_max_length() const throw()
00503     { return 1; }
00504 
00505 _GLIBCXX_END_NAMESPACE
00506 
00507 #endif

Generated on Thu Jul 23 21:16:02 2009 for libstdc++ by  doxygen 1.5.8