unicode.C

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 #include <errno.h>
00021 #include <string.h>
00022 #include <iconv.h>
00023 #include <iostream>
00024 
00025 #include <archon/util/exception.H>
00026 #include <archon/util/unicode.H>
00027 
00028 using namespace std;
00029 
00030 namespace Archon
00031 {
00032   namespace Utilities
00033   {
00034     namespace Unicode
00035     {
00036       string  encodeUtf8(ustring u)
00037       {
00038         string s;
00039         iconv_t cd = iconv_open("UTF-8", "UCS-4LE");
00040         if(cd==reinterpret_cast<iconv_t>(-1))
00041           ARCHON_THROW1(ResourceException,
00042                         string("Unexpected error from 'iconv_open': ") +
00043                         strerror(errno));
00044         char *inbuf = const_cast<char *>(reinterpret_cast<const char *>(u.data()));
00045         size_t inbytesleft = u.size() * 4;
00046         const size_t buffersize = 256;
00047         char buffer[buffersize];
00048         while(inbytesleft)
00049         {
00050           char *outbuf = buffer;
00051           size_t outbytesleft = buffersize;     
00052           const size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00053           if(outbytesleft < buffersize) s.append(buffer, buffersize - outbytesleft);
00054           if(n==static_cast<size_t>(-1) && errno != E2BIG)
00055           {
00056             s += "?";
00057             if(errno == EINVAL) break;
00058             if(errno == EILSEQ)
00059             {
00060               inbytesleft -= 4;
00061               inbuf += 4;
00062             }
00063             else
00064             {
00065               iconv_close(cd);
00066               ARCHON_THROW1(ResourceException,
00067                             string("Unexpected error from 'iconv': ") +
00068                             strerror(errno));
00069             }
00070           }
00071         }
00072         iconv_close(cd);
00073         return s;
00074       }
00075 
00076       ustring decodeUtf8(string s)
00077       {
00078         ustring u;
00079         iconv_t cd = iconv_open("UCS-4LE", "UTF-8");
00080         if(cd==reinterpret_cast<iconv_t>(-1))
00081           ARCHON_THROW1(ResourceException,
00082                         string("Unexpected error from 'iconv_open': ") +
00083                         strerror(errno));
00084         char *inbuf = const_cast<char *>(reinterpret_cast<const char *>(s.data()));
00085         size_t inbytesleft = s.size();
00086         const size_t buffersize = 256;
00087         char buffer[buffersize];
00088         while(inbytesleft)
00089         {
00090           char *outbuf = buffer;
00091           size_t outbytesleft = buffersize;     
00092           const size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00093           if(outbytesleft < buffersize)
00094             u.append(reinterpret_cast<const uchar *>(buffer), (buffersize - outbytesleft)/4);
00095           if(n==static_cast<size_t>(-1) && errno != E2BIG)
00096           {
00097             u.append(1, static_cast<uchar>('?'));
00098             if(errno == EINVAL) break;
00099             if(errno == EILSEQ)
00100             {
00101               --inbytesleft;
00102               ++inbuf;
00103             }
00104             else
00105             {
00106               iconv_close(cd);
00107               ARCHON_THROW1(ResourceException,
00108                             string("Unexpected error from 'iconv': ") +
00109                             strerror(errno));
00110             }
00111           }
00112         }
00113         iconv_close(cd);
00114         return u;
00115       }
00116 
00117       int decodeUtf8(const char *in, int inSize, uchar *out, int &outSize)
00118       {
00119         ustring u;
00120         iconv_t cd = iconv_open("UCS-4LE", "UTF-8");
00121         if(cd==reinterpret_cast<iconv_t>(-1))
00122           ARCHON_THROW1(ResourceException,
00123                         string("Unexpected error from 'iconv_open': ") +
00124                         strerror(errno));
00125         char *inbuf = const_cast<char *>(static_cast<const char *>(in));
00126         size_t inbytesleft = inSize;
00127         char *outbuf = reinterpret_cast<char *>(out);
00128         size_t outbytesleft = outSize*4;
00129         while(inbytesleft)
00130         {
00131           const size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00132           if(n==static_cast<size_t>(-1))
00133           {
00134             if(errno == E2BIG || errno == EINVAL) break;
00135 
00136             if(errno != EILSEQ)
00137             {
00138               iconv_close(cd);
00139               ARCHON_THROW1(ResourceException,
00140                             string("Unexpected error from 'iconv': ") +
00141                             strerror(errno));
00142             }
00143 
00144             if(!outbytesleft) break;
00145 
00146             *reinterpret_cast<uchar *>(outbuf) = '?';
00147             outbytesleft -= 4;
00148             outbuf += 4;
00149             --inbytesleft;
00150             ++inbuf;
00151           }
00152         }
00153         iconv_close(cd);
00154         outSize -= outbytesleft/4;
00155         return inSize-inbytesleft;
00156       }
00157     }
00158   }
00159 }

Generated on Sun Jul 30 22:55:46 2006 for Archon by  doxygen 1.4.4