00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <errno.h>
00021 #include <string.h>
00022 #include <iconv.h>
00023 #include <iostream>
00024
00025 #include <archon/util/exception.H>
00026 #include <archon/util/unicode.H>
00027
00028 using namespace std;
00029
00030 namespace Archon
00031 {
00032 namespace Utilities
00033 {
00034 namespace Unicode
00035 {
00036 string encodeUtf8(ustring u)
00037 {
00038 string s;
00039 iconv_t cd = iconv_open("UTF-8", "UCS-4LE");
00040 if(cd==reinterpret_cast<iconv_t>(-1))
00041 ARCHON_THROW1(ResourceException,
00042 string("Unexpected error from 'iconv_open': ") +
00043 strerror(errno));
00044 char *inbuf = const_cast<char *>(reinterpret_cast<const char *>(u.data()));
00045 size_t inbytesleft = u.size() * 4;
00046 const size_t buffersize = 256;
00047 char buffer[buffersize];
00048 while(inbytesleft)
00049 {
00050 char *outbuf = buffer;
00051 size_t outbytesleft = buffersize;
00052 const size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00053 if(outbytesleft < buffersize) s.append(buffer, buffersize - outbytesleft);
00054 if(n==static_cast<size_t>(-1) && errno != E2BIG)
00055 {
00056 s += "?";
00057 if(errno == EINVAL) break;
00058 if(errno == EILSEQ)
00059 {
00060 inbytesleft -= 4;
00061 inbuf += 4;
00062 }
00063 else
00064 {
00065 iconv_close(cd);
00066 ARCHON_THROW1(ResourceException,
00067 string("Unexpected error from 'iconv': ") +
00068 strerror(errno));
00069 }
00070 }
00071 }
00072 iconv_close(cd);
00073 return s;
00074 }
00075
00076 ustring decodeUtf8(string s)
00077 {
00078 ustring u;
00079 iconv_t cd = iconv_open("UCS-4LE", "UTF-8");
00080 if(cd==reinterpret_cast<iconv_t>(-1))
00081 ARCHON_THROW1(ResourceException,
00082 string("Unexpected error from 'iconv_open': ") +
00083 strerror(errno));
00084 char *inbuf = const_cast<char *>(reinterpret_cast<const char *>(s.data()));
00085 size_t inbytesleft = s.size();
00086 const size_t buffersize = 256;
00087 char buffer[buffersize];
00088 while(inbytesleft)
00089 {
00090 char *outbuf = buffer;
00091 size_t outbytesleft = buffersize;
00092 const size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00093 if(outbytesleft < buffersize)
00094 u.append(reinterpret_cast<const uchar *>(buffer), (buffersize - outbytesleft)/4);
00095 if(n==static_cast<size_t>(-1) && errno != E2BIG)
00096 {
00097 u.append(1, static_cast<uchar>('?'));
00098 if(errno == EINVAL) break;
00099 if(errno == EILSEQ)
00100 {
00101 --inbytesleft;
00102 ++inbuf;
00103 }
00104 else
00105 {
00106 iconv_close(cd);
00107 ARCHON_THROW1(ResourceException,
00108 string("Unexpected error from 'iconv': ") +
00109 strerror(errno));
00110 }
00111 }
00112 }
00113 iconv_close(cd);
00114 return u;
00115 }
00116
00117 int decodeUtf8(const char *in, int inSize, uchar *out, int &outSize)
00118 {
00119 ustring u;
00120 iconv_t cd = iconv_open("UCS-4LE", "UTF-8");
00121 if(cd==reinterpret_cast<iconv_t>(-1))
00122 ARCHON_THROW1(ResourceException,
00123 string("Unexpected error from 'iconv_open': ") +
00124 strerror(errno));
00125 char *inbuf = const_cast<char *>(static_cast<const char *>(in));
00126 size_t inbytesleft = inSize;
00127 char *outbuf = reinterpret_cast<char *>(out);
00128 size_t outbytesleft = outSize*4;
00129 while(inbytesleft)
00130 {
00131 const size_t n = iconv(cd, &inbuf, &inbytesleft, &outbuf, &outbytesleft);
00132 if(n==static_cast<size_t>(-1))
00133 {
00134 if(errno == E2BIG || errno == EINVAL) break;
00135
00136 if(errno != EILSEQ)
00137 {
00138 iconv_close(cd);
00139 ARCHON_THROW1(ResourceException,
00140 string("Unexpected error from 'iconv': ") +
00141 strerror(errno));
00142 }
00143
00144 if(!outbytesleft) break;
00145
00146 *reinterpret_cast<uchar *>(outbuf) = '?';
00147 outbytesleft -= 4;
00148 outbuf += 4;
00149 --inbytesleft;
00150 ++inbuf;
00151 }
00152 }
00153 iconv_close(cd);
00154 outSize -= outbytesleft/4;
00155 return inSize-inbytesleft;
00156 }
00157 }
00158 }
00159 }