uri.C

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 #include <string>
00021 
00022 #include <archon/util/file.H>
00023 #include <archon/util/text.H>
00024 
00025 #include <archon/util/uri.H>
00026 
00027 /*
00028 
00029 NOTES:
00030 
00031 Problem with allowed relative URI's in file:d/u
00032 
00033 
00034 
00035 Excerpt from RFC 2396:
00036 
00037 5.1.3. Base URI from the Retrieval URI
00038 
00039 If no base URI is embedded and the document is not encapsulated
00040 within some other entity (e.g., the top level of a composite entity),
00041 then, if a URI was used to retrieve the base document, that URI shall
00042 be considered the base URI.  Note that if the retrieval was the
00043 result of a redirected request, the last URI used (i.e., that which
00044 resulted in the actual retrieval of the document) is the base URI.
00045 
00046 Note the remark about redirection!!!
00047 
00048 */
00049 
00050 namespace Archon
00051 {
00052   namespace Utilities
00053   {
00054     using namespace std;
00055 
00056 
00057     enum Scheme
00058     {
00059       scheme_file,
00060       scheme_http,
00061       scheme_ftp
00062     };
00063 
00064 
00065     string schemeName(Scheme scheme)
00066     {
00067       switch(scheme)
00068       {
00069       case scheme_file: return "FILE";
00070       case scheme_http: return "HTTP";
00071       case scheme_ftp: return "FTP";
00072       }
00073       return "<unknown>";
00074     }
00075 
00076 
00077     /*
00078      * mark:           ! ' ( ) * - . _ ~
00079      *
00080      * common:         $ & + ,
00081      *
00082      * user:           $ & + ,     ; =
00083      * userinfo:       $ & + ,   : ; =
00084      *
00085      * pchar_no_equal: $ & + ,   :       @
00086      * pchar:          $ & + ,   :   =   @
00087      * uric_no_slash:  $ & + ,   : ; = ? @
00088      * reserved:       $ & + , / : ; = ? @
00089      *
00090      */
00091 
00092     const unsigned lowalphaMask       = 1;
00093     const unsigned upalphaMask        = 2;
00094     const unsigned digitMask          = 4;
00095     const unsigned markMask           = 8;
00096     const unsigned commonMask         = 16;
00097     const unsigned slashMask          = 32;
00098     const unsigned colonMask          = 64;
00099     const unsigned semicolonMask      = 128;
00100     const unsigned equalMask          = 256;
00101     const unsigned questionmarkMask   = 512;
00102     const unsigned commercialAtMask   = 1024;
00103     const unsigned otherMask          = 2048;
00104 
00105     const unsigned alphaMask        = lowalphaMask|upalphaMask;
00106     const unsigned alphanumMask     = alphaMask|digitMask;
00107     const unsigned unreservedMask   = alphanumMask|markMask;
00108     const unsigned reservedMask     = commonMask|slashMask|colonMask|
00109     semicolonMask|equalMask|
00110     questionmarkMask|commercialAtMask;
00111 
00112     const unsigned userMask         = unreservedMask|commonMask|semicolonMask|
00113     equalMask;
00114     const unsigned userInfoMask     = userMask|colonMask;
00115 
00116     const unsigned pcharNoEqualMask = unreservedMask|commonMask|colonMask|
00117     commercialAtMask;
00118     const unsigned pcharMask        = pcharNoEqualMask|equalMask;
00119     const unsigned uricNoSlashMask  = pcharMask|semicolonMask|questionmarkMask;
00120     const unsigned uricMask         = uricNoSlashMask|slashMask;
00121 
00122     const unsigned anyMask          = uricMask|otherMask;
00123 
00128     unsigned charClassTable[127-32] =
00129     {
00130       /*  32    ! " #  */     2048,        8,     2048,     2048,
00131       /*  36  $ % & '  */       16,     2048,       16,        8,
00132       /*  40  ( ) * +  */        8,        8,        8,       16,
00133       /*  44  , - . /  */       16,        8,        8,       32,
00134       /*  48  0 1 2 3  */        4,        4,        4,        4,
00135       /*  52  4 5 6 7  */        4,        4,        4,        4,
00136       /*  56  8 9 : ;  */        4,        4,       64,      128,
00137       /*  60  < = > ?  */     2048,      256,     2048,      512,
00138       /*  64  @ A B C  */     1024,        2,        2,        2,
00139       /*  68  D E F G  */        2,        2,        2,        2,
00140       /*  72  H I J K  */        2,        2,        2,        2,
00141       /*  76  L M N O  */        2,        2,        2,        2,
00142       /*  80  P Q R S  */        2,        2,        2,        2,
00143       /*  84  T U V W  */        2,        2,        2,        2,
00144       /*  88  X Y Z [  */        2,        2,        2,     2048,
00145       /*  92  \ ] ^ _  */     2048,     2048,     2048,        8,
00146       /*  96  ` a b c  */     2048,        1,        1,        1,
00147       /* 100  d e f g  */        1,        1,        1,        1,
00148       /* 104  h i j k  */        1,        1,        1,        1,
00149       /* 108  l m n o  */        1,        1,        1,        1,
00150       /* 112  p q r s  */        1,        1,        1,        1,
00151       /* 116  t u v w  */        1,        1,        1,        1,
00152       /* 120  x y z {  */        1,        1,        1,     2048,
00153       /* 124  | } ~    */     2048,     2048,        8
00154     };
00155 
00156 
00157     static void validatePart(const string &v, const string &uri, int offset,
00158                              unsigned charClassMask, bool allowEscape,
00159                              const string &partName)
00160     {
00161       for(string::size_type i=0; i<v.size(); ++i)
00162       {
00163         unsigned char c = static_cast<unsigned char>(v[i]);
00164         if(allowEscape && c == '%')
00165         {
00166           if(v.size() <= i+2)
00167             ARCHON_THROW4(Uri::SyntaxException,
00168                           "Un-terminated escape sequence "
00169                           "within " + partName, uri, offset+i,
00170                           offset + v.size());
00171           if(!isxdigit(static_cast<unsigned char>(v[i+1])) ||
00172              !isxdigit(static_cast<unsigned char>(v[i+2])))
00173             ARCHON_THROW4(Uri::SyntaxException,
00174                           "Invalid escape sequence "
00175                           "within " + partName, uri,
00176                           offset+i, offset+i+3);
00177           i += 2;
00178         }
00179         else if(c < '\x20' || c > '\x7e' ||
00180                 !(charClassTable[c-'\x20']&charClassMask))
00181           ARCHON_THROW4(Uri::SyntaxException,
00182                         "Illegal character within " +
00183                         partName, uri, offset+i, offset+i+1);
00184       }
00185     }
00186 
00187 
00188     static void validateUser(const string &v, const string &uri,
00189                              int offset, Scheme scheme)
00190     {
00191       validatePart(v, uri, offset, userMask, true, "username");
00192     }
00193 
00194 
00195     static void validatePassword(const string &v, const string &uri,
00196                                  int offset, Scheme scheme)
00197     {
00198       /*
00199        * Password is restructed to the same character class as the
00200        * username
00201        */
00202       validatePart(v, uri, offset, userMask, true, "password");
00203     }
00204 
00205 
00206     static void validateUserinfo(const string &v, const string &uri,
00207                                  int offset, Scheme scheme)
00208     {
00209       /*
00210        * RFC 2396 does not specify precisely how to seperate the
00211        * password from the username, and even advices aginst ever having
00212        * a password in a URI.
00213        *
00214        * The partially deprecated RFC 1738 specifies though, that the
00215        * only two schemes that supports a user-info part before the
00216        * domain name, is FTP and TELNET. In both schemes the seperator
00217        * is ":", and ":" is neither allowed in the username nor in the
00218        * password. So this is what I'll stick to.  */
00219       string::size_type i = v.find(':');
00220       if(i != string::npos)
00221       {
00222         validateUser(v.substr(0, i), uri, offset, scheme);
00223         validatePassword(v.substr(i+1), uri, offset+i+1, scheme);
00224       }
00225       else validateUser(v, uri, offset, scheme);
00226     }
00227 
00228 
00229     static void validateHost(const string &v, const string &uri,
00230                              int offset, Scheme scheme)
00231     {
00232       string::size_type i = v.rfind('.');
00233       if(i != string::npos && i+1 < v.size() && isdigit(static_cast<unsigned char>(v[i+1])))
00234       {
00235         // IPv4 address
00236         const string::size_type i1 =
00237           v.find('.');
00238         const string::size_type i2 =
00239           i1 == string::npos ? string::npos : v.find('.', i1+1);
00240         const string::size_type i3 =
00241           i2 == string::npos ? string::npos : v.find('.', i2+1);
00242         const string::size_type i4 =
00243           i3 == string::npos ? string::npos : v.find('.', i3+1);
00244         if(i4 != string::npos || i3 == string::npos)
00245           ARCHON_THROW4(Uri::SyntaxException,
00246                         "Wrong number of fields in IPv4 address",
00247                         uri, offset, offset+v.size());
00248         if(i1 == 0 || i1+1 == i2 || i2+1 == i3 || i3+1 == v.size())
00249           ARCHON_THROW4(Uri::SyntaxException,
00250                         "Empty fields in IPv4 address",
00251                         uri, offset, offset+v.size());
00252         for(i=0; i<v.size(); ++i)
00253           if(!isdigit(v[i]) && v[i] != '.')
00254             ARCHON_THROW4(Uri::SyntaxException,
00255                           "Illegal character within IPv4 address",
00256                           uri, offset+i, offset+i+1);
00257       }
00258       else
00259       {
00260         // Internet domain name
00261 
00262         i = 0;
00263         string::size_type j;
00264         /*
00265          * 'i' is position of first character of current domain label
00266          * 'j' is 1 + position of last character of current domain label
00267          */
00268 
00269         for(;;)
00270         {
00271           j = v.find('.', i);
00272           if(j == string::npos) j = v.size();
00273 
00274           if(j == i && j < v.size())
00275             ARCHON_THROW4(Uri::SyntaxException,
00276                           "Empty label within domain name",
00277                           uri, offset+i, offset+i+1);
00278           for(string::size_type k = i; k<j; ++k)
00279             if(!isalnum(v[k]) && (v[k] != '-' || k == i || k == j-1))
00280               ARCHON_THROW4(Uri::SyntaxException,
00281                             "Illegal character within domain name",
00282                             uri, offset+k, offset+k+1);
00283           if(j == v.size()) break;
00284 
00285           i = j+1;
00286         }
00287       }
00288     }
00289 
00290 
00291     static void validatePort(const string &v, const string &uri,
00292                              int offset, Scheme scheme)
00293     {
00294       validatePart(v, uri, offset, digitMask, false, "port number");
00295     }
00296 
00297 
00298     static void validateHostPort(const string &v, const string &uri,
00299                                  int offset, Scheme scheme)
00300     {
00301       string::size_type i = v.rfind(':');
00302       if(i != string::npos)
00303       {
00304         validateHost(v.substr(0, i), uri, offset, scheme);
00305 
00306         if(scheme == scheme_file)
00307           ARCHON_THROW4(Uri::SyntaxException,
00308                         "Port number not allowed in " +
00309                         schemeName(scheme) + " URI",
00310                         uri, offset+i, offset+v.size());
00311 
00312         validatePort(v.substr(i+1), uri, offset+i+1, scheme);
00313       }
00314       else validateHost(v, uri, offset, scheme);
00315     }
00316 
00317 
00318     static void validateAuthority(string v, const string &uri,
00319                                   int offset, Scheme scheme)
00320     {
00321       if(v.empty()) return;
00322       v.erase(0, 2);
00323       offset += 2;
00324       string::size_type i = v.rfind('@');
00325       if(i != string::npos)
00326       {
00327         if(scheme != scheme_ftp)
00328           ARCHON_THROW4(Uri::SyntaxException,
00329                         "User info not allowed in " +
00330                         schemeName(scheme) + " URI",
00331                         uri, offset, offset+i+1);
00332 
00333         validateUserinfo(v.substr(0, i), uri, offset, scheme);
00334         validateHostPort(v.substr(i+1), uri, offset+i+1, scheme);
00335       }
00336       else validateHostPort(v, uri, offset, scheme);
00337     }
00338 
00339 
00340     static void validatePathSegmentParameter(const string &v, const string &uri,
00341                                              int offset, Scheme scheme)
00342     {
00343       if(scheme == scheme_ftp)
00344       {
00345         if(v.size() == 0)
00346           ARCHON_THROW4(Uri::SyntaxException,
00347                         "Path segment parameter expexted",
00348                         uri, offset, offset+1);
00349 
00350         if(v.substr(0, 5) == "type=")
00351         {
00352           const string t = v.substr(5);
00353           if(t == "a" || t == "A" ||
00354              t == "i" || t == "I" ||
00355              t == "d" || t == "D") return;
00356         }
00357         ARCHON_THROW4(Uri::SyntaxException,
00358                       "A path segment parameter must "
00359                       "have the form 'type=[aidAID]' in the " +
00360                       schemeName(scheme) +
00361                       " URI scheme", uri, offset,
00362                       offset+v.size());
00363       }
00364       else
00365       {
00366         validatePart(v, uri, offset, pcharMask, true, "path segment parameter ");      
00367       }
00368     }
00369 
00370 
00371     static void validatePathSegment(const string &v, const string &uri,
00372                                     int offset, Scheme scheme,
00373                                     bool lastSegment)
00374     {
00375       string::size_type i = v.find(';');
00376       if(i != string::npos)
00377       {
00378         if(scheme == scheme_http ||
00379            scheme == scheme_file)
00380           ARCHON_THROW4(Uri::SyntaxException,
00381                         "Path segment parameters not "
00382                         "allowed in " + schemeName(scheme) +
00383                         " URI scheme", uri, offset+i,
00384                         offset+v.size());
00385 
00386         if(scheme == scheme_ftp && !lastSegment)
00387           ARCHON_THROW4(Uri::SyntaxException,
00388                         "Path segment parameters may "
00389                         "only be applied to the final path segemnt "
00390                         "in the " + schemeName(scheme) +
00391                         " URI scheme", uri, offset+i,
00392                         offset+v.size());
00393 
00394         // 'j' is position of first character of current parameter
00395         // 'k' is 1 + position of last character of current parameter
00396         string::size_type j=i+1, k;
00397 
00398         for(;;)
00399         {
00400           k = v.find(';', j);
00401           if(k == string::npos) k = v.size();
00402 
00403           validatePathSegmentParameter(v.substr(j, k-j), uri, offset+j,
00404                                        scheme);
00405 
00406           if(k == v.size()) break;
00407 
00408           if(scheme == scheme_ftp)
00409             ARCHON_THROW4(Uri::SyntaxException,
00410                           "Only one path segemnt parameter "
00411                           "allowed in the " + schemeName(scheme) +
00412                           " URI scheme", uri, offset+k,
00413                           offset+v.size());
00414 
00415           j = k+1;
00416         }
00417       }
00418       else i = v.size();
00419 
00420       /*
00421        * I assume here that "=" is not allowed un-escaped in the part
00422        * of the path segment that preceeds the path segment
00423        * parameters.
00424        *
00425        * In RFC 2396 it is stated that "=" is a part of the character
00426        * class "pchar", and thus may occur before the parameters, but
00427        * also that it is reserved for special purpose if occuring
00428        * somewhere in a path segemnt.
00429        *
00430        * For any URI scheme that I know of, "=" has no special meaning
00431        * whitin the part of a path segment that preceeds the
00432        * parameters, so I have chosen only to allow "=" in the
00433        * parameter section of the path segment.
00434        */
00435       validatePart(v.substr(0, i), uri, offset, uricMask, true,
00436                    "path segment");
00437     }
00438 
00439 
00440     static void validatePath(const string &v, const string &uri,
00441                              int offset, Scheme scheme)
00442     {
00443       // 'i' is position of first character of current path segemnt
00444       // 'j' is 1 + position of last character of current path segemnt
00445       string::size_type i=0, j;
00446 
00447       for(;;)
00448       {
00449         j = v.find('/', i);
00450         if(j == string::npos) j = v.size();
00451 
00452         validatePathSegment(v.substr(i, j-i), uri, offset+i, scheme,
00453                             j == v.size());
00454 
00455         if(j == v.size()) break;
00456 
00457         i = j+1;
00458       }
00459     }
00460 
00461 
00462     static void validateQuery(string v, const string &uri,
00463                               int offset, Scheme scheme)
00464     {
00465       if(v.empty()) return;
00466       v.erase(0, 1);
00467       ++offset;
00468       validatePart(v, uri, offset, uricMask, true, "query");      
00469     }
00470 
00471 
00472     static void validateFragmentIdentifier(string v, const string &uri,
00473                                            int offset)
00474     {
00475       if(v.empty()) return;
00476       v.erase(0, 1);
00477       ++offset;
00478       validatePart(v, uri, offset, uricMask, true, "fragment identifier");      
00479     }
00480 
00481 
00482     static string makePointerLine(int from, int to)
00483     {
00484       return string(from, ' ') + string(to-from, '^') + "\n";
00485     }
00486 
00487 
00488 
00534     void Uri::canonicalizePath()
00535     {
00536       string::size_type i = 0;
00537       for(;;)
00538       {
00539         i = path.find("/.", i);
00540         if(i == string::npos) break;
00541         if(i == path.size()-2 || path[i+2] == '/') path.erase(i+1, 2);
00542         else if(path[i+2] == '.' && (i == path.size()-3 || path[i+3] == '/') &&
00543                 i>0)
00544         {
00545           string::size_type j = path.rfind('/', i-1)+1;
00546           // The slash will always be there as long as paths are absolute
00547           path.erase(j, i+4-j);
00548           i = j-1;
00549         }
00550         else i += 2;
00551       }
00552     }
00553 
00554 
00562     void Uri::resolveRelative(const Uri &baseUri)
00563     {
00564       scheme = baseUri.scheme;
00565       if(!authority.empty()) return;
00566       authority = baseUri.authority;
00567       if(!path.empty() && path[0] == '/') return;
00568       if(!path.empty())
00569       {
00570         // Relative path encountered
00571         path = (baseUri.path.empty() ? "/" :
00572                 baseUri.path.substr(0, baseUri.path.rfind('/')+1)) + path;
00573         canonicalizePath();
00574         return;
00575       }
00576       query = baseUri.query;
00577     }
00578 
00579 
00580     Uri::Uri()
00581     {
00582       scheme = "file:";
00583     
00584       string cwd = File::getCWD();
00585       if(cwd[cwd.size()-1] != '/') cwd += '/';
00586 
00587       // 'i' is position of first character of current path segemnt
00588       // 'j' is 1 + position of last character of current path segemnt
00589       string::size_type i=0, j;
00590 
00591       for(;;)
00592       {
00593         j = cwd.find('/', i);
00594         if(j == string::npos) j = cwd.size();
00595 
00596         path.append(encode(cwd.substr(i, j-i), false));
00597 
00598         if(j == cwd.size()) break;
00599 
00600         path += '/';
00601 
00602         i = j+1;
00603       }
00604     }
00605 
00606 
00607     Uri::Uri(const string &uri, const Uri &baseUri)
00608     {
00609       // Decompose URI into the four main parts (all four may be empty)
00610 
00611       // Scheme
00612       string::size_type i = uri.find_first_of(":/?#");
00613       if(i != string::npos && uri[i] == ':') scheme = uri.substr(0, ++i);
00614       else i = 0;
00615 
00616       // Authority
00617       const string::size_type iAuthority = i;
00618       if(uri.size() >= i+2 && uri.substr(i, 2) == "//")
00619       {
00620         string::size_type j = uri.find_first_of("/?#", i+2);
00621         if(j == string::npos) j = uri.size();
00622         authority = uri.substr(i, j-i);
00623         i = j;
00624       }
00625 
00626       // Path
00627       const string::size_type iPath = i;
00628       {
00629         string::size_type j = uri.find_first_of("?#", i);
00630         if(j == string::npos) j = uri.size();
00631         path = uri.substr(i, j-i);
00632         i = j;
00633       }
00634 
00635       // Query
00636       const string::size_type iQuery = i;
00637       if(uri.size() >= i+1 && uri[i] == '?')
00638       {
00639         string::size_type j = uri.find('#', i+1);
00640         if(j == string::npos) j = uri.size();
00641         query = uri.substr(i, j-i);
00642         i = j;
00643       }
00644 
00645       if(i < uri.size())
00646         ARCHON_THROW4(SyntaxException,
00647                       "Illegal character in URI (this looks more "
00648                       "like a URI Reference)", uri, i, i+1);
00649 
00650       string effectiveScheme = scheme.empty() ? baseUri.scheme : scheme;
00651       effectiveScheme.erase(effectiveScheme.size()-1);
00652       if(Text::compareIgnoreCase(effectiveScheme, "file") == 0)
00653       {
00654         validateAuthority(authority, uri, iAuthority, scheme_file);
00655         validatePath(path, uri, iPath, scheme_file);
00656 
00657         if(!query.empty())
00658           ARCHON_THROW4(SyntaxException,
00659                         "Queries are not supported by the " +
00660                         schemeName(scheme_file) + " URI scheme",
00661                         uri, iQuery, uri.size());
00662 
00663         if(scheme.empty()) resolveRelative(baseUri);
00664       }
00665       else if(Text::compareIgnoreCase(effectiveScheme, "http") == 0 ||
00666               Text::compareIgnoreCase(effectiveScheme, "https") == 0)
00667       {
00668         validateAuthority(authority, uri, iAuthority, scheme_http);
00669         validatePath(path, uri, iPath, scheme_http);
00670         validateQuery(query, uri, iQuery, scheme_http);
00671 
00672         if(scheme.empty()) resolveRelative(baseUri);
00673         else if(authority.size() < 3)
00674           ARCHON_THROW4(SyntaxException,
00675                         "Authority (hostname) required "
00676                         "for absolute " + schemeName(scheme_http) +
00677                         " URI", uri,
00678                         iAuthority, iAuthority+1);
00679       }
00680       else if(Text::compareIgnoreCase(effectiveScheme, "ftp") == 0)
00681       {
00682         validateAuthority(authority, uri, iAuthority, scheme_ftp);
00683         validatePath(path, uri, iPath, scheme_ftp);
00684 
00685         if(!query.empty())
00686           ARCHON_THROW4(SyntaxException,
00687                         "Queries are not supported by the " +
00688                         schemeName(scheme_ftp) + " URI scheme",
00689                         uri, iQuery, uri.size());
00690 
00691         if(scheme.empty()) resolveRelative(baseUri);
00692         else if(authority.size() < 3)
00693           ARCHON_THROW4(SyntaxException,
00694                         "Authority (hostname) required "
00695                         "for absolute " + schemeName(scheme_ftp) +
00696                         " URI", uri,
00697                         iAuthority, iAuthority+1);
00698       }
00699       else
00700         ARCHON_THROW4(SyntaxException,
00701                       "Invalid or unsupported URI scheme",
00702                       uri, 0, iAuthority);
00703     }
00704 
00705     bool Uri::isFileScheme() const
00706     {
00707       return Text::compareIgnoreCase(scheme, "file:") == 0;
00708     }
00709 
00710     string Uri::getFile() const
00711     {
00712       const string::size_type i = path.rfind('/');
00713       return path.substr(i == string::npos ? 0 : i+1);
00714     }
00715 
00716 
00717     static const string hexDigits = "0123456789abcdef";
00718 
00719     string Uri::encode(const string &v, bool plusForSpace)
00720     {
00721       string result;
00722       result.reserve(string::size_type(v.size()*1.5));
00723       for(unsigned i=0; i<v.size(); ++i)
00724       {
00725         unsigned char c = static_cast<unsigned char>(v[i]);
00726         if(c == ' ' && plusForSpace) result += '+';
00727         else if(c < '\x20' || c > '\x7e' ||
00728                 !(charClassTable[c-'\x20']&unreservedMask))
00729         {
00730           result += "%";
00731           result += hexDigits[c >> 4];
00732           result += hexDigits[c & 15];
00733         }
00734         else result += c;
00735       }
00736 
00737       return result;
00738     }
00739 
00740 
00741     string Uri::decode(const string &v, bool plusForSpace)
00742     {
00743       string result;
00744       result.reserve(v.size());
00745       for(unsigned i=0; i<v.size(); ++i)
00746       {
00747         unsigned char c = static_cast<unsigned char>(v[i]);
00748         if(c == '+' && plusForSpace) c = ' ';
00749         else if(c == '%')
00750         {
00751           if(v.size() <= i+2)
00752             ARCHON_THROW1(ArgumentException,
00753                           "Un-terminated escape sequence '" + v + "'");
00754 
00755           int p;
00756           c = static_cast<unsigned char>(v[++i]);
00757           if(c >= '0' && c <= '9') p = c - '0';
00758           else if(c >= 'A' && c <= 'F') p = c - 'A' + 10;
00759           else if(c >= 'a' && c <= 'f') p = c - 'a' + 10;
00760           else ARCHON_THROW1(ArgumentException,
00761                              "Invalid escape sequence '" + v + "'");
00762           p *= 16;
00763           c = static_cast<unsigned char>(v[++i]);
00764           if(c >= '0' && c <= '9') p += c - '0';
00765           else if(c >= 'A' && c <= 'F') p += c - 'A' + 10;
00766           else if(c >= 'a' && c <= 'f') p += c - 'a' + 10;
00767           else ARCHON_THROW1(ArgumentException,
00768                              "Invalid escape sequence '" + v + "'");
00769 
00770           c = static_cast<unsigned char>(p);
00771         }
00772 
00773         result += static_cast<char>(c);
00774       }
00775 
00776       return result;
00777     }
00778 
00779 
00780     string Uri::explain(const SyntaxException &e)
00781     {
00782       return
00783         "Malformed URI: " + e.getMessage() + "\n" +
00784         e.val + "\n" +
00785         makePointerLine(e.indexFrom, e.indexTo);
00786     }
00787 
00788 
00789     string Uri::toString() const
00790     {
00791       return scheme + authority + path + query;
00792     }
00793 
00794     ostream &operator<<(ostream &out, const Uri &u)
00795     {
00796       out << u.toString();
00797       return out;
00798     }
00799 
00800 
00801     UriReference::UriReference(const string &uriReference,
00802                                const UriReference &baseReference)
00803     {
00804       if(uriReference.empty())
00805       {
00806         uri = baseReference.uri;
00807         fragmentIdentifier = baseReference.fragmentIdentifier;
00808         return;
00809       }
00810 
00811       string::size_type i = uriReference.find('#');
00812       if(i == string::npos) uri = Uri(uriReference, baseReference.uri);
00813       else
00814       {
00815         uri = Uri(uriReference.substr(0, i), baseReference.uri);
00816         fragmentIdentifier = uriReference.substr(i);
00817 
00818         validateFragmentIdentifier(fragmentIdentifier, uriReference, i);
00819       }
00820     }
00821 
00822 
00823     string UriReference::toString() const
00824     {
00825       return uri.toString() + fragmentIdentifier;
00826     }
00827 
00828 
00829     ostream &operator<<(ostream &out, const UriReference &r)
00830     {
00831       out << r.toString();
00832       return out;
00833     }
00834   }
00835 }

Generated on Sun Jul 30 22:55:44 2006 for Archon by  doxygen 1.4.4