lexer.H

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 #ifndef ARCHON_UTILITIES_LEXER_H
00021 #define ARCHON_UTILITIES_LEXER_H
00022 
00023 #include <memory>
00024 #include <vector>
00025 
00026 #include <archon/util/ref.H>
00027 #include <archon/util/unicode.H>
00028 #include <archon/util/stream.H>
00029 #include <archon/util/dfa.H>
00030 
00031 #include <archon/util/regex.H>
00032 
00033 namespace Archon
00034 {
00035   namespace Utilities
00036   {
00037     using namespace std;
00038 
00050     struct LexerBase
00051     {
            // Abstract lexer interface. It declares the exception and lexeme
            // types used by lexers and three pure virtual accessors; a
            // concrete lexer has to implement all three.

            /**
             * Exception carrying a line number and a piece of text. It
             * derives virtually from Utilities::Exception.
             *
             * NOTE(review): presumably thrown when the input cannot be
             * tokenized; the throw sites are not in this header -- confirm.
             */
00052       struct InputException: virtual Utilities::Exception
00053       {
00054         int lineNumber;
00055         ustring text;
              /** Stores both arguments in the members of the same name. */
00056         InputException(int lineNumber, ustring text):
00057           lineNumber(lineNumber), text(text) {}
00058         virtual ~InputException() throw() {}
              /** Declared here only; the definition is not in this header. */
00059         string getMessage() const throw();
00060       };
00061 
            /**
             * A token record: an integer type code plus a RefAny value.
             * A default-constructed Lexeme has type -1 and value 0.
             */
00062       struct Lexeme
00063       {
00064         int type;
00065         RefAny value;
00066         Lexeme(int type = -1, RefAny value = 0): type(type), value(value) {}
00067       };
00068 
00069       virtual ~LexerBase() {}
00070 
            /**
             * Pure virtual; implemented by the concrete lexer.
             *
             * @param l Lexeme passed by non-const reference, so the
             * implementation is able to write into it. NOTE(review):
             * presumably receives the next lexeme of the input -- confirm.
             */
00077       virtual void getNext(Lexeme &l) = 0;
00078 
            /**
             * Pure virtual; implemented by the concrete lexer.
             *
             * @return A ustring by value. NOTE(review): presumably the text
             * of the most recently matched lexeme -- confirm.
             */
00084       virtual ustring getText() const = 0;
00085 
            /**
             * Pure virtual; implemented by the concrete lexer.
             *
             * @return An integer type code. NOTE(review): presumably the
             * type of the most recently matched lexeme -- confirm.
             */
00090       virtual int getType() const = 0;
00091     };
00092 
00120     class Lexer: public LexerBase
00121     {
            // Concrete LexerBase that combines an Engine, a Stream::UReader
            // and an optional Context. The nested types are the pieces used
            // to set one up: RuleSet collects (regex, method, lexeme) rules,
            // Actor<C> registers member functions of a context class C as
            // indexable methods, and Engine is constructed from a RuleSet
            // plus an optional actor.
00122     public:
            /**
             * Base class for user-supplied context objects. Actor<C> casts a
             * Context pointer to C before invoking a registered method.
             *
             * NOTE(review): lexerError() is presumably the hook invoked when
             * lexing fails; its caller is not in this header -- confirm.
             */
00132       struct Context
00133       {
00134         virtual ~Context() {}
00135         virtual void lexerError() = 0;
00136       };
00137 
            /**
             * Type-erased table of callable methods. Entries are addressed
             * by the integer index that Actor::registerMethod returns. All
             * members are protected; Lexer is a friend.
             */
00143       class ActorBase
00144       {
00145         friend class Lexer;
00146 
00147       protected:
              /** Interface of a single registered method. */
00148         struct MethodBase: virtual RefObjectBase
00149         {
00150           virtual ~MethodBase() {}
00151           virtual void call(Context *, Lexeme &) const = 0;
00152         };
00153 
              /** Registered methods, in registration order. */
00154         vector<Ref<const MethodBase> > methods;
00155 
00156         virtual ~ActorBase() {}
00157 
              /**
               * Invokes the registered method at index @p method, forwarding
               * @p c and @p l to it.
               *
               * The index is used without any range check, so it has to lie
               * in [0, methods.size()).
               */
00158         void call(int method, Context *c, Lexeme &l) const
00159         {
00160           methods[method]->call(c, l);
00161         }
00162 
              /** Implemented by Actor<C> as a dynamic_cast test against C. */
00163         virtual bool verifyContext(const Context &) const = 0;
00164       };
00165 
            /**
             * Actor for contexts of class C. Member functions of C are
             * registered through the registerMethod overloads; each one is
             * wrapped in a MethodBase object and appended to the inherited
             * methods table.
             *
             * The wrappers convert the Context pointer with static_cast, so
             * no run-time check is made at call time that the context really
             * is a C.
             */
00177       template<class C>
00178       class Actor: public ActorBase
00179       {
              /**
               * @return true when dynamic_cast of &c to const C * yields a
               * non-null pointer, false otherwise.
               */
00180         bool verifyContext(const Context &c) const
00181         {
00182           return dynamic_cast<const C *>(&c);
00183         }
00184 
              /**
               * Wraps a member of the form void (C::*)(). After the member
               * returns, the lexeme type is set to -1.
               *
               * NOTE(review): -1 is also the default Lexeme type; presumably
               * it means "no lexeme produced" -- confirm.
               */
00185         class Method0: public MethodBase
00186         {
00187           void (C::*method)();
00188 
00189         public:
00190           Method0(void (C::*method)()): method(method) {}
00191 
00192           void call(Lexer::Context *c, Lexer::Lexeme &l) const
00193           {
00194             (static_cast<C *>(c)->*method)();
00195             l.type = -1;
00196           }
00197         };
00198 
              /**
               * Wraps a member of the form void (C::*)(Lexeme &). The lexeme
               * is handed to the member; this wrapper writes nothing itself.
               */
00199         class Method1: public MethodBase
00200         {
00201           void (C::*method)(Lexeme &);
00202 
00203         public:
00204           Method1(void (C::*method)(Lexeme &)): method(method) {}
00205 
00206           void call(Context *c, Lexeme &l) const
00207           {
00208             (static_cast<C *>(c)->*method)(l);
00209           }
00210         };
00211 
              /**
               * Wraps a member of the form R (C::*)(). The returned value is
               * stored in l.value, wrapped in a newly allocated RefObject<R>.
               * The lexeme type is left untouched.
               */
00212         template<typename R>
00213         class Method2: public MethodBase
00214         {
00215           R (C::*method)();
00216 
00217         public:
00218           Method2(R (C::*method)()): method(method) {}
00219 
00220           void call(Context *c, Lexeme &l) const
00221           {
00222             l.value = new RefObject<R>((static_cast<C *>(c)->*method)());
00223           }
00224         };
00225 
00226       public:
              /**
               * Registers a member taking no arguments and returning void.
               *
               * @return Index of the new entry in the methods table.
               */
00227         int registerMethod(void (C::*method)())
00228         {
00229           methods.push_back(new Method0(method));
00230           return methods.size()-1;
00231         }
00232 
              /**
               * Registers a member that takes the lexeme by reference.
               *
               * @return Index of the new entry in the methods table.
               */
00233         int registerMethod(void (C::*method)(Lexeme &))
00234         {
00235           methods.push_back(new Method1(method));
00236           return methods.size()-1;
00237         }
00238 
              /**
               * Registers a member taking no arguments and returning R.
               *
               * @return Index of the new entry in the methods table.
               */
00239         template<typename R>
00240         int registerMethod(R (C::*method)())
00241         {
00242           methods.push_back(new Method2<R>(method));
00243           return methods.size()-1;
00244         }
00245       };
00246 
00247       class Engine;
00248 
            /**
             * Collection of lexer rules. Every add() overload appends one
             * Rule, so rules are kept in the order they were added. Engine
             * is a friend of this class.
             */
00258       class RuleSet
00259       {
00260         friend class Engine;
00261 
              /** One rule: a regex, a method index and a lexeme. */
00262         struct Rule
00263         {
00264           Regex regex;
00265           int method;
00266           Lexeme lexeme;
00267           Rule(const Regex &r, int method, Lexeme lexeme):
00268             regex(r), method(method), lexeme(lexeme) {}
00269         };
00270 
00271         vector<Rule> rules;
00272 
00273       public:
              /**
               * Adds a rule whose lexeme value is a copy of @p lexemeValue
               * wrapped in a newly allocated RefObject<L>.
               *
               * @param r Regex of the rule.
               * @param method Method index stored in the rule.
               * @param terminal Stored as the type of the rule's lexeme.
               * @param lexemeValue Value to wrap.
               */
00285         template<typename L>
00286         void add(const Regex &r, int method, int terminal, L lexemeValue)
00287         {
00288           rules.push_back(Rule(r, method,
00289                                Lexeme(terminal,
00290                                       new RefObject<L>(lexemeValue))));
00291         }
00292 
              /**
               * Adds a rule whose lexeme value is the given Ref<L>, passed
               * to the Lexeme constructor without extra wrapping.
               */
00293         template<typename L>
00294         void add(const Regex &r, int method, int terminal, Ref<L> lexemeValue)
00295         {
00296           rules.push_back(Rule(r, method, Lexeme(terminal, lexemeValue)));
00297         }
00298 
              /** Adds a rule whose lexeme value is the given RefAny. */
00299         void add(const Regex &r, int method, int terminal, RefAny lexemeValue)
00300         {
00301           rules.push_back(Rule(r, method, Lexeme(terminal, lexemeValue)));
00302         }
00303 
              /**
               * Adds a rule whose lexeme value is 0. Both @p method and
               * @p terminal default to -1.
               */
00304         void add(const Regex &r, int method = -1, int terminal = -1)
00305         {
00306           rules.push_back(Rule(r, method, Lexeme(terminal, 0)));
00307         }
00308       };
00309 
            /**
             * State shared by lexers: an actor pointer, a DFA, a rule table
             * and per-state anchor masks. All data members are private and
             * Lexer is a friend. The copy constructor and the assignment
             * operator are declared private, so code outside the class
             * cannot copy an Engine.
             *
             * NOTE(review): presumably the DFA is built from the regexes of
             * the rule set inside the constructor, which is defined outside
             * this header -- confirm.
             */
00315       class Engine
00316       {
00317         friend class Lexer;
00318 
              /** A rule without its regex: method index plus lexeme. */
00319         struct Rule
00320         {
00321           int method;
00322           Lexeme lexeme;
00323 
00324           Rule(): method(-1) {}
00325           Rule(int method, Lexeme lexeme): method(method), lexeme(lexeme) {}
00326         };
00327 
00328         const ActorBase *actor;
00329         auto_ptr<DFA> dfa;
00330         vector<Rule> rules;
00331 
00332         // Bit patterns for the types of anchors. One for each state.
00333         vector<unsigned> anchorMasks;
00334 
00335         // Prevent copying
00336         Engine(const Engine &);
00337         Engine &operator=(const Engine &);
00338 
00339       public:
              /**
               * Declared here; the definition is not in this header.
               *
               * @param RuleSet (unnamed) The rules to build the engine from.
               * @param ActorBase (unnamed) Optional actor; defaults to 0.
               */
00349         Engine(const RuleSet &, const ActorBase * =0);
00350       };
00351 
00352     private:
00353       const Engine &engine;
            // The member below is a reference to the caller's Ref object, not
            // a copy of it, so the Ref passed to the constructor has to
            // outlive this Lexer.
00354       Ref<Stream::UReader> &reader; // Should this really be an alias???
00355       Context *context;
00356       ustring buffer;
00357       int bufferSize;
00358       int charCount;
00359       int type;
00360       int anchorState;
00361       int lineNumber;
00362 
00363 
00364     public:
            /**
             * Binds the lexer to an engine, a reader and an optional context.
             * The engine and the reader Ref are stored as references; the
             * context is stored as a raw pointer.
             *
             * Initial state: bufferSize 0, charCount 0, type -1,
             * anchorState 0, lineNumber 1, buffer default-constructed.
             *
             * @param e Engine to use; must outlive this Lexer.
             * @param r Reader Ref; must outlive this Lexer.
             * @param c Context pointer; defaults to 0.
             */
00377       Lexer(const Engine &e, Ref<Stream::UReader> &r, Context *c=0):
00378         engine(e), reader(r), context(c), bufferSize(0), charCount(0),
00379         type(-1), anchorState(0), lineNumber(1) {}
00380 
            /**
             * Implements LexerBase::getNext. Declared here only; the
             * definition is not in this header.
             */
00387       void getNext(Lexeme &);
00388 
            /** @return The first charCount characters of buffer. */
00394       ustring getText() const
00395       {
00396         return buffer.substr(0, charCount);
00397       }
00398 
            /** @return The type member, which is -1 after construction. */
00403       int getType() const
00404       {
00405         return type;
00406       }
00407 
            /** @return The lineNumber member, which is 1 after construction. */
00412       int getLineNumber() const { return lineNumber; }
00413     };
00414   }
00415 }
00416 
00417 #endif // ARCHON_UTILITIES_LEXER_H

Generated on Sun Jul 30 22:55:44 2006 for Archon by  doxygen 1.4.4