lr_parser_base.H

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright  2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 
00021 /*
00022 
00023 While Bison is a parser generator that outputs C source code, this
00024 parser can and must be configured at run-time. This has both
00025 advantages and draw-backs. One adavantage is that the grammer need not
00026 be known at compile-time, and a draw-back is that the parsing tables
00027 needs to be re-created every time your application is re-started.
00028 
00029 Other advantages compared to Bison:
00030 
00031 - Totally re-entrant (multiple threads may use the same parser
00032 instance to parse different inputs at the same time)
00033 
00034 - No memory leak on abrupt termination of the parser
00035 
00036 - Semantic actions may throw exceptions to terminate the parser, and
00037 such exceptions travel all the way back to the point where your
00038 application calls the parse method.
00039 
00040 - The parser can output various kinds of information while preparing
00041 parsing tables and while running, which may be a great help for
00042 those who try to understand how an LR parser works.
00043 
00044 - Lets you choose which of the generally known parser table types to
00045 create: simple, canonical or look-ahead. Bison is fixed to
00046 look-ahead (LALR).
00047 
00048 
00049 NOTE: When using the special 'concat' method in a grammar action you
00050 must ensure that its arguments always are either null or a
00051 reference to LrParserBase::Attr<ustring> ie. a unicode string
00052 attribute.
00053 
00054 Whenever possible make productions left recursive instead of right
00055 recursive as it will keep the stack size (memory usage)
00056 smaller. Eg. for a simple sequence 'seq -> seq item' is prefered over
00057 'seq -> item seq'.
00058 
00059 */
00060 
00061 #ifndef ARCHON_UTILITIES_LR_PARSER_BASE_H
00062 #define ARCHON_UTILITIES_LR_PARSER_BASE_H
00063 
00064 #include <string>
00065 #include <vector>
00066 #include <typeinfo>
00067 
00068 #include <archon/util/exception.H>
00069 #include <archon/util/ref.H>
00070 #include <archon/util/cfg.H>
00071 #include <archon/util/lexer.H>
00072 #include <archon/util/logger.H>
00073 
00074 namespace Archon
00075 {
00076   namespace Utilities
00077   {
00078     using namespace std;
00079 
00086     class LrParserBase
00087     {
00088     public:
00089       struct Context
00090       {
00091         virtual ~Context() {}
00092         virtual void parserError() = 0;
00093       };
00094 
00095       struct Printer
00096       {
00097         virtual string print(const RefAnyConst &attribute) const = 0;
00098         virtual ~Printer() {}
00099       };
00100 
00101     protected:
00105       struct CallException: virtual Exception
00106       {
00107         int arg;
00108         const type_info &t;
00109         CallException(string l, int arg, const type_info &t): Exception(l), arg(arg), t(t) {}
00110       };
00111 
00112       struct AttributeTypeException: virtual Exception
00113       {
00114         AttributeTypeException(string l, string m): Exception(l, m) {}
00115       };
00116 
00117       struct Production
00118       {
00119         int rule;       // index of nonterminal on the left hand side
00120         int production; // index of production within left hand nonterminal
00121         vector<int> right; // Right hand side symbols i<0 is nonterminal -1-i, i>=0 is terminal i
00122         int method; // Method index, -1, -2 & -3 are special methods and -4 is no method
00123         vector<int> args;
00124 
00125         Production(int rule, int production):
00126           rule(rule), production(production), method(-4) {}
00127       };
00128 
00129       struct ActorBase: CFG::Actor
00130       {
00131         virtual bool verifyContext(const Context &) const = 0;
00132         virtual RefAnyConst call(int method, const vector<RefAnyConst> &args, Context &) const = 0;
00133       };
00134 
00135       CFG grammar;
00136       vector<Production> productions;
00137 
00138       LrParserBase(CFG &, const ActorBase *, const Printer *);
00139 
00149       virtual int getAction(int state, int terminal) const = 0;
00150 
00156       virtual int getGoto(int state, int nonterminal) const = 0;
00157 
00158     private:
00159       const ActorBase *actor;
00160       const Printer *printer;
00161 
00162       void attrTypeError(int arg, int rule, int production,
00163                          const type_info &actual,
00164                          const type_info &expected) const;
00165 
00166       RefAnyConst parse(LexerBase &, Context *, Logger *) const;
00167 
00168     public:
00169       template<class C>
00170       class Actor: public ActorBase
00171       {
00172         struct MethodBase: virtual RefObjectBase
00173         {
00174           virtual ~MethodBase() {}
00175           virtual string getName() const = 0;
00176           virtual int getArity() const = 0;
00177           virtual RefAnyConst call(C &context, const vector<RefAnyConst> &) const = 0;
00178         };
00179 
00180         vector<Ref<const MethodBase> > methods;
00181         map<string, int> methodMap;      // Maps method names to method indices.
00182 
00183         bool verifyContext(const Context &c) const
00184         {
00185           return dynamic_cast<const C *>(&c);
00186         }
00187 
00188         RefAnyConst call(int method, const vector<RefAnyConst> &args, Context &c) const
00189         {
00190           return methods[method]->call(static_cast<C &>(c), args);
00191         }
00192 
00193         /*
00194          * The purpose of the following method classes are to convert a
00195          * general call into a specific real method call. 'MethodV<n>'
00196          * knows of a method that takes n arguments and return nothing
00197          * (void). 'Method<n>' knows of a mehod that takes n arguments
00198          * and return one value.
00199          *
00200          * Note: If you need more arguments then just add more classes
00201          * yourself it is straight foreward.
00202          */
00203         class MethodV0;
00204         template<class> class Method0;
00205         template<class> class MethodV1;
00206         template<class, class> class Method1;
00207         template<class, class> class MethodV2;
00208         template<class, class, class> class Method2;
00209 
00210         int registerMethod(string name, const MethodBase *m)
00211         {
00212           pair<map<string, int>::iterator, bool> r =
00213             methodMap.insert(make_pair(name, methods.size()));
00214           if(!r.second)
00215             ARCHON_THROW1(ArgumentException, "Redefinition of method '" + name + "'");
00216           methods.push_back(m);
00217           return methods.size()-1;
00218         }
00219 
00220       public:
00221         int getNumberOfMethods() const
00222         {
00223           return methods.size();
00224         }
00225 
00226         int getMethodArity(int i) const
00227         {
00228           // Handle the special actions 'null', 'copy' & 'concat'
00229           return i < -2 ? 2 : i < -1 ? 1 : i < 0 ? 0 : methods[i]->getArity();
00230         }
00231 
00232         string getMethodName(int i) const
00233         {
00234           // Handle the special actions 'null', 'copy' & 'concat'
00235           return i < -2 ? "<concat>" : i < -1 ? "<copy>" : i < 0 ? "<null>" : methods[i]->getName();
00236         }
00237 
00238         int registerMethod(string name, void (C::*method)())
00239         {
00240           return registerMethod(name, new MethodV0(name, method));
00241         }
00242 
00243         template<class R>
00244         int registerMethod(string name, Ref<const R> (C::*method)())
00245         {
00246           return registerMethod(name, new Method0<R>(name, method));
00247         }
00248 
00249         template<class A1>
00250         int registerMethod(string name,
00251                            void (C::*method)(const Ref<const A1> &))
00252         {
00253           return registerMethod(name, new MethodV1<A1>(name, method));
00254         }
00255 
00256         template<class R, class A1>
00257         int registerMethod(string name,
00258                            Ref<const R> (C::*method)(const Ref<const A1> &))
00259         {
00260           return registerMethod(name, new Method1<R, A1>(name, method));
00261         }
00262 
00263         template<class A1, class A2>
00264         int registerMethod(string name,
00265                            void (C::*method)(const Ref<const A1> &,
00266                                              const Ref<const A2> &))
00267         {
00268           return registerMethod(name, new MethodV2<A1, A2>(name, method));
00269         }
00270 
00271         template<class R, class A1, class A2>
00272         int registerMethod(string name,
00273                            Ref<const R> (C::*method)(const Ref<const A1> &,
00274                                                      const Ref<const A2> &))
00275         {
00276           return registerMethod(name, new Method2<R, A1, A2>(name, method));
00277         }
00278       };
00279 
00280       virtual ~LrParserBase();
00281 
00285       void parse(LexerBase &lexer, Logger *l=0) const
00286       {
00287         parse(lexer, 0, l);
00288       }
00289 
00294       void parse(LexerBase &lexer, Context &c, Logger *l=0) const
00295       {
00296         parse(lexer, &c, l);
00297       }
00298 
00305       template<class R>
00306       void parse(LexerBase &lexer, Context &c, Ref<const R> &result, Logger *l=0)
00307       {
00308         RefAnyConst r1 = parse(lexer, &c, l);
00309         const R *r2 = dynamic_cast<const R *>(r1.get());
00310         if(!r2 && r1)
00311           ARCHON_THROW1(AttributeTypeException, string("Parser result has type ") +
00312                         typeid(*r1.get()).name() + " but its type was "
00313                         "expected to be " + typeid(R).name());
00314         result = r2;
00315       }
00316     };
00317   }
00318 }
00319 
00320 #endif // ARCHON_UTILITIES_LR_PARSER_BASE_H

Generated on Sun Jul 30 22:55:44 2006 for Archon by  doxygen 1.4.4