lr_parser_base.C

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 #include <iostream>
00021 
00022 #include <archon/util/unicode.H>
00023 
00024 #include <archon/util/lr_parser_base.H>
00025 
00026 using namespace std;
00027 
00028 namespace Archon
00029 {
00030   namespace Utilities
00031   {
00032     LrParserBase::LrParserBase(CFG &g, const ActorBase *actor,
00033                                const Printer *printer):
00034       grammar(g), actor(actor), printer(printer)
00035     {
            // Flattens every production of the grammar into the compact
            // 'productions' table that parse() indexes when it reduces.
            //
            // g       - grammar to parse with; it is modified here by
            //           eliminateMidRuleActions().
            // actor   - stored as is; parse() tests it for null before use.
            // printer - stored as is; parse() tests it for null before use.
            //
            // Raises InternalException (via ARCHON_THROW1) if an action or
            // nil symbol is found among the right-hand side symbols.
            //
            // NOTE(review): presumably eliminateMidRuleActions() guarantees
            // that an action can only be the last symbol of a production;
            // the code below relies on that - confirm against CFG.
00036       grammar.eliminateMidRuleActions();
00037 
00038       const int numberOfRules = grammar.getNumberOfRules();
00039       for(int i=0; i<numberOfRules; ++i)
00040       {
00041         const CFG::Rule &r = grammar.getRule(i);
00042         const int numberOfProductions = r.getNumberOfProductions();
00043         for(int j=0; j<numberOfProductions; ++j)
00044         {
00045           const CFG::Production &p = r.getProduction(j);
00046           const int numberOfSymbols = p.getNumberOfSymbols();
                  // q starts out as whatever Production(i, j) sets up; for an
                  // empty production nothing below touches it again.
                  // NOTE(review): parse() treats 'method > -4' as "has an
                  // action", so the default method is presumably below -3.
00047           Production q(i, j);
00048           if(numberOfSymbols)
00049           {
                    // A trailing action is not part of the right-hand side:
                    // it is stored as the method/args of the production.
00050             const CFG::Symbol &s = p.getSymbol(numberOfSymbols-1);
00051             if(s.getType() == CFG::Symbol::action)
00052             {
00053               q.right.resize(numberOfSymbols-1);
00054               q.method = s.getIndex();
00055               q.args = s.getArgs();
00056             }
00057             else q.right.resize(numberOfSymbols);
00058           }
                  // Encode the right-hand side as plain integers: a terminal
                  // keeps its index, a nonterminal with index n becomes -1-n.
00059           for(unsigned k=0; k<q.right.size(); ++k)
00060           {
00061             const CFG::Symbol &s = p.getSymbol(k);
00062             switch(s.getType())
00063             {
00064             case CFG::Symbol::terminal:
00065               q.right[k] = s.getIndex();
00066               break;
00067             case CFG::Symbol::nonterminal:
00068               q.right[k] = -1-s.getIndex();
00069               break;
                    // The two cases below have no 'break'; presumably
                    // ARCHON_THROW1 always throws, so control never falls
                    // through.
00070             case CFG::Symbol::action:
00071               ARCHON_THROW1(InternalException, "No actions should appear here");
00072             case CFG::Symbol::nil:
00073               ARCHON_THROW1(InternalException, "Nil-symbols should not appear here");       
00074             }
00075           }
00076 
                  // Entries are appended rule by rule, production by
                  // production, in the order the grammar enumerates them.
00077           productions.push_back(q);
00078         }
00079       }
00080     }
00081 
00082     LrParserBase::~LrParserBase()
00083     {{ // The extra scope is needed to work around gcc3.2 bug #8287
            // Intentionally empty: no statements are executed here.
            // Keep the doubled braces as they are; they are the workaround
            // itself, not a typo.
00084     }}
00085 
00086     void LrParserBase::attrTypeError(int arg, int rule, int production,
00087                                      const type_info &actual,
00088                                      const type_info &expected) const
00089     {
            // Reports an action argument of the wrong type by raising
            // AttributeTypeException through ARCHON_THROW1.
            //
            // arg        - argument number as it should appear in the
            //              message; parse() passes a one-based number.
            // rule,
            // production - identify the production; they are handed to
            //              grammar.printProduction() for the message text.
            // actual     - type the argument really had.
            // expected   - type the action required.
            //
            // The type names in the message come from type_info::name(),
            // whose format is implementation-defined (it may be a mangled
            // name).
            // NOTE(review): presumably ARCHON_THROW1 always throws, so this
            // function never returns normally - confirm in the macro.
00090       ARCHON_THROW1(AttributeTypeException,
00091                     "Argument " + Text::toString(arg) + " of " +
00092                     grammar.printProduction(rule, production) + " had type " +
00093                     actual.name() + " and should have type " + expected.name());
00094     }
00095 
00096     RefAnyConst LrParserBase::parse(LexerBase &lexer, Context *context, Logger *logger) const
00097     {
            // Table-driven shift/reduce parse of the lexemes supplied by
            // 'lexer'.
            //
            // lexer   - source of lexemes; getNext() is called once up front
            //           and again after every shift.
            // context - may be null. When null no semantic action is run.
            //           When set it receives parserError() on a syntax
            //           error, and the logger is then not used for that
            //           error.
            // logger  - may be null; receives a trace of shifts and
            //           reductions. Attribute values are only traced when a
            //           printer was given to the constructor.
            //
            // Returns the attribute on top of the stack when the input is
            // accepted, and a null reference (0) on a syntax error.
            //
            // Raises ArgumentException when the actor rejects 'context', and
            // AttributeTypeException (through attrTypeError) when an action
            // reports an argument of the wrong type.
            //
            // getAction() result, as decoded below:
            //   >= 0   shift and enter that state
            //   == -1  syntax error (the only value left for the final else)
            //   == -2  accept
            //   <= -3  reduce by productions[-3-action]
00098       if(actor && context && !actor->verifyContext(*context))
00099         ARCHON_THROW1(ArgumentException, "Invalid context type");
00100 
            // Each stack entry pairs a parser state with the attribute of
            // the symbol that led to it. stackTop mirrors stateStack.back().
00101       vector<pair<int, RefAnyConst> > stateStack;
00102       pair<int, RefAnyConst> stackTop = make_pair(0, RefAnyConst(0));
00103       stateStack.push_back(stackTop);
00104 
00105       LexerBase::Lexeme lexeme;
00106       lexer.getNext(lexeme);
00107       for(;;)
00108       {
00109         const int action = getAction(stackTop.first, lexeme.type);
00110         if(action > -1)
00111         {
00112           // Shift
00113           if(logger) logger->log("Shift: " + grammar.printTerminal(lexeme.type));
00114 
00115           stackTop = make_pair(action, lexeme.value);
00116           stateStack.push_back(stackTop);
00117           lexer.getNext(lexeme);
00118         }
00119         else if(action < -2)
00120         {
00121           // Reduce
00122           const Production &p = productions[-3-action];
00123 
00124           if(logger) logger->log("Reduce: " + grammar.printProduction(p.rule, p.production));
00125 
                  // Method codes: -3 concat, -2 copy, -1 null, >= 0 user
                  // defined. Anything below -3 means "no action".
00126           if(p.method > -4 && actor && context)
00127           {
                    // Collect the arguments before the stack is shrunk. A
                    // non-negative p.args[i] is an offset from the top of the
                    // stack (0 is the topmost entry); a negative one leaves
                    // the argument as a default-constructed reference.
00128             vector<RefAnyConst> args(p.args.size());
00129             for(unsigned i=0; i<p.args.size(); ++i)
00130               if(p.args[i]>=0) args[i] = (stateStack.end()-p.args[i]-1)->second;
                    // Drop the right-hand side but keep one slot for the
                    // nonterminal being produced (for an empty right-hand
                    // side this grows the stack by one entry).
00131             stateStack.resize(stateStack.size()-p.right.size()+1);
00132             try
00133             {
00134               if(p.method == -3)
00135               {
00136                 // Concat
                        // A null argument is treated as an empty string: the
                        // other argument is passed through unchanged.
00137                 const RefObject<ustring> *a1 =
00138                   dynamic_cast<const RefObject<ustring> *>(args[0].get());
00139                 const RefObject<ustring> *a2 =
00140                   dynamic_cast<const RefObject<ustring> *>(args[1].get());
00141                 if(!a1 && args[0])
00142                   ARCHON_THROW2(CallException, 0, typeid(RefObject<ustring>));
00143                 if(!a2 && args[1])
00144                   ARCHON_THROW2(CallException, 1, typeid(RefObject<ustring>));
00145                 stateStack.back().second =
00146                   !a1 ? a2 :
00147                   !a2 ? a1 : new RefObject<ustring>(a1->value + a2->value);
00148               }
00149               else if(p.method == -2)
00150               {
00151                 // Copy
00152                 stateStack.back().second = args[0];
00153               }
00154               else if(p.method == -1)
00155               {
00156                 // Null
00157                 stateStack.back().second = 0;
00158               }
00159               else
00160               {
00161                 // User defined
00162                 stateStack.back().second = actor->call(p.method, args, *context);
00163               }
00164             }
00165             catch(const CallException &e)
00166             {
                      // e.arg is zero-based; the message uses a one-based
                      // number. A null argument has no dynamic type, and
                      // typeid on a dereferenced null pointer would raise
                      // std::bad_typeid and hide the real error, so
                      // typeid(void) is reported for it instead.
00167               attrTypeError(e.arg+1, p.rule, p.production,
00168                             args[e.arg] ? typeid(*args[e.arg].get()) : typeid(void), e.t);
00169             }
00170 
00171             if(logger && printer)
00172             {
                      // NOTE(review): getMethodName() is also called for the
                      // built-in codes -3..-1; confirm that it accepts
                      // negative method indices.
00173               string s = "Action: " + actor->getMethodName(p.method) + "(";
00174               for(unsigned i=0; i<args.size(); ++i)
00175               {
00176                 if(i) s += ", ";
00177                 s += printer->print(args[i]);
00178               }
00179               s += ") = " + printer->print(stateStack.back().second);
00180               logger->log(s);
00181             }
00182           }
                  // No action is run: the slot kept for the nonterminal
                  // retains the attribute it already held (that of the first
                  // right-hand side symbol), or a default-constructed one
                  // when the right-hand side is empty.
00183           else stateStack.resize(stateStack.size()-p.right.size()+1);
00184 
                  // The state below the new top decides where to go next.
00185           stateStack.back().first = getGoto((stateStack.end()-2)->first, p.rule);
00186           stackTop = stateStack.back();
00187         }
00188         else if(action == -2)
00189         {
00190           // Accept
00191           if(logger && printer) logger->log("Accept: " + printer->print(stackTop.second));
00192           return stackTop.second;
00193         }
00194         else
00195         {
00196           // Error
00197           if(context) context->parserError();
00198           else if(logger)
00199           {
                    // A lexeme type of -1 is reported as end of input.
00200             if(lexeme.type == -1)
00201               logger->log("Syntax error. Unexpected end of input.");
00202             else
00203               logger->log("Syntax error. Unexpected symbol " +
00204                           Text::toString(Text::escapeNonprintable(lexer.getText())) +
00205                           ".");
00206           }
00207           return 0;
00208         }
00209 
              // Reached after every shift and reduce: dump the attribute
              // stack.
00210         if(logger && printer)
00211         {
00212           string s = "Stack:";
00213           for(unsigned i=0; i<stateStack.size(); ++i)
00214             s += " " + printer->print(stateStack[i].second);
00215           logger->log(s);
00216         }
00217       }
00218     }
00219   }
00220 }

Generated on Sun Jul 30 22:55:44 2006 for Archon by  doxygen 1.4.4