00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <iostream>
00021
00022 #include <archon/util/unicode.H>
00023
00024 #include <archon/util/lr_parser_base.H>
00025
00026 using namespace std;
00027
00028 namespace Archon
00029 {
00030 namespace Utilities
00031 {
00032 LrParserBase::LrParserBase(CFG &g, const ActorBase *actor,
00033 const Printer *printer):
00034 grammar(g), actor(actor), printer(printer)
00035 {
00036 grammar.eliminateMidRuleActions();
00037
00038 const int numberOfRules = grammar.getNumberOfRules();
00039 for(int i=0; i<numberOfRules; ++i)
00040 {
00041 const CFG::Rule &r = grammar.getRule(i);
00042 const int numberOfProductions = r.getNumberOfProductions();
00043 for(int j=0; j<numberOfProductions; ++j)
00044 {
00045 const CFG::Production &p = r.getProduction(j);
00046 const int numberOfSymbols = p.getNumberOfSymbols();
00047 Production q(i, j);
00048 if(numberOfSymbols)
00049 {
00050 const CFG::Symbol &s = p.getSymbol(numberOfSymbols-1);
00051 if(s.getType() == CFG::Symbol::action)
00052 {
00053 q.right.resize(numberOfSymbols-1);
00054 q.method = s.getIndex();
00055 q.args = s.getArgs();
00056 }
00057 else q.right.resize(numberOfSymbols);
00058 }
00059 for(unsigned k=0; k<q.right.size(); ++k)
00060 {
00061 const CFG::Symbol &s = p.getSymbol(k);
00062 switch(s.getType())
00063 {
00064 case CFG::Symbol::terminal:
00065 q.right[k] = s.getIndex();
00066 break;
00067 case CFG::Symbol::nonterminal:
00068 q.right[k] = -1-s.getIndex();
00069 break;
00070 case CFG::Symbol::action:
00071 ARCHON_THROW1(InternalException, "No actions should appear here");
00072 case CFG::Symbol::nil:
00073 ARCHON_THROW1(InternalException, "Nil-symbols should not appear here");
00074 }
00075 }
00076
00077 productions.push_back(q);
00078 }
00079 }
00080 }
00081
00082 LrParserBase::~LrParserBase()
00083 {{
00084 }}
00085
00086 void LrParserBase::attrTypeError(int arg, int rule, int production,
00087 const type_info &actual,
00088 const type_info &expected) const
00089 {
00090 ARCHON_THROW1(AttributeTypeException,
00091 "Argument " + Text::toString(arg) + " of " +
00092 grammar.printProduction(rule, production) + " had type " +
00093 actual.name() + " and should have type " + expected.name());
00094 }
00095
00096 RefAnyConst LrParserBase::parse(LexerBase &lexer, Context *context, Logger *logger) const
00097 {
00098 if(actor && context && !actor->verifyContext(*context))
00099 ARCHON_THROW1(ArgumentException, "Invalid context type");
00100
00101 vector<pair<int, RefAnyConst> > stateStack;
00102 pair<int, RefAnyConst> stackTop = make_pair(0, RefAnyConst(0));
00103 stateStack.push_back(stackTop);
00104
00105 LexerBase::Lexeme lexeme;
00106 lexer.getNext(lexeme);
00107 for(;;)
00108 {
00109 const int action = getAction(stackTop.first, lexeme.type);
00110 if(action > -1)
00111 {
00112
00113 if(logger) logger->log("Shift: " + grammar.printTerminal(lexeme.type));
00114
00115 stackTop = make_pair(action, lexeme.value);
00116 stateStack.push_back(stackTop);
00117 lexer.getNext(lexeme);
00118 }
00119 else if(action < -2)
00120 {
00121
00122 const Production &p = productions[-3-action];
00123
00124 if(logger) logger->log("Reduce: " + grammar.printProduction(p.rule, p.production));
00125
00126 if(p.method > -4 && actor && context)
00127 {
00128 vector<RefAnyConst> args(p.args.size());
00129 for(unsigned i=0; i<p.args.size(); ++i)
00130 if(p.args[i]>=0) args[i] = (stateStack.end()-p.args[i]-1)->second;
00131 stateStack.resize(stateStack.size()-p.right.size()+1);
00132 try
00133 {
00134 if(p.method == -3)
00135 {
00136
00137 const RefObject<ustring> *a1 =
00138 dynamic_cast<const RefObject<ustring> *>(args[0].get());
00139 const RefObject<ustring> *a2 =
00140 dynamic_cast<const RefObject<ustring> *>(args[1].get());
00141 if(!a1 && args[0])
00142 ARCHON_THROW2(CallException, 0, typeid(RefObject<ustring>));
00143 if(!a2 && args[1])
00144 ARCHON_THROW2(CallException, 1, typeid(RefObject<ustring>));
00145 stateStack.back().second =
00146 !a1 ? a2 :
00147 !a2 ? a1 : new RefObject<ustring>(a1->value + a2->value);
00148 }
00149 else if(p.method == -2)
00150 {
00151
00152 stateStack.back().second = args[0];
00153 }
00154 else if(p.method == -1)
00155 {
00156
00157 stateStack.back().second = 0;
00158 }
00159 else
00160 {
00161
00162 stateStack.back().second = actor->call(p.method, args, *context);
00163 }
00164 }
00165 catch(CallException &e)
00166 {
00167 attrTypeError(e.arg+1, p.rule, p.production,
00168 typeid(*args[e.arg].get()), e.t);
00169 }
00170
00171 if(logger && printer)
00172 {
00173 string s = "Action: " + actor->getMethodName(p.method) + "(";
00174 for(unsigned i=0; i<args.size(); ++i)
00175 {
00176 if(i) s += ", ";
00177 s += printer->print(args[i]);
00178 }
00179 s += ") = " + printer->print(stateStack.back().second);
00180 logger->log(s);
00181 }
00182 }
00183 else stateStack.resize(stateStack.size()-p.right.size()+1);
00184
00185 stateStack.back().first = getGoto((stateStack.end()-2)->first, p.rule);
00186 stackTop = stateStack.back();
00187 }
00188 else if(action == -2)
00189 {
00190
00191 if(logger && printer) logger->log("Accept: " + printer->print(stackTop.second));
00192 return stackTop.second;
00193 }
00194 else
00195 {
00196
00197 if(context) context->parserError();
00198 else if(logger)
00199 {
00200 if(lexeme.type == -1)
00201 logger->log("Syntax error. Unexpected end of input.");
00202 else
00203 logger->log("Syntax error. Unexpected symbol " +
00204 Text::toString(Text::escapeNonprintable(lexer.getText())) +
00205 ".");
00206 }
00207 return 0;
00208 }
00209
00210 if(logger && printer)
00211 {
00212 string s = "Stack:";
00213 for(unsigned i=0; i<stateStack.size(); ++i)
00214 s += " " + printer->print(stateStack[i].second);
00215 logger->log(s);
00216 }
00217 }
00218 }
00219 }
00220 }