00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <string>
00021 #include <iostream>
00022
00023 #include <archon/util/nfa.H>
00024 #include <archon/util/dfa.H>
00025 #include <archon/util/text.H>
00026
00027 #include <archon/util/lr_parser_simple.H>
00028
00029 using namespace std;
00030
00031 namespace Archon
00032 {
00033 namespace Utilities
00034 {
00035 int SlrParser::getAction(int state, int terminal) const
00036 {
00037 return actionTable[state+numberOfStates*(terminal+1)];
00038 }
00039
00040 int SlrParser::getGoto(int state, int nonterminal) const
00041 {
00042 return gotoTable[state+numberOfStates*nonterminal];
00043 }
00044
00045 string SlrParser::printItemSets(const vector<pair<int, int> > &items,
00046 const vector<set<int> > &itemSets, int width)
00047 {
00048 vector<double> columnWidthFractions;
00049 columnWidthFractions.push_back(0.2);
00050 columnWidthFractions.push_back(0.9);
00051 Text::Table<string> table(itemSets.size()+1, columnWidthFractions);
00052 table(0, 0) = "State";
00053 table(0, 1) = "LR(0) item set";
00054 for(unsigned i=0; i<itemSets.size(); ++i)
00055 {
00056 const set<int> &s = itemSets[i];
00057 table(i+1, 0) = Text::toString(i);
00058 for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00059 {
00060 if(j!=s.begin()) table(i+1, 1) += ", ";
00061 const pair<int, int> &item = items[*j];
00062 const Production &p = productions[item.first];
00063 table(i+1, 1) += grammar.printItem(CFG::Item(p.rule, p.production, item.second));
00064 }
00065 }
00066
00067 return Text::format(table, width, 2, true);
00068 }
00069
00070 struct AutomatonSymbolPrinter: NFA::Printer, DFA::Printer
00071 {
00072 const CFG &g;
00073 AutomatonSymbolPrinter(const CFG &g): g(g) {}
00074 string printSymbol(unsigned i) const
00075 {
00076 const int s = static_cast<int>(i) - g.getNumberOfRules();
00077 return s<0 ? g.printNonterminal(-s-1) : g.printTerminal(s);
00078 }
00079 };
00080
00081 SlrParser::SlrParser(CFG &g, const ActorBase *actor, const Printer *printer):
00082 LrParserBase((g.introduceNewStartSymbol(), g), actor, printer)
00083 {
00084
00085
00086 const int automatonSymbolShift = grammar.getNumberOfRules();
00087
00088
00089 NFA nfa;
00090 vector<pair<int, int> > items;
00091 {
00092 map<pair<int, int>, int> nfaStateMap;
00093 multimap<int, int> nonterminalToProductionMap;
00094
00095 for(unsigned i=0; i<productions.size(); ++i)
00096 {
00097 const Production &p = productions[i];
00098 nonterminalToProductionMap.insert(make_pair(p.rule, i));
00099 int s = nfaStateMap[make_pair(i, 0)] =
00100 i ? nfa.addState() : nfa.addStartState();
00101 for(unsigned j=0; j<p.right.size(); ++j)
00102 {
00103 const int t = nfaStateMap[make_pair(i, j+1)] = nfa.addState();
00104 nfa.addEdge(s, t, automatonSymbolShift + p.right[j]);
00105 s = t;
00106 }
00107 }
00108
00109 for(unsigned i=0; i<productions.size(); ++i)
00110 {
00111 const Production &p = productions[i];
00112 for(unsigned j=0; j<p.right.size(); ++j)
00113 {
00114 const int t = p.right[j];
00115 if(t > -1) continue;
00116 const int s = nfaStateMap[make_pair(i, j)];
00117 pair<multimap<int, int>::iterator, multimap<int, int>::iterator> r =
00118 nonterminalToProductionMap.equal_range(-1-t);
00119 for(multimap<int, int>::iterator k=r.first; k!=r.second; ++k)
00120 nfa.addEdge(s, nfaStateMap[make_pair(k->second, 0)]);
00121 }
00122 }
00123
00124 items.resize(nfaStateMap.size());
00125 for(map<pair<int, int>, int>::iterator i=nfaStateMap.begin(); i!=nfaStateMap.end(); ++i)
00126 items[i->second] = i->first;
00127 }
00128
00129
00130
00131
00132
00133
00134 vector<set<int> > itemSets;
00135 DFA dfa(nfa, &itemSets);
00136
00137
00138
00139
00140
00141
00142 numberOfStates = dfa.getNumberOfStates();
00143
00144 actionTable.resize((grammar.getNumberOfTerminals()+1) * numberOfStates, -1);
00145 gotoTable.resize(grammar.getNumberOfRules() * numberOfStates, -1);
00146
00147
00148 for(int i=0; i<numberOfStates; ++i)
00149 {
00150 const DFA::State &s = dfa.getState(i);
00151 for(int j=0; j<s.getNumberOfEdges(); ++j)
00152 {
00153 const DFA::Edge &e = s.getEdge(j);
00154 for(unsigned k=e.getBegin(); k<=e.getEnd(); ++k)
00155 {
00156 if(k<static_cast<unsigned>(automatonSymbolShift))
00157 gotoTable[i+numberOfStates*(automatonSymbolShift-k-1)] =
00158 e.getTargetState();
00159 else actionTable[i+numberOfStates*(k-automatonSymbolShift+1)] =
00160 e.getTargetState();
00161 }
00162 }
00163 }
00164
00165 CFG::FirstSets firstSets(grammar);
00166
00167 CFG::FollowSets followSets(firstSets);
00168
00169
00170
00171
00172
00173
00174
00175
00176
00177
00178 for(unsigned i=0; i<itemSets.size(); ++i)
00179 {
00180 const set<int> &s = itemSets[i];
00181 for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00182 {
00183 const pair<int, int> &item = items[*j];
00184 const Production &p = productions[item.first];
00185 if(static_cast<unsigned>(item.second) != p.right.size()) continue;
00186 if(item.first == 0)
00187 {
00188 actionTable[i] = -2;
00189 continue;
00190 }
00191 const set<int> &f = followSets.get(p.rule);
00192 for(set<int>::const_iterator k=f.begin(); k!=f.end(); ++k)
00193 {
00194 int &v = actionTable[i+numberOfStates*(*k+1)];
00195 if(v < -2)
00196 {
00197 const Production &q = productions[-3-v];
00198 ARCHON_THROW1(ArgumentException,
00199 "reduce/reduce conflict at state " +
00200 Text::toString(i) + " on input terminal " +
00201 grammar.printTerminal(*k) + ": " +
00202 grammar.printProduction(q.rule,
00203 q.production) +
00204 " or " +
00205 grammar.printProduction(p.rule,
00206 p.production));
00207 }
00208 if(v > -1)
00209 ARCHON_THROW1(ArgumentException,
00210 "shift/reduce conflict at state " +
00211 Text::toString(i) + " on input terminal " +
00212 grammar.printTerminal(*k) + ": shift " +
00213 "and goto state " + Text::toString(v) +
00214 " or reduce by " +
00215 grammar.printProduction(p.rule,
00216 p.production));
00217 v = -3 - item.first;
00218 }
00219 }
00220 }
00221 }
00222
00223 SlrParser::~SlrParser()
00224 {
00225 }
00226 }
00227 }