00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032
00033
00034
00035 #include <string>
00036 #include <iostream>
00037
00038 #include <archon/util/nfa.H>
00039 #include <archon/util/dfa.H>
00040 #include <archon/util/text.H>
00041
00042 #include <archon/util/lr_parser_canonical.H>
00043
00044 using namespace std;
00045
00046 namespace Archon
00047 {
00048 namespace Utilities
00049 {
00050 int ClrParser::getAction(int state, int terminal) const
00051 {
00052 return actionTable[state+numberOfStates*(terminal+1)];
00053 }
00054
00055 int ClrParser::getGoto(int state, int nonterminal) const
00056 {
00057 return gotoTable[state+numberOfStates*nonterminal];
00058 }
00059
00060 string ClrParser::printItemSets(const vector<LR1Item> &items,
00061 const vector<set<int> > &itemSets, int width)
00062 {
00063 vector<double> columnWidthFractions;
00064 columnWidthFractions.push_back(0.2);
00065 columnWidthFractions.push_back(0.9);
00066 Text::Table<string> table(itemSets.size()+1, columnWidthFractions);
00067 table(0, 0) = "State";
00068 table(0, 1) = "LR(1) item set";
00069 for(unsigned i=0; i<itemSets.size(); ++i)
00070 {
00071 const set<int> &s = itemSets[i];
00072 table(i+1, 0) = Text::toString(i);
00073 for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00074 {
00075 if(j!=s.begin()) table(i+1, 1) += ", ";
00076 const LR1Item &item = items[*j];
00077 const Production &p = productions[item.production];
00078 table(i+1, 1) +=
00079 grammar.printItem(CFG::Item(p.rule, p.production, item.position)) +
00080 ":" + grammar.printTerminal(item.lookahead);
00081 }
00082 }
00083
00084 return Text::format(table, width, 2, true);
00085 }
00086
00087 struct AutomatonSymbolPrinter: NFA::Printer, DFA::Printer
00088 {
00089 const CFG &g;
00090 AutomatonSymbolPrinter(const CFG &g): g(g) {}
00091 string printSymbol(unsigned i) const
00092 {
00093 const int s = static_cast<int>(i) - g.getNumberOfRules();
00094 return s<0 ? g.printNonterminal(-s-1) : g.printTerminal(s);
00095 }
00096 };
00097
00098 ClrParser::ClrParser(CFG &g, const ActorBase *actor, const Printer *printer):
00099 LrParserBase((g.introduceNewStartSymbol(), g), actor, printer)
00100 {
00101
00102
00103 CFG::FirstSets firstSets(grammar);
00104
00105
00106 const int automatonSymbolShift = grammar.getNumberOfRules();
00107
00108
00109 NFA nfa;
00110 vector<LR1Item> items;
00111 {
00112 bool f = true;
00113 map<LR1Item, int> nfaStateMap;
00114 multimap<int, int> nonterminalToProductionMap;
00115
00116 for(unsigned i=0; i<productions.size(); ++i)
00117 {
00118 const Production &p = productions[i];
00119 nonterminalToProductionMap.insert(make_pair(p.rule, i));
00120 for(int k=-1; k<grammar.getNumberOfTerminals(); ++k)
00121 {
00122 int s = nfaStateMap[LR1Item(i, 0, k)] =
00123 f ? nfa.addStartState() : nfa.addState();
00124 f = false;
00125 for(unsigned j=0; j<p.right.size(); ++j)
00126 {
00127 const int t = nfaStateMap[LR1Item(i, j+1, k)] = nfa.addState();
00128 nfa.addEdge(s, t, automatonSymbolShift + p.right[j]);
00129 s = t;
00130 }
00131 }
00132 }
00133
00134 for(unsigned i=0; i<productions.size(); ++i)
00135 {
00136 const Production &p = productions[i];
00137 for(unsigned j=0; j<p.right.size(); ++j)
00138 {
00139 const int t = p.right[j];
00140 if(t > -1) continue;
00141 CFG::Item h(p.rule, p.production, j+1);
00142 set<int> f;
00143 const bool e = firstSets.includeFirstSet(h, f);
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153 for(int k=-1; k<grammar.getNumberOfTerminals(); ++k)
00154 {
00155 const int s = nfaStateMap[LR1Item(i, j, k)];
00156 pair<multimap<int, int>::iterator, multimap<int, int>::iterator> r =
00157 nonterminalToProductionMap.equal_range(-1-t);
00158 for(multimap<int, int>::iterator l=r.first; l!=r.second; ++l)
00159 {
00160 for(set<int>::iterator m=f.begin(); m!=f.end(); ++m)
00161 {
00162 nfa.addEdge(s, nfaStateMap[LR1Item(l->second, 0, *m)]);
00163 }
00164 if(e && (k == -1 || f.find(k) == f.end()))
00165 {
00166 nfa.addEdge(s, nfaStateMap[LR1Item(l->second, 0, k)]);
00167 }
00168 }
00169 }
00170 }
00171 }
00172
00173 items.resize(nfaStateMap.size());
00174 for(map<LR1Item, int>::iterator i=nfaStateMap.begin(); i!=nfaStateMap.end(); ++i)
00175 items[i->second] = i->first;
00176 }
00177
00178
00179
00180
00181
00182
00183 vector<set<int> > itemSets;
00184 DFA dfa(nfa, &itemSets);
00185
00186
00187
00188
00189
00190
00191 numberOfStates = dfa.getNumberOfStates();
00192
00193 actionTable.resize((grammar.getNumberOfTerminals()+1) * numberOfStates, -1);
00194 gotoTable.resize(grammar.getNumberOfRules() * numberOfStates, -1);
00195
00196
00197 for(int i=0; i<numberOfStates; ++i)
00198 {
00199 const DFA::State &s = dfa.getState(i);
00200 for(int j=0; j<s.getNumberOfEdges(); ++j)
00201 {
00202 const DFA::Edge &e = s.getEdge(j);
00203 for(unsigned k=e.getBegin(); k<=e.getEnd(); ++k)
00204 {
00205 if(k<static_cast<unsigned>(automatonSymbolShift))
00206 gotoTable[i+numberOfStates*(automatonSymbolShift-k-1)] =
00207 e.getTargetState();
00208 else actionTable[i+numberOfStates*(k-automatonSymbolShift+1)] =
00209 e.getTargetState();
00210 }
00211 }
00212 }
00213
00214
00215 for(unsigned i=0; i<itemSets.size(); ++i)
00216 {
00217 const set<int> &s = itemSets[i];
00218 for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00219 {
00220 const LR1Item &item = items[*j];
00221 const Production &p = productions[item.production];
00222 if(static_cast<unsigned>(item.position) != p.right.size()) continue;
00223 if(item.production == 0)
00224 {
00225 if(item.lookahead == -1) actionTable[i] = -2;
00226 continue;
00227 }
00228 int &v = actionTable[i+numberOfStates*(item.lookahead+1)];
00229 if(v < -2)
00230 {
00231 const Production &q = productions[-3-v];
00232 ARCHON_THROW1(ArgumentException, "reduce/reduce conflict at state " +
00233 Text::toString(i) + " on input terminal " +
00234 grammar.printTerminal(item.lookahead) +
00235 ": " +
00236 grammar.printProduction(q.rule,
00237 q.production) +
00238 " or " +
00239 grammar.printProduction(p.rule,
00240 p.production));
00241 }
00242 if(v > -1)
00243 ARCHON_THROW1(ArgumentException,
00244 "shift/reduce conflict at state " +
00245 Text::toString(i) + " on input terminal " +
00246 grammar.printTerminal(item.lookahead) +
00247 ": shift and goto state " +
00248 Text::toString(v) + " or reduce by " +
00249 grammar.printProduction(p.rule,
00250 p.production));
00251
00252 v = -3 - item.production;
00253 }
00254 }
00255 }
00256
00257 ClrParser::~ClrParser()
00258 {
00259 }
00260 }
00261 }