lr_parser_canonical.C

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 
00021 
00022 /*
00023 
00024 HAS A BUG (OR MANY)
00025 
00026 */
00027 
00028 /*
00029 
00030 DFA construction takes a looooong time on grammars like REGEX.
00031 
00032 */
00033 
00034 
00035 #include <string>
00036 #include <iostream>
00037 
00038 #include <archon/util/nfa.H>
00039 #include <archon/util/dfa.H>
00040 #include <archon/util/text.H>
00041 
00042 #include <archon/util/lr_parser_canonical.H>
00043 
00044 using namespace std;
00045 
00046 namespace Archon
00047 {
00048   namespace Utilities
00049   {
00050     int ClrParser::getAction(int state, int terminal) const
00051     {
00052       return actionTable[state+numberOfStates*(terminal+1)];
00053     }
00054 
00055     int ClrParser::getGoto(int state, int nonterminal) const
00056     {
00057       return gotoTable[state+numberOfStates*nonterminal];
00058     }
00059 
00060     string ClrParser::printItemSets(const vector<LR1Item> &items,
00061                                     const vector<set<int> > &itemSets, int width)
00062     {
00063       vector<double> columnWidthFractions;
00064       columnWidthFractions.push_back(0.2);
00065       columnWidthFractions.push_back(0.9);
00066       Text::Table<string> table(itemSets.size()+1, columnWidthFractions);
00067       table(0, 0) = "State";
00068       table(0, 1) = "LR(1) item set";
00069       for(unsigned i=0; i<itemSets.size(); ++i)
00070       {
00071         const set<int> &s = itemSets[i];
00072         table(i+1, 0) = Text::toString(i);
00073         for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00074         {
00075           if(j!=s.begin()) table(i+1, 1) += ", ";
00076           const LR1Item &item = items[*j];
00077           const Production &p = productions[item.production];
00078           table(i+1, 1) +=
00079             grammar.printItem(CFG::Item(p.rule, p.production, item.position)) +
00080             ":" + grammar.printTerminal(item.lookahead);
00081         }
00082       }
00083 
00084       return Text::format(table, width, 2, true);
00085     }
00086 
00087     struct AutomatonSymbolPrinter: NFA::Printer, DFA::Printer
00088     {
00089       const CFG &g;
00090       AutomatonSymbolPrinter(const CFG &g): g(g) {}
00091       string printSymbol(unsigned i) const
00092       {
00093         const int s = static_cast<int>(i) - g.getNumberOfRules();
00094         return s<0 ? g.printNonterminal(-s-1) : g.printTerminal(s);
00095       }
00096     };
00097 
00098     ClrParser::ClrParser(CFG &g, const ActorBase *actor, const Printer *printer):
00099       LrParserBase((g.introduceNewStartSymbol(), g), actor, printer)
00100     {
00101       //cerr << "Augmented Grammar without mid-rule actions:\n" << grammar.print(80) << "\n";
00102 
00103       CFG::FirstSets firstSets(grammar);
00104       //cerr << firstSets.print() << "\n";
00105 
00106       const int automatonSymbolShift = grammar.getNumberOfRules();
00107 
00108       // Construct the NFA
00109       NFA nfa;
00110       vector<LR1Item> items;
00111       {
00112         bool f = true;
00113         map<LR1Item, int> nfaStateMap;
00114         multimap<int, int> nonterminalToProductionMap;
00115 
00116         for(unsigned i=0; i<productions.size(); ++i)
00117         {
00118           const Production &p = productions[i];
00119           nonterminalToProductionMap.insert(make_pair(p.rule, i));
00120           for(int k=-1; k<grammar.getNumberOfTerminals(); ++k)
00121           {
00122             int s = nfaStateMap[LR1Item(i, 0, k)] =
00123               f ? nfa.addStartState() : nfa.addState();
00124             f = false;
00125             for(unsigned j=0; j<p.right.size(); ++j)
00126             {
00127               const int t = nfaStateMap[LR1Item(i, j+1, k)] = nfa.addState();
00128               nfa.addEdge(s, t, automatonSymbolShift + p.right[j]);
00129               s = t;
00130             }
00131           }
00132         }
00133 
00134         for(unsigned i=0; i<productions.size(); ++i)
00135         {
00136           const Production &p = productions[i];
00137           for(unsigned j=0; j<p.right.size(); ++j)
00138           {
00139             const int t = p.right[j];
00140             if(t > -1) continue;
00141             CFG::Item h(p.rule, p.production, j+1);
00142             set<int> f;
00143             const bool e = firstSets.includeFirstSet(h, f);
00144             /*
00145               cerr << "first(" << grammar.printItem(h) << ") = " << e << ", {";
00146               for(set<int>::iterator m=f.begin(); m!=f.end(); ++m)
00147               {
00148               if(m!=f.begin()) cerr << " ";
00149               cerr << grammar.printTerminal(*m);
00150               }
00151               cerr << "}\n";
00152             */
00153             for(int k=-1; k<grammar.getNumberOfTerminals(); ++k)
00154             {
00155               const int s = nfaStateMap[LR1Item(i, j, k)];
00156               pair<multimap<int, int>::iterator, multimap<int, int>::iterator> r =
00157                 nonterminalToProductionMap.equal_range(-1-t);
00158               for(multimap<int, int>::iterator l=r.first; l!=r.second; ++l)
00159               {
00160                 for(set<int>::iterator m=f.begin(); m!=f.end(); ++m)
00161                 {
00162                   nfa.addEdge(s, nfaStateMap[LR1Item(l->second, 0, *m)]);
00163                 }
00164                 if(e && (k == -1 || f.find(k) == f.end()))
00165                 {
00166                   nfa.addEdge(s, nfaStateMap[LR1Item(l->second, 0, k)]);
00167                 }
00168               }
00169             }
00170           }
00171         }
00172 
00173         items.resize(nfaStateMap.size());
00174         for(map<LR1Item, int>::iterator i=nfaStateMap.begin(); i!=nfaStateMap.end(); ++i)
00175           items[i->second] = i->first;
00176       }
00177 
00178       //AutomatonSymbolPrinter p(grammar);
00179 
00180       //cerr << "NFA for grammar (recognition of viable prefixes):\n" << nfa.print(p, 80) << "\n";
00181 
00182       // Construct the DFA
00183       vector<set<int> > itemSets; // One outer entry per new state
00184       DFA dfa(nfa, &itemSets);
00185 
00186       //cerr << "DFA for grammar (recognition of viable prefixes):\n" << dfa.print(p, 80) << "\n";
00187 
00188       //cerr << "Cannonical LR(1) collection of item sets:\n";
00189       //cerr << printItemSets(items, itemSets, 80) << "\n";
00190 
00191       numberOfStates = dfa.getNumberOfStates();
00192 
00193       actionTable.resize((grammar.getNumberOfTerminals()+1) * numberOfStates, -1); // Fill with error
00194       gotoTable.resize(grammar.getNumberOfRules() * numberOfStates, -1); // Fill with error
00195 
00196       // Construct the shift actions and the goto table from the DFA
00197       for(int i=0; i<numberOfStates; ++i)
00198       {
00199         const DFA::State &s = dfa.getState(i);
00200         for(int j=0; j<s.getNumberOfEdges(); ++j)
00201         {
00202           const DFA::Edge &e = s.getEdge(j);
00203           for(unsigned k=e.getBegin(); k<=e.getEnd(); ++k)
00204           {
00205             if(k<static_cast<unsigned>(automatonSymbolShift))
00206               gotoTable[i+numberOfStates*(automatonSymbolShift-k-1)] =
00207                 e.getTargetState();
00208             else actionTable[i+numberOfStates*(k-automatonSymbolShift+1)] =
00209                    e.getTargetState();
00210           }
00211         }
00212       }
00213 
00214       // Construct the reduce actions from the items with end position
00215       for(unsigned i=0; i<itemSets.size(); ++i)
00216       {
00217         const set<int> &s = itemSets[i];
00218         for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00219         {
00220           const LR1Item &item = items[*j];
00221           const Production &p = productions[item.production];
00222           if(static_cast<unsigned>(item.position) != p.right.size()) continue;
00223           if(item.production == 0)
00224           {
00225             if(item.lookahead == -1) actionTable[i] = -2;
00226             continue;
00227           }
00228           int &v = actionTable[i+numberOfStates*(item.lookahead+1)];
00229           if(v < -2)
00230           {
00231             const Production &q = productions[-3-v];
00232             ARCHON_THROW1(ArgumentException, "reduce/reduce conflict at state " +
00233                           Text::toString(i) + " on input terminal " +
00234                           grammar.printTerminal(item.lookahead) +
00235                           ": " +
00236                           grammar.printProduction(q.rule,
00237                                                   q.production) +
00238                           " or " +
00239                           grammar.printProduction(p.rule,
00240                                                   p.production));
00241           }
00242           if(v > -1)
00243             ARCHON_THROW1(ArgumentException,
00244                           "shift/reduce conflict at state " +
00245                           Text::toString(i) + " on input terminal " +
00246                           grammar.printTerminal(item.lookahead) +
00247                           ": shift and goto state " +
00248                           Text::toString(v) + " or reduce by " +
00249                           grammar.printProduction(p.rule,
00250                                                   p.production));
00251 
00252           v = -3 - item.production;
00253         }
00254       }
00255     }
00256 
00257     ClrParser::~ClrParser()
00258     {
00259     }
00260   }
00261 }

Generated on Sun Jul 30 22:55:44 2006 for Archon by  doxygen 1.4.4