Archon: archon/util/lr_parser_simple.C Source File

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 #include <string>
00021 #include <iostream>
00022 
00023 #include <archon/util/nfa.H>
00024 #include <archon/util/dfa.H>
00025 #include <archon/util/text.H>
00026 
00027 #include <archon/util/lr_parser_simple.H>
00028 
00029 using namespace std;
00030 
00031 namespace Archon
00032 {
00033   namespace Utilities
00034   {
00035     int SlrParser::getAction(int state, int terminal) const
00036     {
00037       return actionTable[state+numberOfStates*(terminal+1)];
00038     }
00039 
00040     int SlrParser::getGoto(int state, int nonterminal) const
00041     {
00042       return gotoTable[state+numberOfStates*nonterminal];
00043     }
00044 
00045     string SlrParser::printItemSets(const vector<pair<int, int> > &items,
00046                                     const vector<set<int> > &itemSets, int width)
00047     {
00048       vector<double> columnWidthFractions;
00049       columnWidthFractions.push_back(0.2);
00050       columnWidthFractions.push_back(0.9);
00051       Text::Table<string> table(itemSets.size()+1, columnWidthFractions);
00052       table(0, 0) = "State";
00053       table(0, 1) = "LR(0) item set";
00054       for(unsigned i=0; i<itemSets.size(); ++i)
00055       {
00056         const set<int> &s = itemSets[i];
00057         table(i+1, 0) = Text::toString(i);
00058         for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00059         {
00060           if(j!=s.begin()) table(i+1, 1) += ", ";
00061           const pair<int, int> &item = items[*j];
00062           const Production &p = productions[item.first];
00063           table(i+1, 1) += grammar.printItem(CFG::Item(p.rule, p.production, item.second));
00064         }
00065       }
00066 
00067       return Text::format(table, width, 2, true);
00068     }
00069 
00070     struct AutomatonSymbolPrinter: NFA::Printer, DFA::Printer
00071     {
00072       const CFG &g;
00073       AutomatonSymbolPrinter(const CFG &g): g(g) {}
00074       string printSymbol(unsigned i) const
00075       {
00076         const int s = static_cast<int>(i) - g.getNumberOfRules();
00077         return s<0 ? g.printNonterminal(-s-1) : g.printTerminal(s);
00078       }
00079     };
00080 
00081     SlrParser::SlrParser(CFG &g, const ActorBase *actor, const Printer *printer):
00082       LrParserBase((g.introduceNewStartSymbol(), g), actor, printer)
00083     {
00084       //cerr << "Augmented Grammar without mid-rule actions:\n" << grammar.print(80) << "\n";
00085 
00086       const int automatonSymbolShift = grammar.getNumberOfRules();
00087 
00088       // Construct the NFA
00089       NFA nfa;
00090       vector<pair<int, int> > items;
00091       {
00092         map<pair<int, int>, int> nfaStateMap;
00093         multimap<int, int> nonterminalToProductionMap;
00094 
00095         for(unsigned i=0; i<productions.size(); ++i)
00096         {
00097           const Production &p = productions[i];
00098           nonterminalToProductionMap.insert(make_pair(p.rule, i));
00099           int s = nfaStateMap[make_pair(i, 0)] =
00100             i ? nfa.addState() : nfa.addStartState();
00101           for(unsigned j=0; j<p.right.size(); ++j)
00102           {
00103             const int t = nfaStateMap[make_pair(i, j+1)] = nfa.addState();
00104             nfa.addEdge(s, t, automatonSymbolShift + p.right[j]);
00105             s = t;
00106           }
00107         }
00108 
00109         for(unsigned i=0; i<productions.size(); ++i)
00110         {
00111           const Production &p = productions[i];
00112           for(unsigned j=0; j<p.right.size(); ++j)
00113           {
00114             const int t = p.right[j];
00115             if(t > -1) continue;
00116             const int s = nfaStateMap[make_pair(i, j)];
00117             pair<multimap<int, int>::iterator, multimap<int, int>::iterator> r =
00118               nonterminalToProductionMap.equal_range(-1-t);
00119             for(multimap<int, int>::iterator k=r.first; k!=r.second; ++k)
00120               nfa.addEdge(s, nfaStateMap[make_pair(k->second, 0)]);
00121           }
00122         }
00123 
00124         items.resize(nfaStateMap.size());
00125         for(map<pair<int, int>, int>::iterator i=nfaStateMap.begin(); i!=nfaStateMap.end(); ++i)
00126           items[i->second] = i->first;
00127       }
00128 
00129       //AutomatonSymbolPrinter p(grammar);
00130 
00131       //cerr << "NFA for grammar (recognition of viable prefixes):\n" << nfa.print(p, 80) << "\n";
00132 
00133       // Construct the DFA
00134       vector<set<int> > itemSets; // One outer entry per new state
00135       DFA dfa(nfa, &itemSets);
00136 
00137       //cerr << "DFA for grammar (recognition of viable prefixes):\n" << dfa.print(p, 80) << "\n";
00138 
00139       //cerr << "Cannonical LR(0) collection of item sets:\n";
00140       //cerr << printItemSets(items, itemSets, 80) << "\n";
00141 
00142       numberOfStates = dfa.getNumberOfStates();
00143 
00144       actionTable.resize((grammar.getNumberOfTerminals()+1) * numberOfStates, -1); // Fill with error
00145       gotoTable.resize(grammar.getNumberOfRules() * numberOfStates, -1); // Fill with error
00146 
00147       // Construct the shift actions and the goto table from the DFA
00148       for(int i=0; i<numberOfStates; ++i)
00149       {
00150         const DFA::State &s = dfa.getState(i);
00151         for(int j=0; j<s.getNumberOfEdges(); ++j)
00152         {
00153           const DFA::Edge &e = s.getEdge(j);
00154           for(unsigned k=e.getBegin(); k<=e.getEnd(); ++k)
00155           {
00156             if(k<static_cast<unsigned>(automatonSymbolShift))
00157               gotoTable[i+numberOfStates*(automatonSymbolShift-k-1)] =
00158                 e.getTargetState();
00159             else actionTable[i+numberOfStates*(k-automatonSymbolShift+1)] =
00160                    e.getTargetState();
00161           }
00162         }
00163       }
00164 
00165       CFG::FirstSets firstSets(grammar);
00166       //cerr << firstSets.print() << "\n";
00167       CFG::FollowSets followSets(firstSets);
00168       //cerr << followSets.print() << "\n";
00169 
00170       /*
00171        * In the item sets find all items with end position, then for
00172        * each terminal in the follow set of the left hand side
00173        * nonterminal of the item, set 'action[i, a] = reduce by p'
00174        * where 'i' is the state associated with the current item set
00175        * and 'a' is the terminal and 'p' is the production index from
00176        * the current item item.
00177        */
00178       for(unsigned i=0; i<itemSets.size(); ++i)
00179       {
00180         const set<int> &s = itemSets[i];
00181         for(set<int>::const_iterator j=s.begin(); j!=s.end(); ++j)
00182         {
00183           const pair<int, int> &item = items[*j];
00184           const Production &p = productions[item.first];
00185           if(static_cast<unsigned>(item.second) != p.right.size()) continue;
00186           if(item.first == 0)
00187           {
00188             actionTable[i] = -2;
00189             continue;
00190           }
00191           const set<int> &f = followSets.get(p.rule);
00192           for(set<int>::const_iterator k=f.begin(); k!=f.end(); ++k)
00193           {
00194             int &v = actionTable[i+numberOfStates*(*k+1)];
00195             if(v < -2)
00196             {
00197               const Production &q = productions[-3-v];
00198               ARCHON_THROW1(ArgumentException,
00199                             "reduce/reduce conflict at state " +
00200                             Text::toString(i) + " on input terminal " +
00201                             grammar.printTerminal(*k) + ": " +
00202                             grammar.printProduction(q.rule,
00203                                                     q.production) +
00204                             " or " +
00205                             grammar.printProduction(p.rule,
00206                                                     p.production));
00207             }
00208             if(v > -1)
00209               ARCHON_THROW1(ArgumentException,
00210                             "shift/reduce conflict at state " +
00211                             Text::toString(i) + " on input terminal " +
00212                             grammar.printTerminal(*k) + ": shift " +
00213                             "and goto state " + Text::toString(v) +
00214                             " or reduce by " +
00215                             grammar.printProduction(p.rule,
00216                                                     p.production));
00217             v = -3 - item.first;
00218           }
00219         }
00220       }
00221     }
00222 
00223     SlrParser::~SlrParser()
00224     {
00225     }
00226   }
00227 }