cfg.C

00001 /*
00002  * This file is part of the "Archon" framework.
00003  * (http://files3d.sourceforge.net)
00004  *
00005  * Copyright © 2002 by Kristian Spangsege and Brian Kristiansen.
00006  *
00007  * Permission to use, copy, modify, and distribute this software and
00008  * its documentation under the terms of the GNU General Public License is
00009  * hereby granted. No representations are made about the suitability of
00010  * this software for any purpose. It is provided "as is" without express
00011  * or implied warranty. See the GNU General Public License
00012  * (http://www.gnu.org/copyleft/gpl.html) for more details.
00013  *
00014  * The characters in this file are ISO8859-1 encoded.
00015  *
00016  * The documentation in this file is in "Doxygen" style
00017  * (http://www.doxygen.org).
00018  */
00019 
00020 #include <iostream>
00021 #include <algorithm>
00022 
00023 #include <archon/util/cfg.H>
00024 
00025 using namespace std;
00026 
00027 namespace Archon
00028 {
00029   namespace Utilities
00030   {
00031     void CFG::updateRuleIndices(vector<int> oldToNewMap)
00032     {
00033       for(unsigned i=0; i<rules.size(); ++i)
00034       {
00035         Rule &r = rules[i];
00036         for(unsigned j=0; j<r.productions.size(); ++j)
00037         {
00038           Production &p = r.productions[j];
00039           for(unsigned k=0; k<p.symbols.size(); ++k)
00040           {
00041             Symbol &s = p.symbols[k];
00042             if(s.type == Symbol::nonterminal) s.index = oldToNewMap[s.index];
00043           }
00044         }
00045       }
00046     }
00047 
00048     string CFG::chooseUniqueName(string stem, int enumerator)
00049     {
00050       if(enumerator < 0)
00051       {
00052         while(nonterminalMap.find(stem) != nonterminalMap.end()) stem += "'";
00053         return stem;
00054       }
00055 
00056       for(;;)
00057       {
00058         string n = stem + Text::toString(enumerator);
00059         if(nonterminalMap.find(n) == nonterminalMap.end()) return n;
00060         ++enumerator;
00061       }
00062     }
00063 
00064     void CFG::findNullableNonTerminals(vector<bool> &nullable,
00065                                        vector<vector<int> > &nullActions)
00066     {
00067       nullable.resize(rules.size());
00068       nullActions.resize(rules.size());
00069       bool again;
00070       do
00071       {
00072         again = false;
00073         for(unsigned i=0; i<rules.size(); ++i)
00074         {
00075           if(nullable[i]) continue;
00076           Rule &r = rules[i];
00077           for(unsigned j=0; j<r.productions.size(); ++j)
00078           {
00079             Production &p = r.productions[j];
00080             unsigned k=0;
00081             vector<int> a;
00082             while(k<p.symbols.size())
00083             {
00084               Symbol &s = p.symbols[k];
00085               // Break if the current symbol is not nullable
00086               if(s.type == Symbol::terminal ||
00087                  s.type == Symbol::nonterminal && !nullable[s.index]) break;
00088               if(s.type == Symbol::action) a.push_back(s.index);
00089               else
00090               {
00091                 const vector<int> &v = nullActions[s.index];
00092                 a.insert(a.end(), v.begin(), v.end());
00093               }
00094               ++k;
00095             }
00096             if(k == p.symbols.size())
00097             {
00098               // Detect ambiguous nullability
00099               if(nullable[i] && nullActions[i] != a)
00100                 ARCHON_THROW1(ArgumentException,
00101                               "Ambiguous nullability for "
00102                               "non-terminal '" + r.name + "'");
00103               nullable[i] = true;
00104               nullActions[i] = a;
00105               again = true;
00106             }
00107           }
00108         }
00109       }
00110       while(again);
00111     }
00112 
00113     void CFG::addNullableCombinations(unsigned i, bool epsilon,
00114                                       const Production &production,
00115                                       const vector<bool> &nullable,
00116                                       const vector<vector<int> > &nullActions,
00117                                       vector<Symbol> &prefix,
00118                                       vector<Production> &newProductions)
00119     {
00120       while(i<production.symbols.size())
00121       {
00122         const Symbol &s = production.symbols[i];
00123         ++i;
00124         if(s.type == Symbol::nonterminal && nullable[s.index])
00125         {
00126           vector<Symbol> p = prefix;
00127           const vector<int> &v = nullActions[s.index];
00128           for(unsigned j=0; j<v.size(); ++j)
00129             p.push_back(act(v[j]));
00130           addNullableCombinations(i, epsilon, production, nullable,
00131                                   nullActions, p, newProductions);
00132         }
00133         prefix.push_back(s);
00134         if(s.type != Symbol::action) epsilon = false;
00135       }
00136       if(!epsilon) newProductions.push_back(Production(prefix));
00137     }
00138 
00139     int CFG::eliminateCyclesVisit(int ruleIndex, vector<int> &visitedRules,
00140                                   list<pair<int, int> > &cycle)
00141     {
00142       if(visitedRules[ruleIndex] == 1) return ruleIndex;
00143       if(visitedRules[ruleIndex] == 2) return -1;
00144 
00145       visitedRules[ruleIndex] = 1;
00146 
00147       Rule &r = rules[ruleIndex];
00148 
00149       for(unsigned j=0; j<r.productions.size(); ++j)
00150       {
00151         Production &p = r.productions[j];
00152 
00153         int targetNonTerminal = -1;
00154 
00155         for(unsigned k=0; k<p.symbols.size(); ++k)
00156         {
00157           Symbol &s = p.symbols[k];
00158           if(s.type == Symbol::nonterminal)
00159           {
00160             if(targetNonTerminal >= 0)
00161             {
00162               targetNonTerminal = -1;
00163               break;
00164             }
00165             targetNonTerminal = s.index;
00166           }
00167           else if(s.type == Symbol::terminal)
00168           {
00169             targetNonTerminal = -1;
00170             break;
00171           }
00172         }
00173 
00174         if(targetNonTerminal < 0) continue;
00175 
00176         int s = eliminateCyclesVisit(targetNonTerminal, visitedRules, cycle);
00177         if(s == -2) return -2;
00178         if(s == -1) continue;
00179         if(p.symbols.size() > 1)
00180           ARCHON_THROW1(ArgumentException,
00181                         "Ambiguous count for cycle production '" + r.name +
00182                         " -> " + printProductionRightSide(p) + "'");
00183         cycle.insert(cycle.begin(), make_pair(ruleIndex, j));
00184         return s == ruleIndex ? -2 : s;
00185       }
00186 
00187       visitedRules[ruleIndex] = 2;
00188       return -1;
00189     }
00190 
00191     string CFG::printProductionRightSide(const Production &p, int m) const
00192     {
00193       string r;
00194       for(unsigned i=0; i<p.symbols.size(); ++i)
00195       {
00196         if(static_cast<int>(i) == m) r += "·";
00197         else if(i) r += " ";
00198         const Symbol &s = p.symbols[i];
00199         switch(s.type)
00200         {
00201         case Symbol::terminal:
00202           r += printTerminal(s.index);
00203           break;
00204         case Symbol::nonterminal:
00205           r += printNonterminal(s.index);
00206           break;
00207         case Symbol::action:
00208           r += Text::toLowerCase(actor->getMethodName(s.index)) + "(";
00209           for(unsigned j=0; j<s.args.size(); ++j)
00210           {
00211             if(j) r += ", ";
00212             r += s.args[j]<0 ? "_" : Text::toString(static_cast<int>(i)-s.args[j]);
00213           }
00214           r += ")";
00215           break;
00216         case Symbol::nil:
00217           break;
00218         }
00219       }
00220       if(m == static_cast<int>(p.symbols.size())) r += "·";
00221       return r.size() ? r : "<epsilon>";
00222     }
00223 
00224     CFG::~CFG()
00225     {{ // The extra scope is needed to work around gcc3.2 bug #8287
00226     }}
00227 
00228     int CFG::defineTerminal(string name)
00229     {
00230       pair<map<string, int>::iterator, bool> r =
00231         terminalMap.insert(make_pair(name, terminals.size()));
00232       if(!r.second) ARCHON_THROW1(ArgumentException,
00233                                  "Redefinition of terminal '" + name + "'");
00234       terminals.push_back(name);
00235       return terminals.size()-1;
00236     }
00237 
00238     int CFG::defineNonterminal(string name)
00239     {
00240       pair<map<string, int>::iterator, bool> r =
00241         nonterminalMap.insert(make_pair(name, rules.size()));
00242       if(!r.second) ARCHON_THROW1(ArgumentException,
00243                                  "Redefinition of non-terminal '" + name + "'");
00244       rules.push_back(Rule(name));
00245       return rules.size()-1;
00246     }
00247 
00248     void CFG::addProd(int nontermIndex, const vector<Symbol> &symbols)
00249     {
00250       for(unsigned i=0; i<symbols.size(); ++i)
00251       {
00252         const Symbol &s = symbols[i];
00253         switch(s.type)
00254         {
00255         case Symbol::terminal:
00256           if(s.index < 0 || s.index >= static_cast<int>(terminals.size()))
00257             ARCHON_THROW1(ArgumentException, "Illegal terminal index");
00258           break;
00259         case Symbol::nonterminal:
00260           if(s.index < 0 || s.index >= static_cast<int>(rules.size()))
00261             ARCHON_THROW1(ArgumentException, "Illegal nonterminal index");
00262           break;
00263         case Symbol::action:
00264           if(!actor)
00265             ARCHON_THROW1(ArgumentException, "Can't have actions without an actor");
00266           if(s.index < -3 || s.index >= actor->getNumberOfMethods())
00267             ARCHON_THROW1(ArgumentException, "Illegal method index");
00268           if(static_cast<int>(s.args.size()) != actor->getMethodArity(s.index))
00269             ARCHON_THROW1(ArgumentException, "Wrong number of arguments to '" +
00270                          actor->getMethodName(s.index) + "'");
00271           break;
00272         case Symbol::nil:
00273           break;
00274         }
00275       }
00276       rules[nontermIndex].addProduction(symbols);
00277     }
00278 
00279     void CFG::addProd(int nontermIndex, Symbol s1, Symbol s2, Symbol s3,
00280                       Symbol s4, Symbol s5, Symbol s6, Symbol s7, Symbol s8)
00281     {
00282       vector<Symbol> symbols;
00283       if(s1.type) symbols.push_back(s1);
00284       if(s2.type) symbols.push_back(s2);
00285       if(s3.type) symbols.push_back(s3);
00286       if(s4.type) symbols.push_back(s4);
00287       if(s5.type) symbols.push_back(s5);
00288       if(s6.type) symbols.push_back(s6);
00289       if(s7.type) symbols.push_back(s7);
00290       if(s8.type) symbols.push_back(s8);
00291       addProd(nontermIndex, symbols);
00292     }
00293 
00294     const CFG::Symbol &CFG::nil()
00295     {
00296       static Symbol s;
00297       return s;
00298     }
00299 
00300     CFG::Symbol CFG::act(int methodIndex, int arg1, int arg2, int arg3,
00301                          int arg4, int arg5, int arg6, int arg7)
00302     {
00303       vector<int> args;
00304       if(arg1>-2) args.push_back(arg1);
00305       if(arg2>-2) args.push_back(arg2);
00306       if(arg3>-2) args.push_back(arg3);
00307       if(arg4>-2) args.push_back(arg4);
00308       if(arg5>-2) args.push_back(arg5);
00309       if(arg6>-2) args.push_back(arg6);
00310       if(arg7>-2) args.push_back(arg7);
00311       return Symbol(methodIndex, args);
00312     }
00313 
00314     template<typename T>
00315     void set_include(const set<T> &right, set<T> &left)
00316     {
00317       typename set<T>::iterator i=left.begin();
00318       typename set<T>::const_iterator j=right.begin();
00319       while(j!=right.end())
00320       {
00321         if(i == left.end() || *j < *i) left.insert(i, *j++);
00322         else if(*j == *i) ++j;
00323         else ++i;
00324       }
00325     }
00326 
00327     CFG::FirstSets::FirstSets(const CFG &g): grammar(g)
00328     {
00329       terminals.resize(g.rules.size());
00330       nullable.resize(g.rules.size());
00331       bool again;
00332       do
00333       {
00334         again = false;
00335         for(unsigned i=0; i<g.rules.size(); ++i)
00336         {
00337           const Rule &r = g.rules[i];
00338           set<int> &t = terminals[i];
00339           for(unsigned j=0; j<r.productions.size(); ++j)
00340           {
00341             const unsigned n = t.size();
00342             Item item(i, j, 0);
00343             if(includeFirstSet(item, t) && !nullable[i]) again = nullable[i] = true;
00344             else if(t.size() > n) again = true;
00345           }
00346         }
00347       }
00348       while(again);
00349     }
00350 
00351     bool CFG::FirstSets::includeFirstSet(const Item &item, set<int> &t) const
00352     {
00353       const Production &p =
00354         grammar.rules[item.rule].productions[item.production];
00355       unsigned i;
00356       for(i=item.position; i<p.symbols.size(); ++i)
00357       {
00358         const Symbol &s = p.symbols[i];
00359       
00360         if(s.type == Symbol::terminal)
00361         {
00362           t.insert(s.index);
00363           break;
00364         }
00365 
00366         if(s.type == Symbol::nonterminal)
00367         {
00368           set_include(terminals[s.index], t);
00369           if(!nullable[s.index]) break;
00370         }
00371       }
00372       return i == p.symbols.size();
00373     }
00374 
00375     string CFG::printTerminal(int i) const
00376     {
00377       if(i < 0) return "<eoi>";
00378       return Text::toUpperCase(terminals[i]);
00379     }
00380 
00381     string CFG::printNonterminal(int i) const
00382     {
00383       return Text::toLowerCase(rules[i].name);
00384     }
00385 
00386     string CFG::printProduction(int i, int j) const
00387     {
00388       return printNonterminal(i) + " -> " +
00389         printProductionRightSide(rules[i].productions[j]);
00390     }
00391 
00392     string CFG::printItem(const Item &i) const
00393     {
00394       return printNonterminal(i.rule) + " -> " +
00395         printProductionRightSide(rules[i.rule].productions[i.production],
00396                                  i.position);
00397     }
00398 
00399     string CFG::FirstSets::print(int width) const
00400     {
00401       vector<double> columnWidthFractions;
00402       columnWidthFractions.push_back(0.2);
00403       columnWidthFractions.push_back(0.8);
00404       Text::Table<string> table(terminals.size()+1, columnWidthFractions);
00405       table(0, 0) = "Nonterminal";
00406       table(0, 1) = "First set";
00407       for(unsigned i=0; i<terminals.size(); ++i)
00408       {
00409         const set<int> &t = terminals[i];
00410         table(i+1, 0) = grammar.printNonterminal(i);
00411         for(set<int>::const_iterator j=t.begin(); j!=t.end(); ++j)
00412         {
00413           if(j != t.begin()) table(i+1, 1) += " ";
00414           table(i+1, 1) += grammar.printTerminal(*j);
00415         }
00416         if(nullable[i])
00417         {
00418           if(table(i+1, 1).size()) table(i+1, 1) += " ";
00419           table(i+1, 1) += "<epsilon>";
00420         }
00421       }
00422 
00423       return Text::format(table, width, 2, true);      
00424     }
00425 
00426     CFG::FollowSets::FollowSets(const FirstSets &f): grammar(f.grammar)
00427     {
00428       terminals.resize(grammar.rules.size());
00429       terminals[0].insert(-1);
00430       bool again;
00431       do
00432       {
00433         again = false;
00434         for(unsigned i=0; i<grammar.rules.size(); ++i)
00435         {
00436           const Rule &r = grammar.rules[i];
00437           for(unsigned j=0; j<r.productions.size(); ++j)
00438           {
00439             const Production &p = r.productions[j];
00440             for(unsigned k=0; k<p.symbols.size(); ++k)
00441             {
00442               const Symbol &s = p.symbols[k];
00443               if(s.type != Symbol::nonterminal) continue;
00444               set<int> &t = terminals[s.index];
00445               const unsigned n = t.size();
00446               Item item(i, j, k+1);
00447               if(f.includeFirstSet(item, t)) set_include(terminals[i], t);
00448               if(t.size() > n) again = true;
00449             }
00450           }
00451         }
00452       }
00453       while(again);
00454     }
00455 
00456     string CFG::FollowSets::print(int width) const
00457     {
00458       vector<double> columnWidthFractions;
00459       columnWidthFractions.push_back(0.2);
00460       columnWidthFractions.push_back(0.8);
00461       Text::Table<string> table(terminals.size()+1, columnWidthFractions);
00462       table(0, 0) = "Nonterminal";
00463       table(0, 1) = "Follow set";
00464       for(unsigned i=0; i<terminals.size(); ++i)
00465       {
00466         const set<int> &t = terminals[i];
00467         table(i+1, 0) = grammar.printNonterminal(i);
00468         for(set<int>::const_iterator j=t.begin(); j!=t.end(); ++j)
00469         {
00470           if(*j < 0) continue;
00471           if(table(i+1, 1).size()) table(i+1, 1) += " ";
00472           table(i+1, 1) += grammar.printTerminal(*j);
00473         }
00474         if(t.size() && *t.begin()<0)
00475         {
00476           if(t.size() > 1) table(i+1, 1) += " ";
00477           table(i+1, 1) += "<eoi>";
00478         }
00479       }
00480 
00481       return Text::format(table, width, 2, true);      
00482     }
00483 
00484     string CFG::print(int width) const
00485     {
00486       vector<double> columnWidthFractions;
00487       columnWidthFractions.push_back(0.2);
00488       columnWidthFractions.push_back(0.02);
00489       columnWidthFractions.push_back(0.78);
00490       int numberOfProductions = 0;
00491       for(unsigned i=0; i<rules.size(); ++i)
00492         numberOfProductions += max(1, static_cast<int>(rules[i].productions.size()));
00493       if(rules.size()) numberOfProductions += rules.size()-1;
00494       Text::Table<string> table(numberOfProductions, columnWidthFractions);
00495       int l = 0;
00496       for(unsigned i=0; i<rules.size(); ++i)
00497       {
00498         if(i)
00499         {
00500           table(l, 0) = " ";
00501           ++l;
00502         }
00503         table(l, 0) = printNonterminal(i);
00504         table(l, 1) = "=";
00505         const Rule &r = rules[i];
00506         for(unsigned j=0; j<r.productions.size(); ++j)
00507         {
00508           if(j) table(l, 1) = "|";
00509           table(l, 2) = printProductionRightSide(r.productions[j]);
00510           ++l;
00511         }
00512         if(!r.productions.size()) ++l;
00513       }
00514 
00515       return Text::format(table, width, 1, false);      
00516     }
00517 
00518     void CFG::introduceNewStartSymbol()
00519     {
00520       if(!rules.size()) ARCHON_THROW1(ArgumentException,
00521                                      "Original grammar must have "
00522                                      "at least one nonterminal");
00523       vector<int> m(rules.size());
00524       for(unsigned l=0; l<rules.size(); ++l) m[l] = l+1;
00525       updateRuleIndices(m);
00526       rules.insert(rules.begin(), Rule(chooseUniqueName(rules[0].name, -1)));
00527       addProd(0, nont(1));
00528     }
00529 
00530     void CFG::eliminateEpsilonProductions()
00531     {
00532       vector<bool> nullable;
00533       vector<vector<int> > nullActions;
00534       findNullableNonTerminals(nullable, nullActions);
00535 
00536       // If the start symbol is nullable it may not occur on the right
00537       // hand side of any production
00538       if(nullable[0])
00539       {
00540         for(unsigned i=0; i<rules.size(); ++i)
00541         {
00542           Rule &r = rules[i];
00543           for(unsigned j=0; j<r.productions.size(); ++j)
00544           {
00545             Production &p = r.productions[j];
00546             for(unsigned k=0; k<p.symbols.size(); ++k)
00547             {
00548               Symbol &s = p.symbols[k];
00549               if(s.type == Symbol::nonterminal && s.index == 0)
00550               {
00551                 // Make a brand new start symbol
00552                 nullable.insert(nullable.begin(), true);
00553                 nullActions.insert(nullActions.begin(), nullActions[0]);
00554                 introduceNewStartSymbol();
00555                 i = rules.size();
00556                 j = r.productions.size();
00557                 break;
00558               }
00559             }
00560           }
00561         }
00562       }
00563 
00564       // Rewrite each rule
00565       for(unsigned i=0; i<rules.size(); ++i)
00566       {
00567         Rule &r = rules[i];
00568         vector<Production> newProductions;
00569         for(unsigned j=0; j<r.productions.size(); ++j)
00570         {
00571           vector<Symbol> prefix;
00572           addNullableCombinations(0, true, r.productions[j], nullable,
00573                                   nullActions, prefix, newProductions);
00574         }
00575 
00576         r.productions = newProductions;
00577       }
00578 
00579       // Is the start symbol nullable
00580       if(nullable[0])
00581       {
00582         vector<Symbol> epsilon;
00583         const vector<int> &v = nullActions[0];
00584         for(unsigned j=0; j<v.size(); ++j)
00585           epsilon.push_back(act(v[j]));
00586         addProd(0, epsilon);
00587       }
00588     }
00589 
00590     void CFG::eliminateCycles()
00591     {
00592       CFG g = *this;
00593       g.eliminateEpsilonProductions();
00594       int cycles = 0;
00595       for(;;)
00596       {
00597         // Find a cycle
00598         list<pair<int, int> > cycle;
00599         vector<int> visitedRules(g.rules.size()); // 0=unvisited, 1=in current path, 2=visited but no longer in current path
00600         for(unsigned i=0; i<g.rules.size(); ++i)
00601           if(g.eliminateCyclesVisit(i, visitedRules, cycle) == -2) break;
00602         if(!cycle.size()) break;
00603         ++cycles;
00604 
00605         cerr << "Found cycle: " << g.rules[cycle.front().first].name;
00606         for(list<pair<int, int> >::iterator i=cycle.begin(); i!=cycle.end(); ++i)
00607           cerr << " -> " << g.printProductionRightSide(g.rules[i->first].productions[i->second]);
00608         cerr << "\n";
00609 
00610         set<pair<int, int> > cycleProductions;
00611         set<int> cycleNonTerminals;
00612         for(list<pair<int, int> >::iterator i=cycle.begin(); i!=cycle.end(); ++i)
00613         {
00614           cycleProductions.insert(*i);
00615           cycleNonTerminals.insert(i->first);
00616         }
00617 
00618         // Rewrite each rule to eliminate the cycle
00619         for(unsigned i=0; i<g.rules.size(); ++i)
00620         {
00621           Rule &r = g.rules[i];
00622           vector<Production> newProductions;
00623           for(unsigned j=0; j<r.productions.size(); ++j)
00624           {
00625             Production &p = r.productions[j];
00626 
00627             const pair<int, int> q = make_pair(i, j);
00628             if(cycleProductions.find(q) != cycleProductions.end())
00629             {
00630               if(q != cycle.back()) newProductions.push_back(p);
00631             }
00632             else
00633             {
00634               Production n;
00635               for(unsigned k=0; k<p.symbols.size(); ++k)
00636               {
00637                 Symbol &s = p.symbols[k];
00638                 if(s.type == Symbol::nonterminal &&
00639                    cycleNonTerminals.find(s.index) != cycleNonTerminals.end())
00640                   n.symbols.push_back(nont(cycle.front().first));
00641                 else n.symbols.push_back(s);
00642               }
00643               newProductions.push_back(n);
00644             }
00645           }
00646 
00647           r.productions = newProductions;
00648         }
00649       }
00650 
00651       if(cycles) *this = g;
00652     }
00653 
00654     void CFG::eliminateMidRuleActions()
00655     {
00656       const unsigned n = rules.size();
00657       for(unsigned i=0; i<n; ++i)
00658       {
00659         Rule *r = &rules[i];
00660         for(unsigned j=0; j<r->productions.size(); ++j)
00661         {
00662           Production *p = &r->productions[j];
00663           for(unsigned k=0; k+1<p->symbols.size(); ++k)
00664           {
00665             Symbol *s = &p->symbols[k];
00666             if(s->type != Symbol::action) continue;
00667             const int a = defineNonterminal(chooseUniqueName("action", 1));
00668             // rules vector may be relocated so we need to aquire a new
00669             // pointers.
00670             r = &rules[i];
00671             p = &r->productions[j];
00672             s = &p->symbols[k];
00673             addProd(a, *s);
00674             s->type = Symbol::nonterminal;
00675             s->index = a;
00676             s->args.clear();
00677           }
00678         }
00679       }
00680     }
00681   }
00682 }

Generated on Sun Jul 30 22:55:44 2006 for Archon by  doxygen 1.4.4