00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <iostream>
00021 #include <algorithm>
00022
00023 #include <archon/util/cfg.H>
00024
00025 using namespace std;
00026
00027 namespace Archon
00028 {
00029 namespace Utilities
00030 {
00031 void CFG::updateRuleIndices(vector<int> oldToNewMap)
00032 {
00033 for(unsigned i=0; i<rules.size(); ++i)
00034 {
00035 Rule &r = rules[i];
00036 for(unsigned j=0; j<r.productions.size(); ++j)
00037 {
00038 Production &p = r.productions[j];
00039 for(unsigned k=0; k<p.symbols.size(); ++k)
00040 {
00041 Symbol &s = p.symbols[k];
00042 if(s.type == Symbol::nonterminal) s.index = oldToNewMap[s.index];
00043 }
00044 }
00045 }
00046 }
00047
00048 string CFG::chooseUniqueName(string stem, int enumerator)
00049 {
00050 if(enumerator < 0)
00051 {
00052 while(nonterminalMap.find(stem) != nonterminalMap.end()) stem += "'";
00053 return stem;
00054 }
00055
00056 for(;;)
00057 {
00058 string n = stem + Text::toString(enumerator);
00059 if(nonterminalMap.find(n) == nonterminalMap.end()) return n;
00060 ++enumerator;
00061 }
00062 }
00063
00064 void CFG::findNullableNonTerminals(vector<bool> &nullable,
00065 vector<vector<int> > &nullActions)
00066 {
00067 nullable.resize(rules.size());
00068 nullActions.resize(rules.size());
00069 bool again;
00070 do
00071 {
00072 again = false;
00073 for(unsigned i=0; i<rules.size(); ++i)
00074 {
00075 if(nullable[i]) continue;
00076 Rule &r = rules[i];
00077 for(unsigned j=0; j<r.productions.size(); ++j)
00078 {
00079 Production &p = r.productions[j];
00080 unsigned k=0;
00081 vector<int> a;
00082 while(k<p.symbols.size())
00083 {
00084 Symbol &s = p.symbols[k];
00085
00086 if(s.type == Symbol::terminal ||
00087 s.type == Symbol::nonterminal && !nullable[s.index]) break;
00088 if(s.type == Symbol::action) a.push_back(s.index);
00089 else
00090 {
00091 const vector<int> &v = nullActions[s.index];
00092 a.insert(a.end(), v.begin(), v.end());
00093 }
00094 ++k;
00095 }
00096 if(k == p.symbols.size())
00097 {
00098
00099 if(nullable[i] && nullActions[i] != a)
00100 ARCHON_THROW1(ArgumentException,
00101 "Ambiguous nullability for "
00102 "non-terminal '" + r.name + "'");
00103 nullable[i] = true;
00104 nullActions[i] = a;
00105 again = true;
00106 }
00107 }
00108 }
00109 }
00110 while(again);
00111 }
00112
00113 void CFG::addNullableCombinations(unsigned i, bool epsilon,
00114 const Production &production,
00115 const vector<bool> &nullable,
00116 const vector<vector<int> > &nullActions,
00117 vector<Symbol> &prefix,
00118 vector<Production> &newProductions)
00119 {
00120 while(i<production.symbols.size())
00121 {
00122 const Symbol &s = production.symbols[i];
00123 ++i;
00124 if(s.type == Symbol::nonterminal && nullable[s.index])
00125 {
00126 vector<Symbol> p = prefix;
00127 const vector<int> &v = nullActions[s.index];
00128 for(unsigned j=0; j<v.size(); ++j)
00129 p.push_back(act(v[j]));
00130 addNullableCombinations(i, epsilon, production, nullable,
00131 nullActions, p, newProductions);
00132 }
00133 prefix.push_back(s);
00134 if(s.type != Symbol::action) epsilon = false;
00135 }
00136 if(!epsilon) newProductions.push_back(Production(prefix));
00137 }
00138
00139 int CFG::eliminateCyclesVisit(int ruleIndex, vector<int> &visitedRules,
00140 list<pair<int, int> > &cycle)
00141 {
00142 if(visitedRules[ruleIndex] == 1) return ruleIndex;
00143 if(visitedRules[ruleIndex] == 2) return -1;
00144
00145 visitedRules[ruleIndex] = 1;
00146
00147 Rule &r = rules[ruleIndex];
00148
00149 for(unsigned j=0; j<r.productions.size(); ++j)
00150 {
00151 Production &p = r.productions[j];
00152
00153 int targetNonTerminal = -1;
00154
00155 for(unsigned k=0; k<p.symbols.size(); ++k)
00156 {
00157 Symbol &s = p.symbols[k];
00158 if(s.type == Symbol::nonterminal)
00159 {
00160 if(targetNonTerminal >= 0)
00161 {
00162 targetNonTerminal = -1;
00163 break;
00164 }
00165 targetNonTerminal = s.index;
00166 }
00167 else if(s.type == Symbol::terminal)
00168 {
00169 targetNonTerminal = -1;
00170 break;
00171 }
00172 }
00173
00174 if(targetNonTerminal < 0) continue;
00175
00176 int s = eliminateCyclesVisit(targetNonTerminal, visitedRules, cycle);
00177 if(s == -2) return -2;
00178 if(s == -1) continue;
00179 if(p.symbols.size() > 1)
00180 ARCHON_THROW1(ArgumentException,
00181 "Ambiguous count for cycle production '" + r.name +
00182 " -> " + printProductionRightSide(p) + "'");
00183 cycle.insert(cycle.begin(), make_pair(ruleIndex, j));
00184 return s == ruleIndex ? -2 : s;
00185 }
00186
00187 visitedRules[ruleIndex] = 2;
00188 return -1;
00189 }
00190
00191 string CFG::printProductionRightSide(const Production &p, int m) const
00192 {
00193 string r;
00194 for(unsigned i=0; i<p.symbols.size(); ++i)
00195 {
00196 if(static_cast<int>(i) == m) r += "·";
00197 else if(i) r += " ";
00198 const Symbol &s = p.symbols[i];
00199 switch(s.type)
00200 {
00201 case Symbol::terminal:
00202 r += printTerminal(s.index);
00203 break;
00204 case Symbol::nonterminal:
00205 r += printNonterminal(s.index);
00206 break;
00207 case Symbol::action:
00208 r += Text::toLowerCase(actor->getMethodName(s.index)) + "(";
00209 for(unsigned j=0; j<s.args.size(); ++j)
00210 {
00211 if(j) r += ", ";
00212 r += s.args[j]<0 ? "_" : Text::toString(static_cast<int>(i)-s.args[j]);
00213 }
00214 r += ")";
00215 break;
00216 case Symbol::nil:
00217 break;
00218 }
00219 }
00220 if(m == static_cast<int>(p.symbols.size())) r += "·";
00221 return r.size() ? r : "<epsilon>";
00222 }
00223
00224 CFG::~CFG()
00225 {{
00226 }}
00227
00228 int CFG::defineTerminal(string name)
00229 {
00230 pair<map<string, int>::iterator, bool> r =
00231 terminalMap.insert(make_pair(name, terminals.size()));
00232 if(!r.second) ARCHON_THROW1(ArgumentException,
00233 "Redefinition of terminal '" + name + "'");
00234 terminals.push_back(name);
00235 return terminals.size()-1;
00236 }
00237
00238 int CFG::defineNonterminal(string name)
00239 {
00240 pair<map<string, int>::iterator, bool> r =
00241 nonterminalMap.insert(make_pair(name, rules.size()));
00242 if(!r.second) ARCHON_THROW1(ArgumentException,
00243 "Redefinition of non-terminal '" + name + "'");
00244 rules.push_back(Rule(name));
00245 return rules.size()-1;
00246 }
00247
00248 void CFG::addProd(int nontermIndex, const vector<Symbol> &symbols)
00249 {
00250 for(unsigned i=0; i<symbols.size(); ++i)
00251 {
00252 const Symbol &s = symbols[i];
00253 switch(s.type)
00254 {
00255 case Symbol::terminal:
00256 if(s.index < 0 || s.index >= static_cast<int>(terminals.size()))
00257 ARCHON_THROW1(ArgumentException, "Illegal terminal index");
00258 break;
00259 case Symbol::nonterminal:
00260 if(s.index < 0 || s.index >= static_cast<int>(rules.size()))
00261 ARCHON_THROW1(ArgumentException, "Illegal nonterminal index");
00262 break;
00263 case Symbol::action:
00264 if(!actor)
00265 ARCHON_THROW1(ArgumentException, "Can't have actions without an actor");
00266 if(s.index < -3 || s.index >= actor->getNumberOfMethods())
00267 ARCHON_THROW1(ArgumentException, "Illegal method index");
00268 if(static_cast<int>(s.args.size()) != actor->getMethodArity(s.index))
00269 ARCHON_THROW1(ArgumentException, "Wrong number of arguments to '" +
00270 actor->getMethodName(s.index) + "'");
00271 break;
00272 case Symbol::nil:
00273 break;
00274 }
00275 }
00276 rules[nontermIndex].addProduction(symbols);
00277 }
00278
00279 void CFG::addProd(int nontermIndex, Symbol s1, Symbol s2, Symbol s3,
00280 Symbol s4, Symbol s5, Symbol s6, Symbol s7, Symbol s8)
00281 {
00282 vector<Symbol> symbols;
00283 if(s1.type) symbols.push_back(s1);
00284 if(s2.type) symbols.push_back(s2);
00285 if(s3.type) symbols.push_back(s3);
00286 if(s4.type) symbols.push_back(s4);
00287 if(s5.type) symbols.push_back(s5);
00288 if(s6.type) symbols.push_back(s6);
00289 if(s7.type) symbols.push_back(s7);
00290 if(s8.type) symbols.push_back(s8);
00291 addProd(nontermIndex, symbols);
00292 }
00293
00294 const CFG::Symbol &CFG::nil()
00295 {
00296 static Symbol s;
00297 return s;
00298 }
00299
00300 CFG::Symbol CFG::act(int methodIndex, int arg1, int arg2, int arg3,
00301 int arg4, int arg5, int arg6, int arg7)
00302 {
00303 vector<int> args;
00304 if(arg1>-2) args.push_back(arg1);
00305 if(arg2>-2) args.push_back(arg2);
00306 if(arg3>-2) args.push_back(arg3);
00307 if(arg4>-2) args.push_back(arg4);
00308 if(arg5>-2) args.push_back(arg5);
00309 if(arg6>-2) args.push_back(arg6);
00310 if(arg7>-2) args.push_back(arg7);
00311 return Symbol(methodIndex, args);
00312 }
00313
00314 template<typename T>
00315 void set_include(const set<T> &right, set<T> &left)
00316 {
00317 typename set<T>::iterator i=left.begin();
00318 typename set<T>::const_iterator j=right.begin();
00319 while(j!=right.end())
00320 {
00321 if(i == left.end() || *j < *i) left.insert(i, *j++);
00322 else if(*j == *i) ++j;
00323 else ++i;
00324 }
00325 }
00326
00327 CFG::FirstSets::FirstSets(const CFG &g): grammar(g)
00328 {
00329 terminals.resize(g.rules.size());
00330 nullable.resize(g.rules.size());
00331 bool again;
00332 do
00333 {
00334 again = false;
00335 for(unsigned i=0; i<g.rules.size(); ++i)
00336 {
00337 const Rule &r = g.rules[i];
00338 set<int> &t = terminals[i];
00339 for(unsigned j=0; j<r.productions.size(); ++j)
00340 {
00341 const unsigned n = t.size();
00342 Item item(i, j, 0);
00343 if(includeFirstSet(item, t) && !nullable[i]) again = nullable[i] = true;
00344 else if(t.size() > n) again = true;
00345 }
00346 }
00347 }
00348 while(again);
00349 }
00350
00351 bool CFG::FirstSets::includeFirstSet(const Item &item, set<int> &t) const
00352 {
00353 const Production &p =
00354 grammar.rules[item.rule].productions[item.production];
00355 unsigned i;
00356 for(i=item.position; i<p.symbols.size(); ++i)
00357 {
00358 const Symbol &s = p.symbols[i];
00359
00360 if(s.type == Symbol::terminal)
00361 {
00362 t.insert(s.index);
00363 break;
00364 }
00365
00366 if(s.type == Symbol::nonterminal)
00367 {
00368 set_include(terminals[s.index], t);
00369 if(!nullable[s.index]) break;
00370 }
00371 }
00372 return i == p.symbols.size();
00373 }
00374
00375 string CFG::printTerminal(int i) const
00376 {
00377 if(i < 0) return "<eoi>";
00378 return Text::toUpperCase(terminals[i]);
00379 }
00380
00381 string CFG::printNonterminal(int i) const
00382 {
00383 return Text::toLowerCase(rules[i].name);
00384 }
00385
00386 string CFG::printProduction(int i, int j) const
00387 {
00388 return printNonterminal(i) + " -> " +
00389 printProductionRightSide(rules[i].productions[j]);
00390 }
00391
00392 string CFG::printItem(const Item &i) const
00393 {
00394 return printNonterminal(i.rule) + " -> " +
00395 printProductionRightSide(rules[i.rule].productions[i.production],
00396 i.position);
00397 }
00398
00399 string CFG::FirstSets::print(int width) const
00400 {
00401 vector<double> columnWidthFractions;
00402 columnWidthFractions.push_back(0.2);
00403 columnWidthFractions.push_back(0.8);
00404 Text::Table<string> table(terminals.size()+1, columnWidthFractions);
00405 table(0, 0) = "Nonterminal";
00406 table(0, 1) = "First set";
00407 for(unsigned i=0; i<terminals.size(); ++i)
00408 {
00409 const set<int> &t = terminals[i];
00410 table(i+1, 0) = grammar.printNonterminal(i);
00411 for(set<int>::const_iterator j=t.begin(); j!=t.end(); ++j)
00412 {
00413 if(j != t.begin()) table(i+1, 1) += " ";
00414 table(i+1, 1) += grammar.printTerminal(*j);
00415 }
00416 if(nullable[i])
00417 {
00418 if(table(i+1, 1).size()) table(i+1, 1) += " ";
00419 table(i+1, 1) += "<epsilon>";
00420 }
00421 }
00422
00423 return Text::format(table, width, 2, true);
00424 }
00425
00426 CFG::FollowSets::FollowSets(const FirstSets &f): grammar(f.grammar)
00427 {
00428 terminals.resize(grammar.rules.size());
00429 terminals[0].insert(-1);
00430 bool again;
00431 do
00432 {
00433 again = false;
00434 for(unsigned i=0; i<grammar.rules.size(); ++i)
00435 {
00436 const Rule &r = grammar.rules[i];
00437 for(unsigned j=0; j<r.productions.size(); ++j)
00438 {
00439 const Production &p = r.productions[j];
00440 for(unsigned k=0; k<p.symbols.size(); ++k)
00441 {
00442 const Symbol &s = p.symbols[k];
00443 if(s.type != Symbol::nonterminal) continue;
00444 set<int> &t = terminals[s.index];
00445 const unsigned n = t.size();
00446 Item item(i, j, k+1);
00447 if(f.includeFirstSet(item, t)) set_include(terminals[i], t);
00448 if(t.size() > n) again = true;
00449 }
00450 }
00451 }
00452 }
00453 while(again);
00454 }
00455
00456 string CFG::FollowSets::print(int width) const
00457 {
00458 vector<double> columnWidthFractions;
00459 columnWidthFractions.push_back(0.2);
00460 columnWidthFractions.push_back(0.8);
00461 Text::Table<string> table(terminals.size()+1, columnWidthFractions);
00462 table(0, 0) = "Nonterminal";
00463 table(0, 1) = "Follow set";
00464 for(unsigned i=0; i<terminals.size(); ++i)
00465 {
00466 const set<int> &t = terminals[i];
00467 table(i+1, 0) = grammar.printNonterminal(i);
00468 for(set<int>::const_iterator j=t.begin(); j!=t.end(); ++j)
00469 {
00470 if(*j < 0) continue;
00471 if(table(i+1, 1).size()) table(i+1, 1) += " ";
00472 table(i+1, 1) += grammar.printTerminal(*j);
00473 }
00474 if(t.size() && *t.begin()<0)
00475 {
00476 if(t.size() > 1) table(i+1, 1) += " ";
00477 table(i+1, 1) += "<eoi>";
00478 }
00479 }
00480
00481 return Text::format(table, width, 2, true);
00482 }
00483
00484 string CFG::print(int width) const
00485 {
00486 vector<double> columnWidthFractions;
00487 columnWidthFractions.push_back(0.2);
00488 columnWidthFractions.push_back(0.02);
00489 columnWidthFractions.push_back(0.78);
00490 int numberOfProductions = 0;
00491 for(unsigned i=0; i<rules.size(); ++i)
00492 numberOfProductions += max(1, static_cast<int>(rules[i].productions.size()));
00493 if(rules.size()) numberOfProductions += rules.size()-1;
00494 Text::Table<string> table(numberOfProductions, columnWidthFractions);
00495 int l = 0;
00496 for(unsigned i=0; i<rules.size(); ++i)
00497 {
00498 if(i)
00499 {
00500 table(l, 0) = " ";
00501 ++l;
00502 }
00503 table(l, 0) = printNonterminal(i);
00504 table(l, 1) = "=";
00505 const Rule &r = rules[i];
00506 for(unsigned j=0; j<r.productions.size(); ++j)
00507 {
00508 if(j) table(l, 1) = "|";
00509 table(l, 2) = printProductionRightSide(r.productions[j]);
00510 ++l;
00511 }
00512 if(!r.productions.size()) ++l;
00513 }
00514
00515 return Text::format(table, width, 1, false);
00516 }
00517
00518 void CFG::introduceNewStartSymbol()
00519 {
00520 if(!rules.size()) ARCHON_THROW1(ArgumentException,
00521 "Original grammar must have "
00522 "at least one nonterminal");
00523 vector<int> m(rules.size());
00524 for(unsigned l=0; l<rules.size(); ++l) m[l] = l+1;
00525 updateRuleIndices(m);
00526 rules.insert(rules.begin(), Rule(chooseUniqueName(rules[0].name, -1)));
00527 addProd(0, nont(1));
00528 }
00529
00530 void CFG::eliminateEpsilonProductions()
00531 {
00532 vector<bool> nullable;
00533 vector<vector<int> > nullActions;
00534 findNullableNonTerminals(nullable, nullActions);
00535
00536
00537
00538 if(nullable[0])
00539 {
00540 for(unsigned i=0; i<rules.size(); ++i)
00541 {
00542 Rule &r = rules[i];
00543 for(unsigned j=0; j<r.productions.size(); ++j)
00544 {
00545 Production &p = r.productions[j];
00546 for(unsigned k=0; k<p.symbols.size(); ++k)
00547 {
00548 Symbol &s = p.symbols[k];
00549 if(s.type == Symbol::nonterminal && s.index == 0)
00550 {
00551
00552 nullable.insert(nullable.begin(), true);
00553 nullActions.insert(nullActions.begin(), nullActions[0]);
00554 introduceNewStartSymbol();
00555 i = rules.size();
00556 j = r.productions.size();
00557 break;
00558 }
00559 }
00560 }
00561 }
00562 }
00563
00564
00565 for(unsigned i=0; i<rules.size(); ++i)
00566 {
00567 Rule &r = rules[i];
00568 vector<Production> newProductions;
00569 for(unsigned j=0; j<r.productions.size(); ++j)
00570 {
00571 vector<Symbol> prefix;
00572 addNullableCombinations(0, true, r.productions[j], nullable,
00573 nullActions, prefix, newProductions);
00574 }
00575
00576 r.productions = newProductions;
00577 }
00578
00579
00580 if(nullable[0])
00581 {
00582 vector<Symbol> epsilon;
00583 const vector<int> &v = nullActions[0];
00584 for(unsigned j=0; j<v.size(); ++j)
00585 epsilon.push_back(act(v[j]));
00586 addProd(0, epsilon);
00587 }
00588 }
00589
00590 void CFG::eliminateCycles()
00591 {
00592 CFG g = *this;
00593 g.eliminateEpsilonProductions();
00594 int cycles = 0;
00595 for(;;)
00596 {
00597
00598 list<pair<int, int> > cycle;
00599 vector<int> visitedRules(g.rules.size());
00600 for(unsigned i=0; i<g.rules.size(); ++i)
00601 if(g.eliminateCyclesVisit(i, visitedRules, cycle) == -2) break;
00602 if(!cycle.size()) break;
00603 ++cycles;
00604
00605 cerr << "Found cycle: " << g.rules[cycle.front().first].name;
00606 for(list<pair<int, int> >::iterator i=cycle.begin(); i!=cycle.end(); ++i)
00607 cerr << " -> " << g.printProductionRightSide(g.rules[i->first].productions[i->second]);
00608 cerr << "\n";
00609
00610 set<pair<int, int> > cycleProductions;
00611 set<int> cycleNonTerminals;
00612 for(list<pair<int, int> >::iterator i=cycle.begin(); i!=cycle.end(); ++i)
00613 {
00614 cycleProductions.insert(*i);
00615 cycleNonTerminals.insert(i->first);
00616 }
00617
00618
00619 for(unsigned i=0; i<g.rules.size(); ++i)
00620 {
00621 Rule &r = g.rules[i];
00622 vector<Production> newProductions;
00623 for(unsigned j=0; j<r.productions.size(); ++j)
00624 {
00625 Production &p = r.productions[j];
00626
00627 const pair<int, int> q = make_pair(i, j);
00628 if(cycleProductions.find(q) != cycleProductions.end())
00629 {
00630 if(q != cycle.back()) newProductions.push_back(p);
00631 }
00632 else
00633 {
00634 Production n;
00635 for(unsigned k=0; k<p.symbols.size(); ++k)
00636 {
00637 Symbol &s = p.symbols[k];
00638 if(s.type == Symbol::nonterminal &&
00639 cycleNonTerminals.find(s.index) != cycleNonTerminals.end())
00640 n.symbols.push_back(nont(cycle.front().first));
00641 else n.symbols.push_back(s);
00642 }
00643 newProductions.push_back(n);
00644 }
00645 }
00646
00647 r.productions = newProductions;
00648 }
00649 }
00650
00651 if(cycles) *this = g;
00652 }
00653
00654 void CFG::eliminateMidRuleActions()
00655 {
00656 const unsigned n = rules.size();
00657 for(unsigned i=0; i<n; ++i)
00658 {
00659 Rule *r = &rules[i];
00660 for(unsigned j=0; j<r->productions.size(); ++j)
00661 {
00662 Production *p = &r->productions[j];
00663 for(unsigned k=0; k+1<p->symbols.size(); ++k)
00664 {
00665 Symbol *s = &p->symbols[k];
00666 if(s->type != Symbol::action) continue;
00667 const int a = defineNonterminal(chooseUniqueName("action", 1));
00668
00669
00670 r = &rules[i];
00671 p = &r->productions[j];
00672 s = &p->symbols[k];
00673 addProd(a, *s);
00674 s->type = Symbol::nonterminal;
00675 s->index = a;
00676 s->args.clear();
00677 }
00678 }
00679 }
00680 }
00681 }
00682 }