00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef ARCHON_UTILITIES_CFG_H
00021 #define ARCHON_UTILITIES_CFG_H
00022
00023 #include <string>
00024 #include <vector>
00025 #include <map>
00026 #include <set>
00027 #include <list>
00028
00029 #include <archon/util/exception.H>
00030 #include <archon/util/ref.H>
00031 #include <archon/util/text.H>
00032
00033 namespace Archon
00034 {
00035 namespace Utilities
00036 {
00037 using namespace std;
00038
00049 class CFG
00050 {
00051 public:
00052 struct Rule;
00053 struct FirstSets;
00054 struct FollowSets;
00055
00056 struct Item
00057 {
00058 int rule;
00059 int production;
00060 int position;
00061 Item(int rule, int production, int position):
00062 rule(rule), production(production), position(position) {}
00063 };
00064
00065 struct Symbol
00066 {
00067 friend class CFG;
00068 friend class FirstSets;
00069 friend class FollowSets;
00070
00071 public:
00072 enum Type
00073 {
00074 nil = 0,
00075 terminal,
00076 nonterminal,
00077 action
00078 };
00079
00080 private:
00081 Type type;
00082 int index;
00083 vector<int> args;
00084
00085 Symbol(): type(nil) {}
00086 Symbol(Type type, int index): type(type), index(index) {}
00087 Symbol(int index, vector<int> args):
00088 type(action), index(index), args(args) {}
00089
00090 public:
00091 Type getType() const { return type; }
00092 int getIndex() const { return index; }
00093 const vector<int> &getArgs() const { return args; }
00094 };
00095
00096 class Production
00097 {
00098 friend class CFG;
00099 friend class Rule;
00100 friend class FirstSets;
00101 friend class FollowSets;
00102
00103 vector<Symbol> symbols;
00104
00105 Production() {}
00106 Production(const vector<Symbol> &symbols): symbols(symbols) {}
00107
00108 public:
00109 int getNumberOfSymbols() const { return symbols.size(); }
00110 const Symbol &getSymbol(int i) const { return symbols[i]; }
00111 };
00112
00113 class Rule
00114 {
00115 friend class CFG;
00116 friend class FirstSets;
00117 friend class FollowSets;
00118
00119 string name;
00120 vector<Production> productions;
00121
00122 Rule(string name): name(name) {}
00123
00124 void addProduction(const vector<Symbol> &symbols)
00125 {
00126 productions.push_back(Production(symbols));
00127 }
00128
00129 public:
00130 int getNumberOfProductions() const { return productions.size(); }
00131 const Production &getProduction(int i) const { return productions[i]; }
00132 };
00133
00134 struct Actor
00135 {
00136 virtual ~Actor() {}
00137
00141 virtual int getNumberOfMethods() const = 0;
00142
00147 virtual int getMethodArity(int methodIndex) const = 0;
00148
00153 virtual string getMethodName(int methodIndex) const = 0;
00154 };
00155
00156 private:
00157 vector<string> terminals;
00158 vector<Rule> rules;
00159
00160 map<string, int> terminalMap;
00161 map<string, int> nonterminalMap;
00162
00163 const Actor *actor;
00164
00165 void updateRuleIndices(vector<int>);
00166 string chooseUniqueName(string, int);
00167 void findNullableNonTerminals(vector<bool> &, vector<vector<int> > &);
00168 void addNullableCombinations(unsigned, bool,
00169 const Production &,
00170 const vector<bool> &,
00171 const vector<vector<int> > &,
00172 vector<Symbol> &,
00173 vector<Production> &);
00174 int eliminateCyclesVisit(int, vector<int> &, list<pair<int, int> > &);
00175 string printProductionRightSide(const Production &, int mark=-1) const;
00176
00177 public:
00178 CFG(const Actor *a=0): actor(a) {}
00179 ~CFG();
00180
00181 int getNumberOfTerminals() const { return terminals.size(); }
00182 int getNumberOfRules() const { return rules.size(); }
00183 const Rule &getRule(int i) const { return rules[i]; }
00184
00185 int defineTerminal(string name);
00186 int defineNonterminal(string name);
00187
00188 static const Symbol &nil();
00189
00190 void addProd(int nontermIndex, const vector<Symbol> &);
00191 void addProd(int nontermIndex, Symbol=nil(), Symbol=nil(),
00192 Symbol=nil(), Symbol=nil(), Symbol=nil(),
00193 Symbol=nil(), Symbol=nil(), Symbol=nil());
00194
00198 static Symbol term(int terminalIndex)
00199 {
00200 return Symbol(Symbol::terminal, terminalIndex);
00201 }
00202
00206 static Symbol nont(int nonterminalIndex)
00207 {
00208 return Symbol(Symbol::nonterminal, nonterminalIndex);
00209 }
00210
00219 static Symbol act(int methodIndex,
00220 int arg1 = -2, int arg2 = -2, int arg3 = -2,
00221 int arg4 = -2, int arg5 = -2, int arg6 = -2,
00222 int arg7 = -2);
00223
00227 static Symbol null() { return act(-1); }
00228
00232 static Symbol copy(int arg) { return act(-2, arg); }
00233
00237 static Symbol concat(int arg1, int arg2) { return act(-3, arg1, arg2); }
00238
00239 class FirstSets
00240 {
00241 friend struct FollowSets;
00242
00243 const CFG &grammar;
00244 vector<set<int> > terminals;
00245 vector<bool> nullable;
00246
00247 public:
00248 FirstSets(const CFG &);
00249
00255 bool includeFirstSet(const Item &, set<int> &) const;
00256
00257 string print(int width = 0) const;
00258 };
00259
00260 class FollowSets
00261 {
00262 const CFG &grammar;
00263 vector<set<int> > terminals;
00264
00265 public:
00266 FollowSets(const FirstSets &);
00267 const set<int> &get(int i) const { return terminals[i]; }
00268 string print(int width = 0) const;
00269 };
00270
00275 string printTerminal(int index) const;
00276
00277 string printNonterminal(int index) const;
00278 string printProduction(int rule, int production) const;
00279 string printItem(const Item &) const;
00280 string print(int width = 0) const;
00281
00282 void introduceNewStartSymbol();
00283
00323 void eliminateEpsilonProductions();
00324
00371 void eliminateCycles();
00372
00378
00379
00391 void eliminateMidRuleActions();
00392 };
00393 }
00394 }
00395
00396 #endif // ARCHON_UTILITIES_CFG_H