00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #ifndef ARCHON_UTILITIES_DFA_H
00021 #define ARCHON_UTILITIES_DFA_H
00022
00023 #include <string>
00024 #include <vector>
00025 #include <set>
00026 #include <iostream>
00027
00028 #include <archon/util/exception.H>
00029 #include <archon/util/unicode.H>
00030
00031 namespace Archon
00032 {
00033 namespace Utilities
00034 {
00035 using namespace std;
00036
00037 struct NFA;
00038
00045 class DFA
00046 {
00047 friend class NFA;
00048
00049 public:
00050 struct State;
00051
00052 class Edge
00053 {
00054 friend class DFA;
00055 friend class State;
00056
00057 unsigned begin;
00058 unsigned end;
00059 int targetState;
00060 Edge(unsigned begin, unsigned end, int targetState):
00061 begin(begin), end(end), targetState(targetState) {}
00062
00063 struct Cmp
00064 {
00065 bool operator()(const Edge &e1, unsigned e2) const
00066 {
00067 return e1.begin < e2;
00068 }
00069 };
00070
00071 public:
00072 unsigned getBegin() const { return begin; }
00073 unsigned getEnd() const { return end; }
00074 int getTargetState() const { return targetState; }
00075 };
00076
00077 class State
00078 {
00079 friend class DFA;
00080
00081 int finalValue;
00087 vector<Edge> edges;
00088 State(int finalValue): finalValue(finalValue) {}
00089
00090 public:
00091 int getFinalValue() const { return finalValue; }
00092 int getNumberOfEdges() const { return edges.size(); }
00093 const Edge &getEdge(int i) const { return edges[i]; }
00094
00100 int step(unsigned symbol) const
00101 {
00102
00103 vector<Edge>::const_iterator i =
00104 lower_bound(edges.begin(), edges.end(), symbol, Edge::Cmp());
00105 if(i != edges.end() && i->begin == symbol) return i->targetState;
00106 if(i != edges.begin() && (i-1)->end >= symbol)
00107 return (i-1)->targetState;
00108 return -1;
00109 }
00110 };
00111
00112 private:
00113
00114
00115 vector<State> states;
00116 vector<int> startStates;
00117
00118 bool testEquivalence(int s1, int s2, const vector<int> &partitionMap) const;
00119 void minimize(DFA *) const;
00120 void makeNFA(NFA *) const;
00121
00134 int addStartState(int finalValue = -1);
00135
00148 int addState(int finalValue = -1);
00149
00161 void addEdgeRange(unsigned begin, unsigned end,
00162 int originalState, int targetState);
00163
00164 public:
00165 struct AnchorInfo
00166 {
00167 unsigned start;
00168 unsigned numberOf;
00169 AnchorInfo(unsigned start, unsigned numberOf):
00170 start(start), numberOf(numberOf) {}
00171 };
00172
00191 DFA(const NFA &, vector<set<int> > *stateSets=0, AnchorInfo *anchorInfo=0);
00192
00193 struct Minimize
00194 {
00195 const DFA &a;
00196 Minimize(const DFA &a): a(a) {}
00197 };
00198
00212 DFA(Minimize);
00213
00214 int getNumberOfStates() const { return states.size(); }
00215 const State &getState(int i) const { return states[i]; }
00216
00217 struct Printer
00218 {
00219 virtual string printSymbol(unsigned) const = 0;
00220 virtual ~Printer() {}
00221 };
00222
00223 static const Printer &getDefaultPrinter();
00224
00225 string print(const Printer &printer = getDefaultPrinter(), int width = 0) const;
00226 };
00227 }
00228 }
00229
00230 #endif // ARCHON_UTILITIES_DFA_H