00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 
00010 
00011 
00012 
00013 
00014 
00015 
00016 
00017 
00018 
00019 
00020 #ifndef ARCHON_UTILITIES_DFA_H
00021 #define ARCHON_UTILITIES_DFA_H
00022 
00023 #include <string>
00024 #include <vector>
00025 #include <set>
00026 #include <iostream>
00027 
00028 #include <archon/util/exception.H>
00029 #include <archon/util/unicode.H>
00030 
00031 namespace Archon
00032 {
00033   namespace Utilities
00034   {
00035     using namespace std;
00036 
00037     struct NFA;
00038 
00045     class DFA
00046     {
00047       friend class NFA;
00048 
00049     public:
00050       struct State;
00051 
00052       class Edge
00053       {
00054         friend class DFA;
00055         friend class State;
00056 
00057         unsigned begin;
00058         unsigned end; 
00059         int targetState;
00060         Edge(unsigned begin, unsigned end, int targetState):
00061           begin(begin), end(end), targetState(targetState) {}
00062 
00063         struct Cmp
00064         {
00065           bool operator()(const Edge &e1, unsigned e2) const
00066           {
00067             return e1.begin < e2;
00068           }
00069         };
00070 
00071       public:
00072         unsigned getBegin() const { return begin; }
00073         unsigned getEnd() const { return end; }
00074         int getTargetState() const { return targetState; }
00075       };
00076 
00077       class State
00078       {
00079         friend class DFA;
00080 
00081         int finalValue; 
00087         vector<Edge> edges;
00088         State(int finalValue): finalValue(finalValue) {}
00089 
00090       public:
00091         int getFinalValue() const { return finalValue; }
00092         int getNumberOfEdges() const { return edges.size(); }
00093         const Edge &getEdge(int i) const { return edges[i]; }
00094 
00100         int step(unsigned symbol) const
00101         {
00102           
00103           vector<Edge>::const_iterator i =
00104             lower_bound(edges.begin(), edges.end(), symbol, Edge::Cmp());
00105           if(i != edges.end() && i->begin == symbol) return i->targetState;
00106           if(i != edges.begin() && (i-1)->end >= symbol)
00107             return (i-1)->targetState;
00108           return -1;
00109         }
00110       };
00111 
00112     private:
00113       
00114       
00115       vector<State> states;
00116       vector<int> startStates; 
00117 
00118       bool testEquivalence(int s1, int s2, const vector<int> &partitionMap) const;
00119       void minimize(DFA *) const;
00120       void makeNFA(NFA *) const;
00121 
00134       int addStartState(int finalValue = -1);
00135 
00148       int addState(int finalValue = -1);
00149 
00161       void addEdgeRange(unsigned begin, unsigned end,
00162                         int originalState, int targetState);
00163 
00164     public:
00165       struct AnchorInfo
00166       {
00167         unsigned start;
00168         unsigned numberOf;
00169         AnchorInfo(unsigned start, unsigned numberOf):
00170           start(start), numberOf(numberOf) {}
00171       };
00172 
00191       DFA(const NFA &, vector<set<int> > *stateSets=0, AnchorInfo *anchorInfo=0);
00192 
00193       struct Minimize
00194       {
00195         const DFA &a;
00196         Minimize(const DFA &a): a(a) {}
00197       };
00198 
00212       DFA(Minimize);
00213 
00214       int getNumberOfStates() const { return states.size(); }
00215       const State &getState(int i) const { return states[i]; }
00216 
00217       struct Printer
00218       {
00219         virtual string printSymbol(unsigned) const = 0;
00220         virtual ~Printer() {}
00221       };
00222 
00223       static const Printer &getDefaultPrinter();
00224 
00225       string print(const Printer &printer = getDefaultPrinter(), int width = 0) const;
00226     };
00227   }
00228 }
00229 
00230 #endif // ARCHON_UTILITIES_DFA_H