00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020 #include <stack>
00021 #include <map>
00022 #include <list>
00023 #include <algorithm>
00024
00025 #include <archon/util/text.H>
00026 #include <archon/util/nfa.H>
00027
00028 #include <archon/util/dfa.H>
00029
00030 using namespace std;
00031
00032 namespace Archon
00033 {
00034 namespace Utilities
00035 {
00036 bool DFA::testEquivalence(int s1, int s2, const vector<int> &partitionMap) const
00037 {
00038 const State &state1 = states[s1];
00039 const State &state2 = states[s2];
00040
00041 vector<Edge>::const_iterator e1 = state1.edges.begin();
00042 vector<Edge>::const_iterator e2 = state2.edges.begin();
00043
00044 while(e1 != state1.edges.end() || e2 != state2.edges.end())
00045 {
00046 if(e2 == state2.edges.end())
00047 {
00048 if(partitionMap[e1->targetState]) return false;
00049 ++e1;
00050 continue;
00051 }
00052
00053 if(e1 == state1.edges.end())
00054 {
00055 if(partitionMap[e2->targetState]) return false;
00056 ++e2;
00057 continue;
00058 }
00059
00060 if(e1->begin < e2->begin)
00061 {
00062 if(partitionMap[e1->targetState]) return false;
00063
00064 if(e1->end < e2->begin)
00065 {
00066 ++e1;
00067 continue;
00068 }
00069 }
00070 else if(e1->begin > e2->begin)
00071 {
00072 if(partitionMap[e2->targetState]) return false;
00073
00074 if(e1->begin > e2->end)
00075 {
00076 ++e2;
00077 continue;
00078 }
00079 }
00080
00081 for(;;)
00082 {
00083 if(partitionMap[e1->targetState] !=
00084 partitionMap[e2->targetState]) return false;
00085
00086 if(e1->end < e2->end)
00087 {
00088 const unsigned u = e1->end;
00089 ++e1;
00090 if(e1 == state1.edges.end() || u < e1->begin-1) break;
00091 }
00092 else if(e1->end > e2->end)
00093 {
00094 const unsigned u = e2->end;
00095 ++e2;
00096 if(e2 == state2.edges.end() || u < e2->begin-1) break;
00097 }
00098 else
00099 {
00100 ++e1;
00101 ++e2;
00102 break;
00103 }
00104 }
00105 }
00106
00107 return true;
00108 }
00109
00110 void DFA::minimize(DFA *targetDFA) const
00111 {
00115 vector<int> partitionMap(states.size());
00116
00117
00118 {
00119 map<int, int> m;
00120 m.insert(make_pair(-1, 0)).first->second;
00121 for(unsigned i=0; i<states.size(); ++i)
00122 {
00123 const int r = states[i].finalValue;
00124 partitionMap[i] = m.insert(make_pair(r < 0 ? -1 : r, m.size())).first->second;
00125 }
00126 }
00127
00128 bool again;
00129
00130 do
00131 {
00132 again = false;
00133
00134
00135 map<int, list<int> > partitions;
00136 for(unsigned i=0; i<partitionMap.size(); ++i)
00137 partitions[partitionMap[i]].push_back(i);
00138
00139
00140
00141 int newPartitionIndex = 1;
00142
00143
00144 for(map<int, list<int> >::iterator i=partitions.begin();
00145 i!=partitions.end(); ++i)
00146 {
00147 list<int> &p = i->second;
00148
00149
00150 if(i->first == 0)
00151 {
00152 list<int>::iterator j1=p.begin();
00153 while(j1 != p.end())
00154 {
00155 list<int>::iterator j2 = j1;
00156 ++j1;
00157
00158 const vector<Edge> &e = states[*j2].edges;
00159 vector<Edge>::const_iterator i=e.begin();
00160 while(i!=e.end())
00161 {
00162 if(partitionMap[i->targetState]) break;
00163 ++i;
00164 }
00165 if(i!=e.end())
00166 {
00167 again = true;
00168 continue;
00169 }
00170
00171 partitionMap[*j2] = 0;
00172
00173 p.erase(j2);
00174 }
00175 }
00176
00177 while(p.size())
00178 {
00179 int s = p.front();
00180 p.pop_front();
00181
00182 list<int>::iterator j1=p.begin();
00183 while(j1 != p.end())
00184 {
00185 list<int>::iterator j2 = j1;
00186 ++j1;
00187
00188 if(!testEquivalence(s, *j2, partitionMap))
00189 {
00190 again = true;
00191 continue;
00192 }
00193
00194 partitionMap[*j2] = newPartitionIndex;
00195
00196 p.erase(j2);
00197 }
00198
00199 partitionMap[s] = newPartitionIndex;
00200 ++newPartitionIndex;
00201 }
00202 }
00203 }
00204 while(again);
00205
00206
00207
00208 {
00209 map<int, int> newStateMap;
00210 stack<int> uncheckedStates;
00211
00212
00213 for(unsigned i=0; i<startStates.size(); ++i)
00214 {
00215 const int s = startStates[i];
00216 pair<map<int, int>::iterator, bool> r =
00217 newStateMap.insert(make_pair(partitionMap[s], i));
00218 if(r.second)
00219 {
00220 targetDFA->startStates.push_back(targetDFA->states.size());
00221 targetDFA->states.push_back(State(states[s].finalValue));
00222 uncheckedStates.push(s);
00223 }
00224 }
00225
00226 while(uncheckedStates.size())
00227 {
00228 const int s = uncheckedStates.top();
00229 uncheckedStates.pop();
00230 const int f = newStateMap[partitionMap[s]];
00231 const vector<Edge> &v = states[s].edges;
00232 for(vector<Edge>::const_iterator e=v.begin(); e<v.end(); ++e)
00233 {
00234 const int p = partitionMap[e->targetState];
00235 if(!p) continue;
00236 pair<map<int, int>::iterator, bool> r =
00237 newStateMap.insert(make_pair(p, targetDFA->states.size()));
00238 if(r.second)
00239 {
00240 targetDFA->states.push_back(State(states[e->targetState].finalValue));
00241 uncheckedStates.push(e->targetState);
00242 }
00243 vector<Edge> &w = targetDFA->states[f].edges;
00244 if(w.size() && w.back().end == e->begin-1 &&
00245 w.back().targetState == r.first->second) w.back().end = e->end;
00246 else w.push_back(Edge(e->begin, e->end, r.first->second));
00247 }
00248 }
00249 }
00250 }
00251
00252 void DFA::makeNFA(NFA *a) const
00253 {
00254 a->states.resize(states.size());
00255 for(unsigned i=0; i<states.size(); ++i)
00256 {
00257 const State &s = states[i];
00258 NFA::State &t = a->states[i];
00259 t.finalValue = s.finalValue;
00260 t.symbolEdges.reserve(s.edges.size());
00261 for(unsigned j=0; j<s.edges.size(); ++j)
00262 {
00263 const Edge &e = s.edges[j];
00264 t.symbolEdges.push_back(NFA::Edge(e.begin, e.end, e.targetState));
00265 }
00266 }
00267 a->startStates = startStates;
00268 }
00269
00270 int DFA::addStartState(int finalValue)
00271 {
00272 startStates.push_back(states.size());
00273 return addState(finalValue);
00274 }
00275
00276 int DFA::addState(int finalValue)
00277 {
00278 states.push_back(State(finalValue));
00279 return states.size()-1;
00280 }
00281
00282 void DFA::addEdgeRange(unsigned begin, unsigned end,
00283 int originalState, int targetState)
00284 {
00285 states[originalState].edges.push_back(Edge(begin, end, targetState));
00286 }
00287
00288 DFA::DFA(const NFA &nfa, vector<set<int> > *stateSets, AnchorInfo *anchorInfo)
00289 {
00290 if(!nfa.states.size())
00291 ARCHON_THROW1(ArgumentException, "Cannot create a DFA from a null NFA");
00292 nfa.constructDFA(this, stateSets, anchorInfo);
00293 }
00294
00295 DFA::DFA(Minimize m)
00296 {
00297 m.a.minimize(this);
00298 }
00299
00300 struct DefaultPrinter: DFA::Printer
00301 {
00302 string printSymbol(unsigned s) const
00303 {
00304 return Text::toString(s);
00305 }
00306 };
00307
00308 const DFA::Printer &DFA::getDefaultPrinter()
00309 {
00310 static DefaultPrinter defaultPrinter;
00311 return defaultPrinter;
00312 }
00313
00314 string DFA::print(const Printer &printer, int width) const
00315 {
00316 vector<double> columnWidthFractions;
00317 columnWidthFractions.push_back(0.1);
00318 columnWidthFractions.push_back(0.1);
00319 columnWidthFractions.push_back(0.8);
00320 Text::Table<string> table(states.size()+1, columnWidthFractions);
00321 table(0, 0) = "State";
00322 table(0, 1) = "Final";
00323 table(0, 2) = "DFA-transitions";
00324 for(unsigned i=0; i<states.size(); ++i)
00325 {
00326 const State &s = states[i];
00327 table(i+1, 0) = Text::toString(i);
00328 if(find(startStates.begin(), startStates.end(), static_cast<int>(i))!=startStates.end())
00329 table(i+1, 0) += "*";
00330 if(s.finalValue >= 0) table(i+1, 1) = Text::toString(s.finalValue);
00331 for(unsigned j=0; j<s.edges.size(); ++j)
00332 {
00333 if(j) table(i+1, 2) += ", ";
00334 const Edge &e = s.edges[j];
00335 table(i+1, 2) += "'" + printer.printSymbol(e.begin) + "'";
00336 if(e.begin < e.end)
00337 table(i+1, 2) += "-'" + printer.printSymbol(e.end) + "'";
00338 table(i+1, 2) += " -> " + Text::toString(e.targetState);
00339 }
00340 }
00341
00342 return Text::format(table, width, 2, true);
00343 }
00344 }
00345 }