【编译原理】【词法分析】【正则表达式】【NFA】【DFA】【C++】正则表达式转DFA&NFA,判断字符串是否符合正则表达式的匹配算法


正则表达式转DFA&NFA

显然,正则表达式、NFA、DFA的概念都很简单,所以直接上代码,注释应该解释地比较清楚,没有万能头文件的自行替换需求库,如果有疑问的可以留言。

网盘链接
[自行补全]/s/1pbGT_wpB662TwFrnukXgGQ?pwd=TSIT
提取码:TSIT

运行截图

图片[1] - 【编译原理】【词法分析】【正则表达式】【NFA】【DFA】【C++】正则表达式转DFA&NFA,判断字符串是否符合正则表达式的匹配算法 - MaxSSL
图片[2] - 【编译原理】【词法分析】【正则表达式】【NFA】【DFA】【C++】正则表达式转DFA&NFA,判断字符串是否符合正则表达式的匹配算法 - MaxSSL
图片[3] - 【编译原理】【词法分析】【正则表达式】【NFA】【DFA】【C++】正则表达式转DFA&NFA,判断字符串是否符合正则表达式的匹配算法 - MaxSSL

原理可以参考这篇博客传送门

本次程序由四个文件组成

文件名描述
Regular_Expression.cpp处理正则表达式
NFA.cpp正则表达式转NFA
DFA.cppNFA转DFA
MainProcess.cpp主要处理程序
//Regular_Expression.cpp#ifndef __REGULAR_EXPRESSION__#define __REGULAR_EXPRESSION__#includeusing namespace std;const int OPERANDS = 1; // 操作符const int OPERATOR = 2; // 运算符const int ILLEGAL_CHAR = -1;// 非法字符// 正则表达式包含非法字符class Regular_Expression_Input_Exception{ public:char c;Regular_Expression_Input_Exception(char c){this->c = c;}string what()const{ string ret;ret = c + " is not in the charset" ;return ret;}};// 正则表达式格式错误class Regular_Expression_Format_Exceprion{public:Regular_Expression_Format_Exceprion(){}string what()const{string ret;ret = "Format Error ";return ret;}};class Regular_Expression{ public:Regular_Expression(string & s){expression = s;expression = Regular_Expression_Pretreatment(expression);expression = Regular_Expression_Infix2PostFix(expression);}string get_postfix(){return expression;}private:string expression;inline string Regular_Expression_Pretreatment(string&);inline string Regular_Expression_Infix2PostFix(string&);};// 返回字符类型int get_chartype(char c){if( ('a' <= c && c <= 'z')||('A' <= c && c <= 'Z')||('0' <= c && c <= '9')||(c == '.')) return OPERANDS;if( (c == '|')||(c == '*')||(c == '(')||(c == ')')|| (c == '&')) return OPERATOR;return ILLEGAL_CHAR;}// 设置操作符优先级int get_priorioty(char c){switch (c){case '*': return 3; break;case '&': return 2; break;case '|': return 1; break;case '(': return 0; break;default:return ILLEGAL_CHAR;break;}}//检查并预处理正则表达式 添加&作为连接符string Regular_Expression::Regular_Expression_Pretreatment(string & pre_expression){assert(pre_expression.size() > 0);string treated_expression;try{ char prechar ;int pretype ;char nchar = pre_expression[0];int ntype = get_chartype(nchar);if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);treated_expression.push_back(nchar);prechar = nchar;pretype = ntype;int len = pre_expression.length();for(int i = 1 ; i<len ; ++i){nchar = pre_expression[i];ntype = get_chartype(nchar);if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);// 当第一位是操作数 , * , )且第二位为操作数或(if( (pretype == OPERANDS || prechar == '*' || prechar == ')') && (ntype == OPERANDS || nchar == '(')) // 使用 & 作为连接符treated_expression.push_back('&');treated_expression.push_back(nchar);pretype = ntype;prechar = nchar;}return treated_expression;}catch(const Regular_Expression_Input_Exception& e){std::cerr << e.what() << "\n";exit(0);}catch(const exception& e){std::cerr << e.what() << "\n";exit(0);}return treated_expression;}// 正则表达式的中缀表达式转成后缀表达式string Regular_Expression::Regular_Expression_Infix2PostFix(string & pre_expression){assert(pre_expression.size() > 0);string treated_expression;try{vector<char> op;int len = pre_expression.length();int ntype ;char nchar;for(int i=0 ; i<len ; ++i){nchar = pre_expression[i];ntype = get_chartype(nchar);if(ntype == ILLEGAL_CHAR) throw Regular_Expression_Input_Exception(nchar);// 遇到 ( 直接压入栈中;if(nchar == '('){op.push_back(nchar);}// 遇到 ) 将运算符出栈并输出,直到遇到 ( ,将 ( 出栈但不输出else if(nchar == ')'){while(!op.empty() && op.back() != '('){treated_expression.push_back(op.back());op.pop_back();}// 没有左括号匹配if(op.empty()) throw Regular_Expression_Format_Exceprion();// 弹出左括号op.pop_back();}/*遇到 * 、 & 、 | 运算符:如果栈为空,直接将运算符压入栈中;如果栈不为空,弹出栈中优先级大于等于当前运算符的运算符并输出,再将当前运算符入栈。*/else if(ntype == OPERATOR){ int npriority = get_priorioty(nchar);while(!op.empty() && get_priorioty(op.back()) >= npriority){treated_expression.push_back(op.back());op.pop_back();}op.push_back(nchar);}else if(ntype == OPERANDS){treated_expression.push_back(nchar);}}while(!op.empty()){nchar = op.back();// 缺少右括号匹配if(nchar == '(') throw Regular_Expression_Format_Exceprion();treated_expression.push_back(nchar);op.pop_back();}return treated_expression;}catch(const Regular_Expression_Input_Exception& e){std::cerr << e.what() << "\n";exit(0);}catch(const Regular_Expression_Format_Exceprion& e){std::cerr << e.what() << "\n";exit(0);}catch(const std::exception& e){std::cerr << e.what() << '\n';exit(0);}return treated_expression; }// #ifndef __MAINPROCESS__// int main()// {// #ifdef __LOCAL// freopen("usrin.txt", "r", stdin);// freopen("usrout.txt", "w", stdout);// #endif// string str;// cin >> str;// Regular_Expression reg(str);// cout << reg.get_postfix() << "\n";// }// #endif#endif
//NFA.cpp#ifndef __NFA__#define __NFA__#include "Regular_Expression.cpp"#includeusing namespace std;struct Nfastate{ // '#' 视为空转移Nfastate(char c = '#' , Nfastate* trans = NULL , vector<Nfastate*> vec = vector<Nfastate*>()){input = c;chTrans = trans;epTrans.clear();for(auto x:vec) epTrans.insert(x);}void add_New_epTrans(Nfastate& n){epTrans.insert(&n);}void add_New_epTrans(Nfastate* n){epTrans.insert(n);}void add_new_epTrans(vector<Nfastate*> &vec){for(auto x:vec) epTrans.insert(x);}void print();set<Nfastate*> closure();char input; // 转移字符Nfastate* chTrans;// 转移到的状态set<Nfastate*> epTrans; //空转移状态集合};set<Nfastate*> Nfastate::closure(){set<Nfastate*> s;s.insert(this);bool hasnewnode = 1;while(hasnewnode){hasnewnode = 0;for(auto x:epTrans){if(s.find(x) == s.end()){hasnewnode = 1;s.insert(x);}}}return s;}void Nfastate::print(){cerr << "------Nfastate---------\n";cerr << "input = " << input << "\n";cerr << "chTrans = " << chTrans << "\n";cerr << "epTrans = {" ;for(auto x:epTrans) cerr << x << " ";cerr << "}\n---------------------------------\n";}struct subNFA{subNFA(Nfastate* phead = NULL , Nfastate* ptail = NULL){head = phead;tail = ptail;}subNFA(Nfastate& ohead , Nfastate & otail){head = &ohead;tail = &otail;}subNFA(char c){Nfastate *ntail = new Nfastate;Nfastate *nhead = new Nfastate(c , ntail);// fix1if(c == '#') nhead->chTrans = NULL;head = nhead;tail = ntail;}void operator=(const subNFA& ele){head = ele.head;tail = ele.tail;}void print();Nfastate *head; // 头部指针Nfastate *tail; //尾部指针};void subNFA::print(){int nodecnt = 0;map<Nfastate* , int> mp;queue<Nfastate*> q;q.push(head);mp[head] = ++nodecnt;auto addnode = [&](Nfastate* e){if(mp.count(e)) return mp[e];mp[e] = ++nodecnt;q.push(e);return mp[e];};while(!q.empty()){Nfastate* now = q.front();q.pop();std::cout << "node : " << mp[now] << "\n";if(now->chTrans != NULL) {addnode(now->chTrans);std::cout << "chTrans : " << now->input << "\n";std::cout << "Transnode : " << mp[now->chTrans] << "\n";}if(!now->epTrans.empty())std:: cout <<"epTrans : {";bool first = 1;for(auto x:now->epTrans){addnode(x);std::cout << (first" />"":" , ") << mp[x] ; first = 0;}if(!now->epTrans.empty())std::cout << "}\n";cout << "\n";}std::cout << "Start node: "<< mp[head] << "\n";std::cout << "End node : " << mp[tail] << "\n";}class NFA{public:NFA(string &expression):reg(expression),subnfa(){// cerr << reg.get_postfix() << "\n";Build_NFA();} void print();void Build_NFA();Nfastate* gethead();Nfastate* gettail();private:subNFA subnfa;// 最终的NFARegular_Expression reg; // 正则表达式转换 };Nfastate* NFA::gethead(){return subnfa.head;}Nfastate* NFA::gettail(){return subnfa.tail;}void NFA::print(){ //debug// std::cout << nfa.head << " " << nfa.tail << "\n"; std::cout << reg.get_postfix() << "\n";subnfa.print();}void NFA::Build_NFA(){string expression = reg.get_postfix();vector<subNFA> sta_nfa; // nfa栈try{int len = expression.length();for(int i=0 ; i<len ; ++i){char nchar = expression[i];int ntype = get_chartype(nchar);//如果遇到操作数a,则新建一个NFA,转换弧上的值为a,将这个NFA压入栈中if(ntype == OPERANDS){subNFA n(nchar);sta_nfa.push_back(n);//debug// cerr << nchar << ' ' <input << "\n";// cerr << n.head << " " << n.tail << "\n";// n.head->print();// n.tail->print();}else if(nchar == '*'){// 取不出一个元素if(sta_nfa.size() < 1) throw Regular_Expression_Format_Exceprion();subNFA n1 = sta_nfa.back();sta_nfa.pop_back();subNFA n('#');(*n.head).add_New_epTrans(n1.head);(*n1.tail).add_New_epTrans(n.tail);(*n1.tail).add_New_epTrans(n.head);sta_nfa.push_back(n);}else if(nchar == '|'){// 取不出两个元素if(sta_nfa.size() < 2) throw Regular_Expression_Format_Exceprion();subNFA n1 = sta_nfa.back();sta_nfa.pop_back();subNFA n2 = sta_nfa.back();sta_nfa.pop_back();subNFA n('#');//debug// n.head->print();// n.tail->print();(*n.head).add_New_epTrans(n1.head);(*n.head).add_New_epTrans(n2.head);(*n1.tail).add_New_epTrans(n.tail);(*n2.tail).add_New_epTrans(n.tail);//debug// n.head->print();// n.tail->print();sta_nfa.push_back(n);}else if(nchar == '&'){// 取不出两个元素if(sta_nfa.size() < 2) throw Regular_Expression_Format_Exceprion();subNFA n2 = sta_nfa.back();sta_nfa.pop_back();subNFA n1 = sta_nfa.back();sta_nfa.pop_back();(*n1.tail).add_New_epTrans(n2.head);n1.tail = n2.tail;sta_nfa.push_back(n1);}else throw Regular_Expression_Format_Exceprion();}if(sta_nfa.size() != 1) throw Regular_Expression_Format_Exceprion();subnfa = sta_nfa[0];}catch(const Regular_Expression_Input_Exception& e){std::cerr << e.what() << "\n";exit(0);}catch(const Regular_Expression_Format_Exceprion& e){std::cerr << e.what() << "\n";exit(0);}catch(const std::exception& e){std::cerr << e.what() << '\n';exit(0);}}// #ifndef __MAINPROCESS__// int main()// {// #ifdef __LOCAL// freopen("usrin.txt", "r", stdin);// freopen("usrout.txt", "w", stdout);// #endif// string str;// cin >> str;// NFA nfa(str);// nfa.print();// }// #endif#endif
//DFA.cpp#ifndef __DFA__#define __DFA__#include #include "NFA.cpp"using namespace std;struct Dfastate{ // 获取对于字符c的闭包Dfastate move(char c , Nfastate* tail){Dfastate ret;for(auto x:closure){if(x->input == c) ret.closure.insert(x->chTrans);}ret.n_closure(tail);return ret;}void n_closure(Nfastate*);void upGrade_End_info(Nfastate*);bool isEnd; //判断是否是结束状态set<Nfastate*> closure; //闭包void print();bool operator<(const Dfastate& b) const;bool operator>(const Dfastate& b) const;bool operator==(const Dfastate& b) const;};void Dfastate::print(){cerr << "----------Dfastate-------------\n";for(auto x:closure) cerr << x << " "; cerr << "\n";cerr << (isEnd ? "YES" : "NO") << "\n";cerr << "-------------------------------\n\n";}bool Dfastate::operator<(const Dfastate& b)const{ //debug// cerr << " < \n";return closure < b.closure;}bool Dfastate::operator>(const Dfastate& b)const{//debug// cerr < \n";return closure > b.closure;}bool Dfastate::operator==(const Dfastate& b)const{//debug// cerr << " == \n";return closure == b.closure;}void Dfastate::upGrade_End_info(Nfastate* tail){if(closure.find(tail) != closure.end()) isEnd = true;else isEnd = false;}void Dfastate::n_closure(Nfastate * tail){bool hasnewnode = 1;while(hasnewnode){hasnewnode = 0;for(auto x:closure){for(auto y:x->epTrans){if(closure.find(y) == closure.end()){hasnewnode = 1;closure.insert(y);}}}}upGrade_End_info(tail);}struct subDFA{bool isEnd; //是否是结束状态map<int , subDFA*> to;//转移图};class DFA{ public:DFA(string &expression){// expression = str;for(auto x:expression){int ntype = get_chartype(x);if(ntype == OPERANDS) charset.insert(x);if(ntype == ILLEGAL_CHAR) {cerr << "ILLEGAL CHARACHTER\n";exit(0);}}Build_DFA(expression);//debug// for(auto x:charset) // {// cerr << x << " ";// }}void print();bool match(string&);private:void Build_DFA(string&);set<char> charset;//用到的字符集subDFA* dfahead; //DFA的初始状态// string expression;};bool DFA::match(string& matchstr){ subDFA* nowp = dfahead;if(nowp == NULL) return false;for(auto ch:matchstr){ if(!nowp->to.count(ch)) return false;nowp = nowp->to[ch];}return nowp->isEnd;}void DFA::print(){int nodecnt = 0;map<subDFA* , int> mp;queue<subDFA*> q;q.push(dfahead);mp[dfahead] = ++nodecnt;while(!q.empty()){subDFA * now = q.front();q.pop();for(auto next:now->to){if(mp.count(next.second)) continue;else{mp[next.second] = ++nodecnt;q.push(next.second);}}}for(auto x:mp){cout << "node[" << x.second <<"] : ";for(auto y:x.first->to){cout << "(" << char(y.first) << " , " << mp[y.second] << ") ";}cout << "isEnd : " << ( (x.first->isEnd) ? "YES" : "NO" );cout << "\n";}}void DFA::Build_DFA(string & expression){NFA nfa(expression);Nfastate* head = nfa.gethead();Nfastate* tail = nfa.gettail();map<Dfastate , subDFA*> mp_Dfastate_PsubDFA;queue<Dfastate> q_Dfastate;// 获取初始状态的闭包Dfastate T;T.closure.insert(head);//debug// T.print();T.n_closure(tail);// T.print();//初始化map queue 以及转移的初始状态mp_Dfastate_PsubDFA[T] = new subDFA;mp_Dfastate_PsubDFA[T]->isEnd = T.isEnd;q_Dfastate.push(T);dfahead = mp_Dfastate_PsubDFA[T];while(!q_Dfastate.empty() ){T = q_Dfastate.front();q_Dfastate.pop();//debug// T.print();subDFA* now = mp_Dfastate_PsubDFA[T];for(auto ch:charset){Dfastate next = T.move(ch , tail);//debug// cerr << "char = " << ch << "\n";// next.print();if(next.closure.empty()) continue;//状态没出现过就新建节点并加入队列if(!mp_Dfastate_PsubDFA.count(next)){mp_Dfastate_PsubDFA[next] = new subDFA;mp_Dfastate_PsubDFA[next]->isEnd = next.isEnd;q_Dfastate.push(next);}now->to[ch] = mp_Dfastate_PsubDFA[next];}}//debug// cerr << mp_Dfastate_PsubDFA.size() << "\n";} const string Digit ="(0|1|2|3|4|5|6|7|8|9)" ;const string Pos_Digit = "(1|2|3|4|5|6|7|8|9)";// #ifndef __MAINPROCESS__// int main()// {// #ifdef __LOCAL// freopen("usrin.txt", "r", stdin);// freopen("usrout.txt", "w", stdout);// #endif// string str;// string interger = "(0|" + Pos_Digit + Digit + "*)";// string decimal = "(0|" + Digit + "*" + Pos_Digit + ")";// string pat = "(" + interger + ")|(" + interger + "." + decimal + ")"; // cout << pat<< "\n";// DFA dfa(pat);// dfa.print();// cin >> str;// cout << dfa.match(str);// }// #endif#endif
//Main_Process.cpp#ifndef __MAINPRECESS__#define __MAINPROCESS__#include#include "DFA.cpp"using namespace std;int main(){#ifdef __LOCALfreopen("usrin.txt", "r", stdin);freopen("usrout.txt", "w", stdout);#endifstring str;string interger = "(0|" + Pos_Digit + Digit + "*)";string decimal = "(0|" + Digit + "*" + Pos_Digit + ")";string pat = "(" + interger + ")|(" + interger + "." + decimal + ")"; // pat = "(eedd";// pat 为正规式// str 为待检测的字符串pat = "(a|b)*acd*(c|d)";str = "aacdc";cout <<"Regular_Expression : " <<pat<< "\n";// cin >> str;cout << "MatchString : " << str << "\n\n";// 构建DFADFA dfa(pat);cout << "DFA GRAPH EXPRESSION:\n";// 输出DFAdfa.print();cout << "\n";//判断是否符合cout << (dfa.match(str) ? "Legal\n" : "Illegal\n");}#endif
© 版权声明
THE END
喜欢就支持一下吧
点赞0 分享