Regular Expression Matching
Implement regular expression matching with support for
'.'
and'*'
.'.' Matches any single character. '*' Matches zero or more of the preceding element. The matching should cover the entire input string (not partial). The function prototype should be: bool isMatch(const char *s, const char *p) Some examples: isMatch("aa","a") → false isMatch("aa","aa") → true isMatch("aaa","aa") → false isMatch("aa", "a*") → true isMatch("aa", ".*") → true isMatch("ab", ".*") → true isMatch("aab", "c*a*b") → truetags: backtracking, dynamic programming, string
9/25/2015 update dynamic programming
O(p*s*s)
//Dynamic programming class Solution { public: bool isMatch(string s, string p) { //base condition if(s.size() == 0){ if(p.size() == 0) return true; while(p.size() >= 2 && p[1] == '*'){ p = p.substr(2, -1); } if(p.size() == 0) return true; else{ return false; } } if(p.size() == 0) return false; //this ensures s.size() and p.size() not equal to 0 bool dp[100][100]; vector<string> tokens; memset(dp, false, 100 * 100 * sizeof(bool)); parseToken(p, tokens); //match a space if(tokens[0].size() == 2) dp[0][0] = true; //initialize first column for(int i = 1; i <= s.size(); i++){ //dp[0][j] means whether p[0-j] matches an empty string if(i == 1){ if(tokens[0][0] == '.' || tokens[0][0] == s[i - 1]){ dp[i][0] = true; } else{ break; } } else{ if(tokens[0].size() == 2 && (tokens[0][0] == s[i - 1] || tokens[0][0] == '.')){ dp[i][0] = true; } else{ break; } } } for(int j = 1; j < tokens.size(); j++){ string token = tokens[j]; for(int i = 0; i <= s.size(); i++){ if(dp[i][j - 1] == false){ continue; } //match space if(token.size() == 2){ dp[i][j] = true; } for(int k = i + 1; k <= s.size(); k++){ if(k == i + 1){ if(token[0] == '.' || token[0] == s[k - 1]){ dp[k][j] = true; } else{ break; } } else{ if(token.size() == 2 && (token[0] == s[k - 1] || token[0] == '.')){ dp[k][j] = true; } else{ break; } } } } } return dp[s.size()][tokens.size() - 1]; } void parseToken(string& p, vector<string>& tokens){ int i = 0; while(i < p.size()){ if(p[i] == '*'){ tokens.back().push_back('*'); } else{ tokens.push_back(string(1, p[i])); } i++; } } };
很有味道的一道题,看了tag才做出来。
考虑用递归构造一个在string域内包含全部可能的,正则表达式展开结果的树。如果展开到某一节点,发现无法匹配,则回溯。
效率很低,但我们不得不用深搜考虑所有可能的情况。估计工业上的正则表达式匹配也用的是这样的方案把。
记string长度为m,正则表达式长度为n。则最坏情况下的时间复杂度为m^n(感觉,待论证)。
class Solution { public: bool isMatch(const char *s, const char *p) { return _isMatch(s, p, 0, 0); } bool _isMatch(const char * s, const char * p, int idxS, int idxP){ if(idxS == strlen(s) && idxP == strlen(p)){//string and reString all end return true; } else if(idxS != strlen(s) && idxP == strlen(p)){//reString end but string not end return false; } else if(idxS == strlen(s) && idxP != strlen(p)){//string end but reString not end, char token[3]; getToken(p, idxP, token); if(strlen(token) == 2){//if token is (a*) or (.*) ignore it and search deeper return _isMatch(s, p, idxS, idxP); } else{ return false; } } else{ char token[3]; getToken(p, idxP, token); if(strlen(token) == 1){ if(matchChar(s,idxS, token[0]) == false){ return false; } else{ return _isMatch(s, p, idxS, idxP); } } else{// .* or a* bool state; if(_isMatch(s, p, idxS, idxP) == true){//ignore this token, since an empty string also matches (a*) or (.*) return true; } while(matchChar(s, idxS, token[0])){ state = _isMatch(s, p, idxS, idxP); if(state == true){ return true; } } return false; } } } bool getToken(const char * p, int &pos, char * token){ if(p[pos] == '\0'){ return false; } if(p[pos + 1] == '*'){ token[0] = p[pos]; token[1] = p[pos + 1]; token[2] = '\0'; pos += 2; return true; } else{ token[0] = p[pos]; token[1] = '\0'; pos += 1; return true; } } bool matchChar(const char *s, int &pos, char c){ if(pos == strlen(s)){ return false; } else if(s[pos] == c || c == '.'){ pos +=1; return true; } else{ return false; } } };