Regular Expression Matching
Implement regular expression matching with support for
'.'and'*'.'.' Matches any single character. '*' Matches zero or more of the preceding element. The matching should cover the entire input string (not partial). The function prototype should be: bool isMatch(const char *s, const char *p) Some examples: isMatch("aa","a") → false isMatch("aa","aa") → true isMatch("aaa","aa") → false isMatch("aa", "a*") → true isMatch("aa", ".*") → true isMatch("ab", ".*") → true isMatch("aab", "c*a*b") → truetags: backtracking, dynamic programming, string
9/25/2015 update dynamic programming
O(p*s*s)
//Dynamic programming
class Solution {
public:
bool isMatch(string s, string p) {
//base condition
if(s.size() == 0){
if(p.size() == 0) return true;
while(p.size() >= 2 && p[1] == '*'){
p = p.substr(2, -1);
}
if(p.size() == 0) return true;
else{
return false;
}
}
if(p.size() == 0) return false;
//this ensures s.size() and p.size() not equal to 0
bool dp[100][100];
vector<string> tokens;
memset(dp, false, 100 * 100 * sizeof(bool));
parseToken(p, tokens);
//match a space
if(tokens[0].size() == 2) dp[0][0] = true;
//initialize first column
for(int i = 1; i <= s.size(); i++){
//dp[0][j] means whether p[0-j] matches an empty string
if(i == 1){
if(tokens[0][0] == '.' || tokens[0][0] == s[i - 1]){
dp[i][0] = true;
}
else{
break;
}
}
else{
if(tokens[0].size() == 2 && (tokens[0][0] == s[i - 1] || tokens[0][0] == '.')){
dp[i][0] = true;
}
else{
break;
}
}
}
for(int j = 1; j < tokens.size(); j++){
string token = tokens[j];
for(int i = 0; i <= s.size(); i++){
if(dp[i][j - 1] == false){
continue;
}
//match space
if(token.size() == 2){
dp[i][j] = true;
}
for(int k = i + 1; k <= s.size(); k++){
if(k == i + 1){
if(token[0] == '.' || token[0] == s[k - 1]){
dp[k][j] = true;
}
else{
break;
}
}
else{
if(token.size() == 2 && (token[0] == s[k - 1] || token[0] == '.')){
dp[k][j] = true;
}
else{
break;
}
}
}
}
}
return dp[s.size()][tokens.size() - 1];
}
void parseToken(string& p, vector<string>& tokens){
int i = 0;
while(i < p.size()){
if(p[i] == '*'){
tokens.back().push_back('*');
}
else{
tokens.push_back(string(1, p[i]));
}
i++;
}
}
};
很有味道的一道题,看了tag才做出来。
考虑用递归构造一个在string域内包含全部可能的,正则表达式展开结果的树。如果展开到某一节点,发现无法匹配,则回溯。
效率很低,但我们不得不用深搜考虑所有可能的情况。估计工业上的正则表达式匹配也用的是这样的方案把。
记string长度为m,正则表达式长度为n。则最坏情况下的时间复杂度为m^n(感觉,待论证)。
class Solution {
public:
bool isMatch(const char *s, const char *p) {
return _isMatch(s, p, 0, 0);
}
bool _isMatch(const char * s, const char * p, int idxS, int idxP){
if(idxS == strlen(s) && idxP == strlen(p)){//string and reString all end
return true;
}
else if(idxS != strlen(s) && idxP == strlen(p)){//reString end but string not end
return false;
}
else if(idxS == strlen(s) && idxP != strlen(p)){//string end but reString not end,
char token[3];
getToken(p, idxP, token);
if(strlen(token) == 2){//if token is (a*) or (.*) ignore it and search deeper
return _isMatch(s, p, idxS, idxP);
}
else{
return false;
}
}
else{
char token[3];
getToken(p, idxP, token);
if(strlen(token) == 1){
if(matchChar(s,idxS, token[0]) == false){
return false;
}
else{
return _isMatch(s, p, idxS, idxP);
}
}
else{// .* or a*
bool state;
if(_isMatch(s, p, idxS, idxP) == true){//ignore this token, since an empty string also matches (a*) or (.*)
return true;
}
while(matchChar(s, idxS, token[0])){
state = _isMatch(s, p, idxS, idxP);
if(state == true){
return true;
}
}
return false;
}
}
}
bool getToken(const char * p, int &pos, char * token){
if(p[pos] == '\0'){
return false;
}
if(p[pos + 1] == '*'){
token[0] = p[pos];
token[1] = p[pos + 1];
token[2] = '\0';
pos += 2;
return true;
}
else{
token[0] = p[pos];
token[1] = '\0';
pos += 1;
return true;
}
}
bool matchChar(const char *s, int &pos, char c){
if(pos == strlen(s)){
return false;
}
else if(s[pos] == c || c == '.'){
pos +=1;
return true;
}
else{
return false;
}
}
};
