搜档网
当前位置:搜档网 › 字符串匹配算法

字符串匹配算法


用语言实现蛮力法、Horspool、Boyer-Moore、Knuth-Morris-Pratt算法,针对不同的数据规模研究它们的性能。数据规模应在100,000以上。


int BruteForceStringMatch(string str, string pattern)
{
for( int i = 0; i <= str.size()- pattern.size(); i++ )
{
int j = 0;
while( (j < pattern.size()) && (pattern[j] == str[i+j]))
{
j++;

}
if( j == pattern.size())
return i;
}
return (-1);
}
void ShiftTable(string pattern, int table[], int l)
{
for(int i = 0; i < 127; i++)
table[i] = l;
for(int j = 0; j < l-1; j++)
table[pattern[j]] = l-1-j;
}
void HorspoolMatch(string str, int table[], string pSize)

int main()
{
string a = "abcdefg";
string mode = "z";
cout<return 0;
}



#include
using namespace std;

const int HASH_SIZE=256;
int table[HASH_SIZE];//对应字符表的255个字符建哈希表,表示对不匹配字符 向右移动的距离

void ShiftTable(char pattern[]){
/*建立一个以字母表中字符为索引的Table数组*/
int m=strlen(pattern);
for(int i=0;itable[i]=m;
for(int j=0;jtable[ pattern[j] ]=m-1-j;

}

int HorspoolMatching(char pattern[],char text[]){//平均效率为O(n)
/*
pre:模式pattern,文本text
post:第一个匹配字串的最左端字符下标,没有找到匹配字串返回-1
*/
ShiftTable(pattern);
int m=strlen(pattern);
int n=strlen(text);

int i=m-1;
while(i <= n-1){
int k=0; //匹配的字符个数
while(k<=m-1 && pattern[m-1-k] == text[i-k] )
k++;
if(k==m)
return i-m+1;
else
i=i+table[ text[i] ]; //以模式最后一个字符确定移动距离,与B.M算法的最大区别,简化形式
}

return -1;
}


int main(){

char p[20],t[1000];

while(cin>>t && t!="."){

int times=5;
while(times--){
cin>>p;
cout<}

}
return 1;
}


#include
#include
#include
#include
#ifndef ssize_t
typedef off_t ssize_t;
#endif
using namespace std;
void compute_last_occurrence(const string& needle , vector& last_occurrence)
{
last_occurrence.resize(256,-1);
for ( size_t i = 0 ; i < needle.size() ; i++ )
{
last_occurrence[needle[i]] = i;
}
}
void compute_prefix_function(const string& needle , vector& prefix_function)
{
if ( needle.size() == 0 )
{
return;
}
prefix_function.resize( needle.size() , 0 );
size_t d = 0 ;
for ( size_t i = 1 ; i < needle.size() ; i++ )
{


while ( d > 0 && needle[d] != needle[i] )
{
d = prefix_function[d-1];
}
if ( needle[i] == needle[d] )
{
d++;
}
prefix_function[i]=d;
}
}
void compute_goodsuffix_heuristic( const string& needle ,vector& goodsuffix_heristic)
{
string reverse_needle;
copy( needle.rbegin() , needle.rend() , back_inserter( reverse_needle ) );
vector prefix_function,reverse_prefix_function;
compute_prefix_function( needle , prefix_function );
compute_prefix_function( reverse_needle , reverse_prefix_function );
size_t m = needle.size();
goodsuffix_heristic.resize( m + 1 , m - prefix_function[ m - 1 ] );
for ( size_t l = 1 ; l <= m ; l++ )
{
size_t t = reverse_prefix_function[l-1];
size_t j = m - t ;
if ( goodsuffix_heristic[j] > l - t )
{
goodsuffix_heristic[j] = l - t ;
}
}
}
void boyer_moore_matcher(const string& haystack,const string& needle)
{
size_t n=haystack.size();
size_t m=needle.size();
if ( n == 0 || m == 0 || n < m )
{
cout<<"Invalid input"<return;
}
vector last_occurrence;
compute_last_occurrence(needle,last_occurrence);
vector goodsuffix_heristic;
compute_goodsuffix_heuristic( needle , goodsuffix_heristic );
size_t offset = 0 ;
while ( offset <= n - m )
{
ssize_t j = m - 1 ;
while ( j >= 0 && needle[j] == haystack[offset+j] )
{
j--;
}
if ( j < 0 )
{
cout<<"Find a match at offset "<offset += goodsuffix_heristic[0];
}
else
{
offset += max( goodsuffix_heristic[j+1] ,
j - last_occurrence[haystack[offset+j]] );
}
}
}
int main()
{
string haystack,needle;
while ( true )
{
cout<<"Input the haystack:"<getline(cin,haystack);
cout<<"Input the needle:"<getline(cin,needle);
cout<<"Match results:"<boyer_moore_matcher(haystack,needle);
}
}




#include
#include
#include
#include
using namespace std;
int* GetNextVal(const char *s, int &len)
{
len = strlen(s);
int *next = new int[len];
int i = 0;
int j = -1;
next[0] = -1;
while(i{
if(j==-1 || s[i]==s[j])
{
++i;
++j;
next[i] = j;
}

else
{
j = next[j];
}
}
return next;
}

int KMP(const char *s, const char *t)
{
int slen,tlen;
int i,j;
int *next = GetNextVal(t, tlen);
slen = strlen(s);
i = 0;
j = 0;
while(i{
if(j==-1 || s[i]==t[j])
{
++i;
++j;
}
else
{
j = next[j];
}
}

delete[] next;

if(j==tlen)
return i - tlen;
return -1;
}
int main(int argc, char *argv[])
{
char s[128],t[128];
cout<<"请输入字符串s,t:"<while(cin>>s>>t)
{
int pos1 = KMP(s,t);
cout<<"字符串"<}
return 0;
}




相关主题