|
[ 簡単な説明 ]
文字列照合関数は、1個だけ探索する関数(2〜5)と、全探索する関数(1)を準備しました。 全探索関数 matchall( ) は、探索に使用する1個探索関数名(2〜5)を引数として与える必要があります。 bm1( ) が最も処理が速いので、通常の使用では、これで問題無いと思います。 具体的な使い方は、使用例を参照して下さい。
|
/* match.c 文字列照合 */
#include <stdio.h>
#include <stdlib.h>
#include "match.h"
void matchall(int (*func)(uchar *text, uchar *pattern), uchar *text, uchar *pattern, int *find, int nn)
{
int k, *p;
uchar *sp;
sp = text;
p = find;
while(nn--)
{
if((k = func(sp, pattern)) < 0) return;
*p++ = k++ + (int)(sp - text);
sp += k;
}
}
int bm1(uchar *text, uchar *pattern)
{
int i, len, *pi;
static uchar ptn[80];
static int skip[256];
uchar tail, *pt, *ptw, *pp, *ptlast, *pplast;
tail = *(pplast = pattern + (len = strlen((char *)pattern)) - 1);
if(len == 1)
{
pt = text;
while(*pt) if(*pt++ == tail) return (int)(pt - text - 1);
return -1;
}
if(strcmp(pattern, ptn))
{
pi = skip + 255;
while(pi >= skip) *pi-- = len;
pp = pattern;
pt = ptn;
i = len--;
while(--i) *(skip + (*pt++ = *pp++)) = i;
*pt++ = tail;
*pt = '\0';
}
if((pt = text + len) < (ptlast = text + strlen((char *)text)))
{
do
{
if(*pt == tail)
{
pp = pplast;
ptw = pt;
while(*(--pp) == *(--ptw))
if(pp == pattern) return (int)(ptw - text);
}
} while((pt += *(skip + *pt)) < ptlast);
}
return -1;
}
int bm(uchar *text, uchar *pattern)
{
int i, len, *pi;
static int skip[256];
uchar c, tail, *pt, *ptw, *pp, *ptlast, *pplast;
ptlast = text + strlen((char *)text);
if((len = strlen((char *)pattern)) == 0) return -1;
tail = *(pplast = pattern + len - 1);
if(len == 1)
{
pt = text;
while(*pt) if(*pt++ == tail) return (int)(pt - text - 1);
return -1;
}
pi = skip + 255;
while(pi >= skip) *pi-- = len;
i = --len;
pp = pattern;
while(i) *(skip + *pp++) = i--;
pt = text + len;
while(pt < ptlast)
{
if((c = *pt) == tail)
{
pp = pplast;
ptw = pt;
while(*(--pp) == *(--ptw))
if(pp == pattern) return (int)(ptw - text);
}
pt += *(skip + c);
}
return -1;
}
int position1(uchar *text, uchar *pattern)
{
uchar *pi, *pj;
pi = text;
pj = pattern;
while(*pi && *pj)
{
if(*pi == *pj)
{
pi++;
pj++;
}
else
{
pi -= (int)(pj - pattern) - 1;
pj = pattern;
}
}
if(*pj) return -1;
return (int)(pi - text) - (int)(pj - pattern);
}
int position2(uchar *text, uchar *pattern)
{
uchar c, *pi, *pj, *pk;
c = *pattern;
pi = text;
while(*pi)
{
if(*pi++ == c)
{
pk = pi;
pj = pattern + 1;
while(*pk == *pj && *pj)
{
pk++;
pj++;
}
if(*pj == '\0') return (int)(pk - text) - (int)(pj - pattern);
}
}
return -1;
}
int rks(uchar *text, uchar *pattern)
{
long dm = 1, h1 = 0, h2 = 0, q = LARGE_PRIME;
int i, m, n, d = NUMBER_OF_CHAR;
m = strlen(pattern);
n = strlen(text);
for(i = 0; i < m - 1; i++) dm = (d * dm) % q;
for(i = 0; i < m; i++)
{
h1 = (h1 * d + (long)pattern[i]) % q;
h2 = (h2 * d + (long)text[i]) % q;
}
i = 0;
while(n - i >= m && h1 != h2)
{
h2 = (h2 + d * q - (long)text[i] * dm) % q;
h2 = (h2 * d + (long)text[i + m]) % q;
i++;
}
return (i <= n - m) ? i: -1;
}
int kmp(uchar *text, uchar *pattern)
{
int m, n, j, k;
m = strlen(pattern);
n = strlen(text);
if(m > n || m <= 0) return -1;
j = k = 0;
while(j < m && k < n)
{
while(j >= 0 && text[k] != pattern[j]) j = _next[j];
k++;
j++;
}
return ((k == n && text[k] == pattern[j]) || k < n) ? k - j : -1;
}
void pre_kmp(uchar *pattern)
{
int j, m, t;
m = strlen(pattern);
_next = (int *)malloc(m * sizeof(int));
if(_next == NULL)
{
fprintf(stderr, "Error : out of memory in pre_kmp()\n");
return;
}
j = 0;
t = -1;
_next[0] = -1;
while(j < m)
{
while(t > -1 && pattern[j] != pattern[t]) t = _next[t];
t++;
j++;
_next[j] = (pattern[j] == pattern[t])? _next[t]: t;
}
}
|