[ 簡単な説明 ]
文字列照合関数は、1個だけ探索する関数(2〜5)と、全探索する関数(1)を準備しました。 全探索関数 matchall( ) は、探索に使用する1個探索関数名(2〜5)を引数として与える必要があります。 bm1( ) が最も処理が速いので、通常の使用では、これで問題無いと思います。 具体的な使い方は、使用例を参照して下さい。
|
/* match.c 文字列照合 */ #include <stdio.h> #include <stdlib.h> #include "match.h" void matchall(int (*func)(uchar *text, uchar *pattern), uchar *text, uchar *pattern, int *find, int nn) { int k, *p; uchar *sp; sp = text; p = find; while(nn--) { if((k = func(sp, pattern)) < 0) return; *p++ = k++ + (int)(sp - text); sp += k; } } int bm1(uchar *text, uchar *pattern) { int i, len, *pi; static uchar ptn[80]; static int skip[256]; uchar tail, *pt, *ptw, *pp, *ptlast, *pplast; tail = *(pplast = pattern + (len = strlen((char *)pattern)) - 1); if(len == 1) { pt = text; while(*pt) if(*pt++ == tail) return (int)(pt - text - 1); return -1; } if(strcmp(pattern, ptn)) { pi = skip + 255; while(pi >= skip) *pi-- = len; pp = pattern; pt = ptn; i = len--; while(--i) *(skip + (*pt++ = *pp++)) = i; *pt++ = tail; *pt = '\0'; } if((pt = text + len) < (ptlast = text + strlen((char *)text))) { do { if(*pt == tail) { pp = pplast; ptw = pt; while(*(--pp) == *(--ptw)) if(pp == pattern) return (int)(ptw - text); } } while((pt += *(skip + *pt)) < ptlast); } return -1; } int bm(uchar *text, uchar *pattern) { int i, len, *pi; static int skip[256]; uchar c, tail, *pt, *ptw, *pp, *ptlast, *pplast; ptlast = text + strlen((char *)text); if((len = strlen((char *)pattern)) == 0) return -1; tail = *(pplast = pattern + len - 1); if(len == 1) { pt = text; while(*pt) if(*pt++ == tail) return (int)(pt - text - 1); return -1; } pi = skip + 255; while(pi >= skip) *pi-- = len; i = --len; pp = pattern; while(i) *(skip + *pp++) = i--; pt = text + len; while(pt < ptlast) { if((c = *pt) == tail) { pp = pplast; ptw = pt; while(*(--pp) == *(--ptw)) if(pp == pattern) return (int)(ptw - text); } pt += *(skip + c); } return -1; } int position1(uchar *text, uchar *pattern) { uchar *pi, *pj; pi = text; pj = pattern; while(*pi && *pj) { if(*pi == *pj) { pi++; pj++; } else { pi -= (int)(pj - pattern) - 1; pj = pattern; } } if(*pj) return -1; return (int)(pi - text) - (int)(pj - pattern); } int position2(uchar *text, uchar *pattern) { uchar c, *pi, *pj, *pk; c = *pattern; pi = text; while(*pi) { if(*pi++ == c) { pk = pi; pj = pattern + 1; while(*pk == *pj && *pj) { pk++; pj++; } if(*pj == '\0') return (int)(pk - text) - (int)(pj - pattern); } } return -1; } int rks(uchar *text, uchar *pattern) { long dm = 1, h1 = 0, h2 = 0, q = LARGE_PRIME; int i, m, n, d = NUMBER_OF_CHAR; m = strlen(pattern); n = strlen(text); for(i = 0; i < m - 1; i++) dm = (d * dm) % q; for(i = 0; i < m; i++) { h1 = (h1 * d + (long)pattern[i]) % q; h2 = (h2 * d + (long)text[i]) % q; } i = 0; while(n - i >= m && h1 != h2) { h2 = (h2 + d * q - (long)text[i] * dm) % q; h2 = (h2 * d + (long)text[i + m]) % q; i++; } return (i <= n - m) ? i: -1; } int kmp(uchar *text, uchar *pattern) { int m, n, j, k; m = strlen(pattern); n = strlen(text); if(m > n || m <= 0) return -1; j = k = 0; while(j < m && k < n) { while(j >= 0 && text[k] != pattern[j]) j = _next[j]; k++; j++; } return ((k == n && text[k] == pattern[j]) || k < n) ? k - j : -1; } void pre_kmp(uchar *pattern) { int j, m, t; m = strlen(pattern); _next = (int *)malloc(m * sizeof(int)); if(_next == NULL) { fprintf(stderr, "Error : out of memory in pre_kmp()\n"); return; } j = 0; t = -1; _next[0] = -1; while(j < m) { while(t > -1 && pattern[j] != pattern[t]) t = _next[t]; t++; j++; _next[j] = (pattern[j] == pattern[t])? _next[t]: t; } } |