Aho Corasick Algo在c/c++中表示十六进制



我的问题-我试图使用我在搜索net时为c++找到的aho-corasick算法,它目前只搜索基于字符的字符串,我希望它修改它以搜索不同字符的基于十六进制的字符串。对改进代码的任何帮助都将不胜感激。如果我只是修改字符串文本,它就会进入ifinite循环。

int buildMatchingMachine(const vector<string> &words, char lowestChar = 'a', char highestChar = 'z')
{
    memset(out, 0, sizeof out);
    memset(f, -1, sizeof f);
    memset(g, -1, sizeof g);
    int states = 1; // Initially, we just have the 0 state
    for (int i = 0; i < words.size(); ++i)
    {
        const string &keyword = words[i];
        int currentState = 0;
        for (int j = 0; j < keyword.size(); ++j)
        {
            int c = keyword[j] - lowestChar;
            if (g[currentState][c] == -1)
            { // Allocate a new node
                g[currentState][c] = states++;
            }
            currentState = g[currentState][c];
        }
        out[currentState] |= (1 << i); // There's a match of keywords[i] at node currentState.
    }
    // State 0 should have an outgoing edge for all characters.
    for (int c = 0; c < MAXC; ++c)
    {
        if (g[0][c] == -1)
        {
            g[0][c] = 0;
        }
    }

// Now, let's build the failure function
queue<int> q;
for (int c = 0; c <= highestChar - lowestChar; ++c)
{ // Iterate over every possible input
    // All nodes s of depth 1 have f[s] = 0
    if (g[0][c] != -1 && g[0][c] != 0)
    {
        f[g[0][c]] = 0;
        q.push(g[0][c]);
    }
}
while (q.size())
{
    int state = q.front();
    q.pop();
    for (int c = 0; c <= highestChar - lowestChar; ++c)
    {
        if (g[state][c] != -1)
        {
            int failure = f[state];
            while (g[failure][c] == -1)
            {
                failure = f[failure];
            }
            failure = g[failure][c];
            f[g[state][c]] = failure;
            out[g[state][c]] |= out[failure]; // Merge out values
            q.push(g[state][c]);
        }
    }
}

return states;
}
int openFile::findNextState(int currentState, char nextInput, char lowestChar = 'a')
{
    int answer = currentState;
    int c = nextInput - lowestChar;
    while (g[answer][c] == -1)
        answer = f[answer];
    return g[answer][c];
}

我找到了一个有效的解决方案,您只需要将基于十六进制的符号的最低字符和最高字符重新定义为其相应的ascii值而不是int值。还可以将MAXS和MAXC更改为合适的数字,现在代码适用于基于十六进制值。

最新更新