C++ Paste by Aho Corasick
Description: None
Hide line numbers

Create new paste
Post a reply
View replies

Paste:
1  
2  
3  
4  
5  
6  
7  
8  
9  
10  
11  
12  
13  
14  
15  
16  
17  
18  
19  
20  
21  
22  
23  
24  
25  
26  
27  
28  
29  
30  
31  
32  
33  
34  
35  
36  
37  
38  
39  
40  
41  
42  
43  
44  
45  
46  
47  
48  
49  
50  
51  
52  
53  
54  
55  
56  
57  
58  
59  
60  
61  
62  
63  
64  
65  
66  
67  
68  
69  
70  
71  
72  
73  
74  
75  
76  
77  
78  
79  
80  
81  
82  
83  
84  
85  
86  
87  
88  
89  
90  
91  
92  
93  
94  
95  
96  
97  
98  
99  
100  
101  
102  
103  
104  
105  
106  
107  
108  
109  
110  
111  
112  
113  
114  
115  
116  
117  
118  
119  
120  
121  
122  
123  
124  
125  
126  
127  
128  
129  
130  
131  
132  
133  
134  
135  
136  
137  
138  
139  
140  
141  
142  
143  
144  
145  
146  
147  
148  
149  
150  
151  
152  
153  
154  
155  
156  
157  
158  
159  
160  
161  
162  
163  
164  
165  
166  
167  
168  
169  
170  
171  
172  
173  
174  
175  
176  
177  
178  
179  
180  
181  
182  
183  
184  
185  
186  
187  
188  
189  
190  
191  
192  
193  
194  
195  
196  
197  
198  
#include <iostream>
#include <fstream>
#include <string>
#include <vector>
#include <queue>
#include <map>

// Aho-Corasick keyword tree: a trie over a set of patterns, augmented with
// failure links and output links so that all occurrences of all patterns in
// a text can be reported in one left-to-right scan.
class KeywordTree {
public:
    // Called once per match. patternNr is the index of the matched pattern
    // in the vector given to the constructor; position is the index in the
    // searched text at which the match starts.
    typedef void(*foundCallback)(int patternNr, int position);

    // Builds the trie from `patterns` (pattern i gets number i), records
    // each pattern's length, then computes failure/output links.
    KeywordTree(const std::vector<std::string>& patterns)
    : root_(new Node(0, '\0'))
    {
        int n = 0;
        for (std::vector<std::string>::const_iterator it = patterns.begin();
             it != patterns.end();
             ++it, ++n) {
            insertPattern(*it, n);
            lengths_[n] = static_cast<int>(it->length());
        }
        computeLinks();
    }
    ~KeywordTree();

    // Scans `text` and invokes `cb` for every pattern occurrence found.
    void findOccurences(const std::string& text, foundCallback cb);
private:
    // Copying is disabled (declared private, intentionally not defined):
    // the tree owns its nodes through raw pointers.
    KeywordTree(const KeywordTree &kwt);
    KeywordTree& operator=(const KeywordTree &rhs);

    // Adds one pattern to the trie and marks its final node with patternNr.
    void insertPattern(const std::string& pattern, int patternNr);
    // Computes failure_ and output_ for the nodes of the finished trie.
    void computeLinks();

    struct Node {
        typedef std::map<char, Node *> Childs;
        typedef std::map<char, Node *>::iterator ChildIterator;

        // All link pointers start out null so that code walking output_
        // chains never reads an indeterminate pointer.
        Node(Node *parent, char edgeChar)
        : parent_(parent)
        , failure_(0)
        , output_(0)
        , definesPatternNr_(-1)
        , edgeChar_(edgeChar) {}

        bool isLeaf() const { return childs_.empty(); }
        bool hasChild(char ch) const { return (childs_.find(ch) != childs_.end()); }

        Node *parent_;                   // node one level up (null for the root)
        Node *failure_;                  // failure link, null until computed
        Node *output_;                   // next pattern-defining node on the failure chain, or null
        int definesPatternNr_;           // number of the pattern ending here, -1 if none
        char edgeChar_;                  // char of edge pointing to this node
        Childs childs_;                  // outgoing edges, keyed by character
    };

    // Releases the subtree rooted at n.
    void freeTree(Node *n);

    Node *root_;                         // root of the trie
    std::map<int, int> lengths_;         // pattern number -> pattern length
};

// Destructor: hands the root node to freeTree() for teardown.
KeywordTree::~KeywordTree()
{
    Node *const top = root_;
    freeTree(top);
}

void KeywordTree::insertPattern(const std::string& pattern, int patternNr)
{
    Node *n = root_;
    for (std::string::const_iterator it = pattern.begin();
         it != pattern.end();
         ++it) {
        Node::ChildIterator ch = n->childs_.find(*it);
        if (ch != n->childs_.end()) {
            n = ch->second;                          // follow path
        } else {
            n->childs_[*it] = new Node(n, *it);      // create path
            n = n->childs_[*it];
        }
    }
    n->definesPatternNr_ = patternNr;
}

void KeywordTree::computeLinks()
{
    root_->failure_ = root_;
    // set all failure links in depth 1 to root + put all nodes depth 1 in a queue
    std::queue<Node *> qu;
    for (Node::ChildIterator it = root_->childs_.begin();
         it != root_->childs_.end();
         ++it) {
        it->second->failure_ = root_;
        qu.push(it->second);
    }
    // del all nodes depth 1 in queue; put all nodes depth 2 in queue
    for (int i = 0, n = qu.size(); i != n; ++i) {
        Node *n = qu.front();
        qu.pop();
        for (Node::ChildIterator it = n->childs_.begin();
             it != n->childs_.end();
             ++it) {
            qu.push(it->second);
        }
    }
    // compute links for depht >= 2
    while (!qu.empty()) {
        Node *n = qu.front();
        qu.pop();
        char ch = n->edgeChar_;
        Node *m = n->parent_->failure_;
        while (m != root_ && !m->hasChild(ch)) {
            m = m->failure_;
        }
        if (m != root_) {
            n->failure_ = m->childs_[ch];
        } else {
            Node::ChildIterator chit = root_->childs_.find(ch);
            if (chit != root_->childs_.end()) {
                n->failure_ = chit->second;
            } else {
                n->failure_ = root_;
            }
        }
        if (n->failure_->definesPatternNr_ != -1) {
            n->output_ = n->failure_;
        } else {
            n->output_ = n->failure_->output_;
        }
        // enqueue childs
        for (Node::ChildIterator it = n->childs_.begin();
             it != n->childs_.end();
             ++it) {
            qu.push(it->second);
        }
    }
}

// Deletes the subtree rooted at `n`: all descendants first, then `n` itself.
// A null pointer is accepted and ignored.
//
// Recursion depth equals the depth of the subtree, i.e. at most the length
// of the longest inserted pattern.
void KeywordTree::freeTree(Node *n)
{
    if (n == 0) {
        return;
    }
    for (Node::ChildIterator ch = n->childs_.begin();
         ch != n->childs_.end();
         ++ch) {
        freeTree(ch->second);
    }
    // Children are gone; now release the node itself (inner nodes too, not
    // only leaves).
    delete n;
}

void KeywordTree::findOccurences(const std::string& text, foundCallback cb)
{
    Node *n = root_;
    unsigned int i = 0;
    while (i < text.length()) {
        while (n->childs_.find(text[i]) != n->childs_.end()) {
            n = n->childs_[text[i]];
            if (n->definesPatternNr_ != -1) {
                cb(n->definesPatternNr_, i - lengths_[n->definesPatternNr_] + 1);
            }
            if (n->output_ != 0) {
                Node *m = n->output_;
                while (m != 0) {
                    cb(m->definesPatternNr_, i - lengths_[m->definesPatternNr_] + 1);
                    m = m->output_;
                }
            }
            ++i;
        }    
        if (n == root_) {
            ++i;
        } else {
            n = n->failure_;
        }
    }
}

// Search patterns; a pattern's index in this vector is its pattern number.
std::vector<std::string> patterns;

// Match callback: prints the matched pattern and the reported position to
// standard output, one match per line.
void cb(int patternNr, int position)
{
    const std::string& matched = patterns[patternNr];
    std::cout << "found " << matched;
    std::cout << " @ " << position;
    std::cout << "\n";
}

// Usage: <program> <file> [pattern...]
//
// Builds a keyword tree from the patterns given after the file name, then
// echoes the file line by line, printing every pattern occurrence found in
// each line right after it. Returns 0 on success, 1 if the file argument is
// missing or the file cannot be opened.
int main(int argc, char **argv)
{
    if (argc < 2) {
        std::cerr << "usage: " << (argc > 0 ? argv[0] : "program")
                  << " <file> [pattern...]\n";
        return 1;
    }

    for (int i = 2; i < argc; ++i) {
        patterns.push_back(argv[i]);
    }

    KeywordTree kwt(patterns);

    std::ifstream inf(argv[1]);
    if (!inf) {
        std::cerr << "cannot open " << argv[1] << "\n";
        return 1;
    }

    // std::getline grows the string as needed, so lines of any length are
    // processed; a fixed-size buffer would stop at the first over-long line.
    std::string line;
    while (std::getline(inf, line)) {
        std::cout << line << "\n";
        kwt.findOccurences(line, cb);
    }
    return 0;
}

Replies:

    (some replies deleted)