942 lines
26 KiB
C++
942 lines
26 KiB
C++
/****************************************************************************
**
** Copyright (C) 2006-2009 fullmetalcoder <fullmetalcoder@hotmail.fr>
**
** This file is part of the Edyuk project <http://edyuk.org>
**
** This file may be used under the terms of the GNU General Public License
** version 3 as published by the Free Software Foundation and appearing in the
** file GPL.txt included in the packaging of this file.
**
** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
**
****************************************************************************/
|
|
|
|
#include "qnfa.h"
|
|
|
|
/*!
    \file qnfa.cpp
    \brief Implementation of the core QNFA syntax engine
*/
|
|
|
|
#include <QHash>
|
|
#include <QList>
|
|
|
|
// Number of QNFA instances currently alive: incremented by the QNFA
// constructor and decremented by the QNFA destructor.
quint32 QNFA::_count = 0;
|
|
// Addresses of every QNFA destroyed so far. The QNFA and QNFABranch
// destructors consult this list so that a node reachable through several
// owners is deleted only once.
// NOTE(review): nothing visible in this file ever removes an entry, so the
// list only grows and lookups are linear - confirm that an address reused by
// a later allocation cannot cause a live node to be skipped on teardown.
static QList<QNFA *> _deleted;
|
|
|
|
/*!
    \brief Builds a plain character node with no assertion, no action and no
    successor, and accounts for it in the live-instance counter.
*/
QNFA::QNFA() : type(Char), assertion(0), actionid(0) {
    // one more node alive
    _count += 1;

    // a fresh node is not linked to anything yet
    out.next = nullptr;

    // qDebug("alloc(0x%x) => QNFA syntax[%i];", this, _count);
}
|
|
|
|
/*!
    \brief Destroys the node together with whatever it links to.

    The node registers its own address in the file-level list of destroyed
    nodes before touching its links, so that shared nodes are only freed
    once. A context-begin node deletes its branch; any other successor is
    deleted unless it is already recorded as destroyed.
*/
QNFA::~QNFA() {
    _count -= 1;

    // some nfa nodes are shared... record ourselves so nobody frees us twice
    _deleted.append(this);

    // qDebug("free(0x%x) => QNFA syntax[%i];", this, _count);

    tree.clear();

    if (type & CxtBeg) {
        if (out.branch) {
            delete out.branch;
            out.branch = nullptr;
        }
    }

    if (!out.next)
        return;

    if (_deleted.contains(out.next))
        return;

    delete out.next;
    out.next = nullptr;
}
|
|
|
|
/*!
    \brief Deletes every chain held by the branch that has not already been
    destroyed, clearing each slot once its node is freed.
*/
QNFABranch::~QNFABranch() {
    // qDebug("branch to %i nodes", count());
    int slot = 0;

    while (slot < count()) {
        const bool alive = at(slot) && !_deleted.contains(at(slot));

        if (alive) {
            delete (*this)[slot];
            (*this)[slot] = nullptr;
        }

        ++slot;
    }
}
|
|
|
|
/*!
    \brief Tells whether \a c counts as a word character: an underscore, a
    letter or a number.
*/
inline bool isWord(QChar c) {
    if (c.unicode() == '_')
        return true;

    return c.isLetterOrNumber();
}
|
|
|
|
/*!
    \brief Tests a single character against a single NFA node.

    A node accepts a character either through its explicit character list or
    through its class assertions (Digit/NonDigit, Space/NonSpace,
    Word/NonWord). When the first entry of the character list is '\0' the
    node is negated: the result is inverted.

    \param cc character to test
    \param chain node to test against (must not be null)
    \return true when the node accepts \a cc

    NOTE(review): the class tests are nested, not independent. A digit is
    only ever tested against Digit, and a non-digit space only against
    NonDigit and Space; e.g. a digit never satisfies Word or NonSpace.
    This mirrors the historical behaviour - confirm it is intended.
*/
static bool match(QChar cc, QNFA *chain) {
    const quint16 code = cc.unicode();
    const bool hasChars = chain->c.count();

    // a leading '\0' in the character list marks a negated node
    const bool accepted = !(hasChars && (chain->c.at(0) == '\0'));
    const bool rejected = !accepted;

    if (hasChars && chain->c.contains(code))
        return accepted;

    const int flags = chain->assertion;

    if (!flags)
        return rejected;

    if (cc.isDigit())
        return (flags & Digit) ? accepted : rejected;

    if (flags & NonDigit)
        return accepted;

    if (cc.isSpace())
        return (flags & Space) ? accepted : rejected;

    if (flags & NonSpace)
        return accepted;

    if (cc.isLetterOrNumber() || (code == '_'))
        return (flags & Word) ? accepted : rejected;

    return (flags & NonWord) ? accepted : rejected;
}
|
|
|
|
/*!
    \brief Runs the lexer over one buffer and reports every match through
    \a notify.

    At each position the function first tries the plain-word tree of the
    current context (skipped when both the previous and the current
    character are word characters), then each NFA chain of the context's
    branch. A plain match whose action has bit 0x40000000 set is held back
    until the NFA chains have been tried and is only reported if none of
    them matched. Chains ending on a context-begin node push the current
    context on \c lexer->parents and enter the new one; chains ending on a
    context-end node pop it, unless a context-escape chain matched just
    before. Context content is notified with the context's actionid or'ed
    with 0x80000000.

    After the loop, buffered notifications are flushed and contexts flagged
    StayOnLine are dropped from the context stack.

    \param lexer matching state (current context and parent stack), updated
           in place; nothing is done if it is null or has no context
    \param d first character of the buffer
    \param length number of characters available at \a d
    \param notify notifier receiving (position, length, action) triples
*/
void match(QNFAMatchContext *lexer, const QChar *d, int length,
           QNFAMatchNotifier notify) {
    if (!lexer || !lexer->context) {
        // qWarning("get off you scum!");
        return;
    }

    // restore message buffering
    notify.clear();

    int olvls = lexer->parents.count(), nlvls = 0, lvls = olvls;

    if (lvls)
        notify.startBuffering();

    quint16 c = 0;
    const QChar *di = d;  // cursor; equals d + index at the top of each pass
    QNFA *chain = 0, *start = 0;
    int index = 0, lastCxt = 0, len = 0, idx = 0;
    bool bFound, bEscape = false, bEscaped = false;
    bool wPrev = false, wCur = false;

    while (index < length) {
        bFound = false;
        bEscaped = false;

        c = di->unicode();
        wCur = isWord(*di);

        // pending "ambiguous" plain match, reported only if no chain matches
        int plainIndex = -1, plainMatch = 0, plainLength = 0;

        // try fast plain matching (never from the middle of a word)
        if (!(wPrev && wCur)) {
            idx = index;
            QCharTree::const_iterator it, terminal, end;

            it = lexer->context->tree.constFind(c);

            if (it != lexer->context->tree.constEnd()) {
                do {
                    ++di;
                    ++idx;

                    end = it->next.constEnd();
                    // key 0 holds the action of a word ending on this node
                    terminal = it->next.constFind(0);

                    if (idx < length) {
                        c = di->unicode();
                        it = it->next.constFind(c);
                    } else {
                        it = end;
                    }

                    if (it == end) {
                        // The end of the buffer is a word boundary: do not
                        // read the character past the last one.
                        if ((terminal != end) &&
                            ((idx >= length) || !isWord(*di))) {
                            // word boundary found
                            // corresponding token end found
                            wPrev = isWord(*(di - 1));
                            bFound = true;

                            if (terminal->value.action & 0x40000000) {
                                // try regexps before notifying
                                plainIndex = index;
                                plainLength = idx - index;
                                plainMatch = terminal->value.action;
                            } else {
                                notify(index, idx - index,
                                       terminal->value.action);
                                index = idx;
                            }
                        }

                        break;
                    }
                } while (idx < length);

                if (bFound) {
                    bEscape = false;

                    if (plainIndex == -1)
                        continue;

                    bFound = false;
                }

                // nothing consumed: rewind the cursor
                di -= idx - index;
            }
        }

        // fallback on regexp-like NFA-based matching
        QNFABranch *children = lexer->context->out.branch;

        if (children) {
            auto max = children->count();

            for (decltype(max) i = 0; i < max; ++i) {
                len = 0;
                idx = index;
                start = chain = children->at(i);

                // A chain that runs out of nodes before reaching a Match
                // node is treated as a failed attempt instead of being
                // dereferenced.
                while (chain && ((idx < length) || (chain->type & Match))) {
                    bEscaped = false;

                    if (chain->type & Match) {
                        if ((chain->assertion & WordEnd) && (idx < length) &&
                            isWord(*di) && isWord(*(di - 1))) {
                            // qDebug("end assertion failed...");
                            break;
                        }

                        if (chain->type & CxtBeg) {
                            // entering a context
                            ++nlvls;

                            bool notifySub = notify.bufferLevel();

                            if (notifySub) {
                                // pop one message buffer
                                notify.stopBuffering();
                            }

                            // notify content of previous context until nest
                            notify(lastCxt, index - lastCxt,
                                   lexer->context->actionid | 0x80000000);

                            if (notifySub) {
                                // notify sub matches so far to avoid tricky
                                // handling later on
                                notify.flush();
                            }

                            // notify begin marker
                            notify(index, len,
                                   start->actionid ? start->actionid
                                                   : chain->actionid);

                            // update context stack
                            lexer->parents.push(lexer->context);
                            lexer->context = chain;

                            // update nest index
                            lastCxt = idx;

                            // push a message buffer
                            notify.startBuffering();

                        } else if (chain->type & CxtEnd) {
                            // leaving a context
                            if (lexer->parents.isEmpty())
                                qFatal("context nesting problem");

                            if (bEscape) {
                                // not really end : escape found...
                                bEscape = false;
                                bEscaped = true;
                            } else {
                                if (nlvls)
                                    --nlvls;
                                else
                                    --lvls;

                                // pop one message buffer
                                notify.stopBuffering();

                                // notify context content from last nest
                                notify(lastCxt, index - lastCxt,
                                       lexer->context->actionid | 0x80000000);

                                // flush sub matches
                                notify.flush();

                                // update context stack
                                lexer->context = lexer->parents.pop();

                                if (lexer->parents.count())
                                    notify.startBuffering();

                                // update nest index
                                lastCxt = idx;

                                // notify end marker
                                notify(index, len, chain->actionid);

                                if (chain->type & Exclusive) {
                                    // the end marker is consumed
                                    index = idx;
                                } else {
                                    // The end marker is left for the parent
                                    // context: index does not move, so the
                                    // cursor must be rewound as well or it
                                    // would run ahead of index by len.
                                    di -= len;
                                }

                                // compensate the increment at loop bottom
                                --index;
                                --di;

                                bFound = true;
                                break;
                            }
                        } else if (chain->type & CxtEsc) {
                            // notify(index, len, chain->actionid);
                            bEscape = !bEscape;
                        } else {
                            // a pending plain match at least as long wins
                            if (plainIndex != -1 && plainLength >= len) {
                                break;
                            }

                            notify(index, len, chain->actionid);
                            bEscape = false;
                        }

                        bFound = true;
                        index = idx;

                        // compensate the increment at loop bottom
                        --index;
                        --di;

                        break;
                    } else {
                        // "regular" nfa match (no match yet...)

                        if ((chain->assertion & WordStart) && (idx >= 1) &&
                            (isWord(*(di - 1)) && isWord(*di))) {
                            // qDebug("beg assertion failed...");
                            break;
                        }

                        QChar cc = *di;
                        bool found = match(cc, chain);

                        if (!(chain->assertion & ZeroOrOne) &&
                            !(chain->assertion & ZeroOrMore) && !found) {
                            // mandatory node not satisfied
                            break;
                        }

                        if (found) {
                            if ((chain->assertion & OneOrMore) ||
                                (chain->assertion & ZeroOrMore)) {
                                // greedy repetition
                                do {
                                    ++di;
                                    ++len;
                                    ++idx;
                                } while ((idx < length) && match(*di, chain));
                            } else {
                                ++len;
                                ++idx;
                                ++di;
                            }
                        }

                        chain = chain->out.next;
                    }
                }

                if (bFound)
                    break;

                // failed attempt: rewind before trying the next chain
                di -= len;
            }
        }

        if (!bFound) {
            if (plainIndex != -1) {
                // no chain matched: report the pending plain match
                notify(plainIndex, plainLength, plainMatch);
                index = plainIndex + plainLength;
                di += plainLength;
                continue;
            }

            bEscape = false;
            wPrev = wCur;
        } else {
            // di sits on the last consumed character; when nothing was
            // consumed at the very start of the buffer there is none.
            wPrev = (index >= 0) && isWord(*di);
        }

        ++index;
        ++di;
    }

    // flush messages

    if (!notify.bufferLevel())
        return;

    // pop down one buffer
    notify.stopBuffering();

    // notify overlapping context so far
    notify(lastCxt, length - lastCxt, lexer->context->actionid | 0x80000000);

    // notify sub matches
    notify.flush();

    // make sure we leave a blank notifier...
    notify.clear();

    // preserve escape power...
    if (bEscaped)
        return;

    // some existing left AND new one(s)
    if ((olvls == lvls) && nlvls)
        ++lvls;

    // close stay-on-line contexts, if any
    QStack<QNFA *>::iterator it = lexer->parents.begin() + lvls;

    while (it != lexer->parents.end()) {
        if ((*it)->type & StayOnLine) {
            it = lexer->parents.erase(it);
        } else {
            ++it;
        }
    }

    if ((lexer->context->type & StayOnLine) && nlvls && lexer->parents.count())
        lexer->context = lexer->parents.pop();
}
|
|
|
|
/*!
    \brief Creates an empty root context: a context-begin node owning a new,
    empty branch.
    \return the newly allocated node
*/
QNFA *lexer() {
    QNFA *root = new QNFA;

    root->out.branch = new QNFABranch;
    root->type = ContextBegin;

    return root;
}
|
|
|
|
/*!
    \brief Builds a context opener whose content is the branch of an existing
    context.

    The \a start pattern is compiled into a chain and terminated by a new
    context-begin node that points at the branch of \a other instead of
    owning a branch of its own.

    \param start pattern opening the context; it must compile to at least
           one node since the tail of the chain is dereferenced
    \param other context whose branch is reused (must not be null)
    \param cs case sensitivity used when compiling \a start
    \return first node of the opening chain
*/
QNFA *sharedContext(const QString &start, QNFA *other, bool cs) {
    QNFA *tail = 0;
    QNFA *head = sequence(start.constData(), start.length(), &tail, cs);

    QNFA *shared = new QNFA;
    shared->out.branch = other->out.branch;
    shared->type = ContextBegin;

    tail->out.next = shared;

    return head;
}
|
|
|
|
/*!
    \brief Builds a context delimited by a \a start and a \a stop pattern.

    The start chain ends on a new context-begin node with its own empty
    branch; the stop chain ends on a new context-end node and is registered
    in that branch through addNFA(). Both terminal nodes carry \a action.

    \param start pattern opening the context; must compile to at least one
           node since the tail of the chain is dereferenced
    \param stop pattern closing the context; same requirement
    \param action action id given to the begin and end nodes
    \param handler if not null, receives the context-begin node
    \param cs case sensitivity used when compiling both patterns
    \return first node of the opening chain

    The third (unnamed) parameter is not used.
*/
QNFA *context(const QString &start, const QString &stop, const QString &,
              int action, QNFA **handler, bool cs) {
    // opening marker
    QNFA *openTail = 0;
    QNFA *openHead = sequence(start.constData(), start.length(), &openTail, cs);

    QNFA *body = new QNFA;
    body->out.branch = new QNFABranch;
    body->actionid = action;
    body->type = ContextBegin;

    if (handler)
        *handler = body;
    // else
    // qDebug("no handler set [0x%x]", body);

    openTail->out.next = body;

    // closing marker
    QNFA *closeTail = 0;
    QNFA *closeHead = sequence(stop.constData(), stop.length(), &closeTail, cs);

    QNFA *terminator = new QNFA;
    terminator->actionid = action;
    terminator->type = ContextEnd;

    closeTail->out.next = terminator;

    // the closing chain is one of the things the context can match
    addNFA(body, closeHead);

    return openHead;
}
|
|
|
|
/*!
    \brief Registers a word in a context.

    A case-sensitive word without pattern syntax goes into the context's
    plain-word tree; anything else is compiled into an NFA chain guarded by
    word-start and word-end assertions and added to the context's branch.

    Nothing is done when \a lexer is not a usable context, when \a w is
    empty, or when \a w does not produce anything to match (for instance a
    lone escape character or a lone quantifier).

    \param lexer context receiving the word
    \param w word or pattern
    \param action action id notified when the word matches
    \param cs case sensitivity
*/
void addWord(QNFA *lexer, const QString &w, int action, bool cs) {
    if (!lexer || !(lexer->type & CxtBeg) || !lexer->out.branch)
        return;

    if (w.isEmpty())
        return;

    // try using the fastest way if possible
    QString pt;

    if (plain(w, &pt) && cs) {
        // the unescaped form can be empty (e.g. a dangling backslash)
        if (!pt.isEmpty())
            addWord(lexer->tree, pt, action, cs);

        return;
    }

    // fallback on (fast) regexp-like NFA-based semi-compiled parsing
    QNFA *end = 0;
    QNFA *word = sequence(w.constData(), w.length(), &end, cs);

    // the pattern may not yield any node at all
    if (!word || !end)
        return;

    word->assertion |= WordStart;

    QNFA *nfa = new QNFA;
    nfa->type = Match;
    nfa->assertion = WordEnd;
    nfa->actionid = action;

    end->out.next = nfa;

    addNFA(lexer, word);
}
|
|
|
|
/*!
    \brief Compiles a pattern into an NFA chain ending on a match node and
    adds it to a context.

    Nothing is done when \a lexer is not a usable context or when \a w does
    not compile to any node (empty pattern, lone quantifier...).

    \param lexer context receiving the chain
    \param w pattern
    \param action action id notified when the chain matches
    \param cs case sensitivity
*/
void addSequence(QNFA *lexer, const QString &w, int action, bool cs) {
    if (!lexer || !(lexer->type & CxtBeg) || !lexer->out.branch) {
        return;
    }

    QNFA *end = 0;
    QNFA *seq = sequence(w.constData(), w.length(), &end, cs);

    // the pattern may not yield any node at all
    if (!seq || !end) {
        return;
    }

    QNFA *nfa = new QNFA;
    nfa->type = Match;
    nfa->actionid = action;

    end->out.next = nfa;

    addNFA(lexer, seq);
}
|
|
|
|
QNFA *sequence(const QChar *d, int length, QNFA **end, bool cs) {
|
|
QNFA *nfa, *set = 0, *prev = 0, *first = 0;
|
|
|
|
for (int i = 0; i < length; ++i) {
|
|
QChar c = d[i];
|
|
|
|
if (c == QLatin1Char('\\')) {
|
|
c = d[++i];
|
|
|
|
if (c == QLatin1Char('n')) {
|
|
c = '\n';
|
|
} else if (c == QLatin1Char('t')) {
|
|
c = '\t';
|
|
} else if (c == QLatin1Char('r')) {
|
|
c = '\r';
|
|
}
|
|
|
|
if (set) {
|
|
set->c << c.unicode();
|
|
} else {
|
|
nfa = new QNFA;
|
|
nfa->c << c.unicode();
|
|
|
|
if (prev)
|
|
prev->out.next = nfa;
|
|
|
|
prev = nfa;
|
|
}
|
|
} else if (c == QLatin1Char('$')) {
|
|
// char classes
|
|
c = d[++i];
|
|
|
|
if (set) {
|
|
if (c == QLatin1Char('s'))
|
|
set->assertion |= Space;
|
|
else if (c == QLatin1Char('S'))
|
|
set->assertion |= NonSpace;
|
|
else if (c == QLatin1Char('d'))
|
|
set->assertion |= Digit;
|
|
else if (c == QLatin1Char('D'))
|
|
set->assertion |= NonDigit;
|
|
else if (c == QLatin1Char('w'))
|
|
set->assertion |= Word;
|
|
else if (c == QLatin1Char('W'))
|
|
set->assertion |= NonWord;
|
|
else
|
|
set->c << QLatin1Char('$').unicode() << c.unicode();
|
|
|
|
} else {
|
|
nfa = new QNFA;
|
|
|
|
if (c == QLatin1Char('s'))
|
|
nfa->assertion |= Space;
|
|
else if (c == QLatin1Char('S'))
|
|
nfa->assertion |= NonSpace;
|
|
else if (c == QLatin1Char('d'))
|
|
nfa->assertion |= Digit;
|
|
else if (c == QLatin1Char('D'))
|
|
nfa->assertion |= NonDigit;
|
|
else if (c == QLatin1Char('w'))
|
|
nfa->assertion |= Word;
|
|
else if (c == QLatin1Char('W'))
|
|
nfa->assertion |= NonWord;
|
|
else {
|
|
nfa->c << QLatin1Char('$').unicode();
|
|
--i;
|
|
}
|
|
|
|
if (prev)
|
|
prev->out.next = nfa;
|
|
|
|
prev = nfa;
|
|
}
|
|
} else if (c == QLatin1Char('[')) {
|
|
|
|
if (set) {
|
|
set->c << c.unicode();
|
|
// qWarning("Nested sets are not supported (and useless
|
|
// BTW)...");
|
|
continue;
|
|
}
|
|
|
|
// enter set...
|
|
|
|
set = new QNFA;
|
|
|
|
// qDebug("set start");
|
|
|
|
} else if (c == QLatin1Char(']')) {
|
|
|
|
if (!set) {
|
|
qWarning("Unmatched set closing marker");
|
|
continue;
|
|
}
|
|
|
|
// leave set...
|
|
|
|
if (prev)
|
|
prev->out.next = set;
|
|
|
|
prev = set;
|
|
set = 0;
|
|
|
|
// qDebug("set end");
|
|
/*
|
|
} else if ( c == QLatin1Char('(') ) {
|
|
// allow trivial groups
|
|
|
|
QList<int> cuts;
|
|
int idx = i, nest = 1;
|
|
|
|
while ( nest && (++idx < length) )
|
|
{
|
|
if ( d[idx] == '\\' )
|
|
{
|
|
++idx;
|
|
continue;
|
|
} else if ( d[idx] == '(' ) {
|
|
++nest;
|
|
} else if ( d[idx] == ')' ) {
|
|
--nest;
|
|
} else if ( (nest == 1) && (d[idx] == '|') ) {
|
|
cuts << idx;
|
|
} else if ( d[idx] == '[' ) {
|
|
while ( ++idx < length )
|
|
{
|
|
if ( d[idx] == '\\' )
|
|
{
|
|
++idx;
|
|
continue;
|
|
} else if ( d[idx] == ']' ) {
|
|
break;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
*/
|
|
} else if (set) {
|
|
|
|
if ((c == QLatin1Char('^')) && !set->c.count()) {
|
|
set->c << '\0';
|
|
continue;
|
|
}
|
|
|
|
quint16 prev =
|
|
set->c.count() ? set->c.at(set->c.length() - 1) : '\0';
|
|
|
|
if ((c == '-') && (prev != '\0') && ((i + 1) < length)) {
|
|
quint16 cse = d[++i].unicode();
|
|
|
|
for (quint16 csi = prev + 1; csi <= cse; ++csi) {
|
|
QChar csc(csi);
|
|
|
|
if (c.isLetter() && !cs)
|
|
set->c << c.toLower().unicode()
|
|
<< c.toUpper().unicode();
|
|
else
|
|
set->c << csi;
|
|
}
|
|
} else {
|
|
if (c.isLetter() && !cs)
|
|
set->c << c.toLower().unicode() << c.toUpper().unicode();
|
|
else
|
|
set->c << c.unicode();
|
|
}
|
|
// qDebug("set << %c", c.toLatin1());
|
|
|
|
} else if (c == QLatin1Char('+')) {
|
|
if (prev)
|
|
prev->assertion |= OneOrMore;
|
|
} else if (c == QLatin1Char('*')) {
|
|
if (prev)
|
|
prev->assertion |= ZeroOrMore;
|
|
} else if (c == QLatin1Char('?')) {
|
|
if (prev)
|
|
prev->assertion |= ZeroOrOne;
|
|
} else {
|
|
nfa = new QNFA;
|
|
|
|
if (c.isLetter() && !cs) {
|
|
nfa->c << c.toLower().unicode() << c.toUpper().unicode();
|
|
} else {
|
|
nfa->c << c.unicode();
|
|
}
|
|
|
|
if (prev)
|
|
prev->out.next = nfa;
|
|
|
|
prev = nfa;
|
|
}
|
|
|
|
if (!first)
|
|
first = prev;
|
|
}
|
|
|
|
if (end) {
|
|
*end = prev;
|
|
}
|
|
|
|
return first;
|
|
}
|
|
|
|
bool plain(const QString &word, QString *dest) {
|
|
if (dest)
|
|
dest->clear();
|
|
|
|
for (int i = 0; i < word.length(); i++) {
|
|
QChar c = word.at(i);
|
|
|
|
if (c == QLatin1Char('\\')) {
|
|
if (dest && ((i + 1) < word.length())) {
|
|
c = word.at(++i);
|
|
|
|
if (c == QLatin1Char('n'))
|
|
dest->append('\n');
|
|
else if (c == QLatin1Char('t'))
|
|
dest->append('\t');
|
|
else if (c == QLatin1Char('r'))
|
|
dest->append('\r');
|
|
else
|
|
dest->append(c);
|
|
}
|
|
} else if (c == QLatin1Char('[') || c == QLatin1Char(']') ||
|
|
c == QLatin1Char('+') || c == QLatin1Char('*') ||
|
|
c == QLatin1Char('?') || c == QLatin1Char('$')) {
|
|
if (dest)
|
|
dest->clear();
|
|
|
|
return false;
|
|
} else {
|
|
|
|
if (dest)
|
|
dest->append(c);
|
|
}
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
/*!
    \brief Inserts a plain word in a character tree.

    Each character of the word is one level of the tree; the action is
    stored under key 0 of the node reached by the last character.

    Only case-sensitive words are inserted: the case-insensitive variant is
    disabled (see below), so nothing is done when \a cs is false. Nothing is
    done either for an empty word.

    \param tree tree receiving the word
    \param w word, without any pattern syntax
    \param action action id stored for the word
    \param cs case sensitivity
*/
void addWord(QCharTree &tree, const QString &w, int action, bool cs) {
    // qDebug("Adding word to char tree : %s", qPrintable(w));

    // there is no first character to start from
    if (w.isEmpty())
        return;

    if (cs) {
        quint16 u = w.at(0).unicode();
        QCharTree::iterator it = tree.find(u), tmp;

        if (it == tree.end())
            it = tree.insert(u, QCharTreeNode(u));

        for (int i = 1; i < w.length(); i++) {
            u = w.at(i).unicode();

            tmp = it->next.find(u);

            if (tmp == it->next.end())
                tmp = it->next.insert(u, QCharTreeNode(u));

            it = tmp;
        }

        // add action handler
        QCharTreeNode node;
        node.value.action = action;

        it->next[0] = node;
    } else if (0) {
        // Disabled case-insensitive insertion, kept for reference.
        // NOTE(review): ltmp is never cleared between characters, so it
        // would accumulate nodes of earlier levels if this were enabled.
        QChar c = w.at(0);
        quint16 u = c.unicode();

        QCharTree::iterator it, tmp;
        QList<QCharTree::iterator> l, ltmp;

        if (c.isLetter()) {
            u = c.toLower().unicode();
            tmp = tree.find(u);

            if (tmp == tree.end())
                tmp = tree.insert(u, QCharTreeNode(u));

            l << tmp;

            u = c.toUpper().unicode();
            tmp = tree.find(u);

            if (tmp == tree.end())
                tmp = tree.insert(u, QCharTreeNode(u));

            l << tmp;
        } else {
            tmp = tree.find(u);

            if (tmp == tree.end())
                tmp = tree.insert(u, QCharTreeNode(u));

            l << tmp;
        }

        for (int i = 1; i < w.length(); ++i) {
            c = w.at(i);
            QList<QChar> lc;

            if (c.isLetter())
                lc << c.toLower() << c.toUpper();
            else
                lc << c;

            foreach (c, lc) {
                u = c.unicode();

                foreach (it, l) {
                    tmp = it->next.find(u);

                    if (tmp == it->next.end())
                        tmp = it->next.insert(u, QCharTreeNode(u));

                    ltmp << tmp;
                }
            }

            l = ltmp;
        }

        // add action handler
        QCharTreeNode node;
        node.value.action = action;

        foreach (it, l)
            it->next[0] = node;
    }
}
|
|
|
|
/*!
    \brief Squeezes the plain-word tree of a node and of every node reachable
    from it.

    A node flagged Match is followed through its branch (when it has one);
    any other node is followed through its successor.

    \param nfa node to start from (must not be null)
*/
void squeeze(QNFA *nfa) {
    squeeze(nfa->tree);

    if (!(nfa->type & Match)) {
        if (nfa->out.next)
            squeeze(nfa->out.next);

        return;
    }

    QNFABranch *alternatives = nfa->out.branch;

    if (!alternatives)
        return;

    for (int pos = 0; pos < alternatives->count(); ++pos)
        squeeze(alternatives->at(pos));
}
|
|
|
|
/*!
    \brief Squeezes one level of a character tree, then every level below
    it.
    \param lvl level to squeeze
*/
void squeeze(QCharTreeLevel &lvl) {
    lvl.squeeze();

    for (QCharTreeLevel::iterator node = lvl.begin(); node != lvl.end();
         ++node) {
        squeeze(node->next);
    }
}
|