| /* |
| ** 2008 Nov 28 |
| ** |
| ** The author disclaims copyright to this source code. In place of |
| ** a legal notice, here is a blessing: |
| ** |
| ** May you do good and not evil. |
| ** May you find forgiveness for yourself and forgive others. |
| ** May you share freely, never taking more than you give. |
| ** |
| ****************************************************************************** |
| ** |
| ** This module contains code that implements a parser for fts3 query strings |
| ** (the right-hand argument to the MATCH operator). Because the supported |
| ** syntax is relatively simple, the whole tokenizer/parser system is |
| ** hand-coded. |
| */ |
| #if !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) |
| |
| /* |
| ** By default, this module parses the legacy syntax that has been |
| ** traditionally used by fts3. Or, if SQLITE_ENABLE_FTS3_PARENTHESIS |
| ** is defined, then it uses the new syntax. The differences between |
| ** the new and the old syntaxes are: |
| ** |
| ** a) The new syntax supports parenthesis. The old does not. |
| ** |
| ** b) The new syntax supports the AND and NOT operators. The old does not. |
| ** |
| ** c) The old syntax supports the "-" token qualifier. This is not |
| ** supported by the new syntax (it is replaced by the NOT operator). |
| ** |
| ** d) When using the old syntax, the OR operator has a greater precedence |
| ** than an implicit AND. When using the new, both implicit and explicit |
| ** AND operators have a higher precedence than OR. |
| ** |
| ** If compiled with SQLITE_TEST defined, then this module exports the |
| ** symbol "int sqlite3_fts3_enable_parentheses". Setting this variable |
| ** to zero causes the module to use the old syntax. If it is set to |
| ** non-zero the new syntax is activated. This is so both syntaxes can |
| ** be tested using a single build of testfixture. |
| ** |
| ** The following describes the syntax supported by the fts3 MATCH |
| ** operator in a format similar to that used by the lemon parser |
| ** generator. This module does not actually use lemon; it uses a |
| ** custom, hand-written parser. |
| ** |
| ** query ::= andexpr (OR andexpr)*. |
| ** |
| ** andexpr ::= notexpr (AND? notexpr)*. |
| ** |
| ** notexpr ::= nearexpr (NOT nearexpr|-TOKEN)*. |
| ** notexpr ::= LP query RP. |
| ** |
| ** nearexpr ::= phrase (NEAR distance_opt nearexpr)*. |
| ** |
| ** distance_opt ::= . |
| ** distance_opt ::= / INTEGER. |
| ** |
| ** phrase ::= TOKEN. |
| ** phrase ::= COLUMN:TOKEN. |
| ** phrase ::= "TOKEN TOKEN TOKEN...". |
| */ |
| |
/*
** Select which query syntax the parser accepts.
**
** In SQLITE_TEST builds the choice is a global variable (initially 0, the
** legacy syntax) so that both syntaxes can be exercised from one binary.
** In all other builds it is a compile-time constant: 1 when
** SQLITE_ENABLE_FTS3_PARENTHESIS is defined, 0 otherwise.
*/
#if defined(SQLITE_TEST)
int sqlite3_fts3_enable_parentheses = 0;
#elif defined(SQLITE_ENABLE_FTS3_PARENTHESIS)
# define sqlite3_fts3_enable_parentheses 1
#else
# define sqlite3_fts3_enable_parentheses 0
#endif

/*
** Distance assigned to a NEAR operator that carries no explicit "/N"
** suffix.
*/
#define SQLITE_FTS3_DEFAULT_NEAR_PARAM 10
| |
| #include "fts3Int.h" |
| #include <string.h> |
| #include <assert.h> |
| |
| typedef struct ParseContext ParseContext; |
| struct ParseContext { |
| sqlite3_tokenizer *pTokenizer; /* Tokenizer module */ |
| const char **azCol; /* Array of column names for fts3 table */ |
| int nCol; /* Number of entries in azCol[] */ |
| int iDefaultCol; /* Default column to query */ |
| sqlite3_context *pCtx; /* Write error message here */ |
| int nNest; /* Number of nested brackets */ |
| }; |
| |
/*
** Return non-zero if c is one of the six ASCII whitespace characters
** (space, tab, newline, carriage return, vertical tab, form feed) and
** zero for every other value.
**
** This is used instead of the standard isspace() because isspace() has
** undefined behaviour for arguments outside the range of unsigned char.
** Taking a plain char and comparing against explicit literals means that
** negative values simply yield 0.
*/
static int fts3isspace(char c){
  switch( c ){
    case ' ':
    case '\t':
    case '\n':
    case '\r':
    case '\v':
    case '\f':
      return 1;
    default:
      return 0;
  }
}
| |
/*
** Obtain nByte bytes from sqlite3_malloc() and clear them. Returns a
** pointer to the zeroed allocation, or NULL if sqlite3_malloc() returned
** NULL.
*/
static void *fts3MallocZero(int nByte){
  void *pNew = sqlite3_malloc(nByte);
  if( pNew==0 ){
    return 0;
  }
  memset(pNew, 0, nByte);
  return pNew;
}
| |
| |
| /* |
| ** Extract the next token from buffer z (length n) using the tokenizer |
| ** and other information (column names etc.) in pParse. Create an Fts3Expr |
| ** structure of type FTSQUERY_PHRASE containing a phrase consisting of this |
| ** single token and set *ppExpr to point to it. If the end of the buffer is |
| ** reached before a token is found, set *ppExpr to zero. It is the |
| ** responsibility of the caller to eventually deallocate the allocated |
| ** Fts3Expr structure (if any) by passing it to sqlite3_free(). |
| ** |
| ** Return SQLITE_OK if successful, or SQLITE_NOMEM if a memory allocation |
| ** fails. |
| */ |
| static int getNextToken( |
| ParseContext *pParse, /* fts3 query parse context */ |
| int iCol, /* Value for Fts3Phrase.iColumn */ |
| const char *z, int n, /* Input string */ |
| Fts3Expr **ppExpr, /* OUT: expression */ |
| int *pnConsumed /* OUT: Number of bytes consumed */ |
| ){ |
| sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| int rc; |
| sqlite3_tokenizer_cursor *pCursor; |
| Fts3Expr *pRet = 0; |
| int nConsumed = 0; |
| |
| rc = pModule->xOpen(pTokenizer, z, n, &pCursor); |
| if( rc==SQLITE_OK ){ |
| const char *zToken; |
| int nToken, iStart, iEnd, iPosition; |
| int nByte; /* total space to allocate */ |
| |
| pCursor->pTokenizer = pTokenizer; |
| rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition); |
| |
| if( rc==SQLITE_OK ){ |
| nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken; |
| pRet = (Fts3Expr *)fts3MallocZero(nByte); |
| if( !pRet ){ |
| rc = SQLITE_NOMEM; |
| }else{ |
| pRet->eType = FTSQUERY_PHRASE; |
| pRet->pPhrase = (Fts3Phrase *)&pRet[1]; |
| pRet->pPhrase->nToken = 1; |
| pRet->pPhrase->iColumn = iCol; |
| pRet->pPhrase->aToken[0].n = nToken; |
| pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1]; |
| memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken); |
| |
| if( iEnd<n && z[iEnd]=='*' ){ |
| pRet->pPhrase->aToken[0].isPrefix = 1; |
| iEnd++; |
| } |
| if( !sqlite3_fts3_enable_parentheses && iStart>0 && z[iStart-1]=='-' ){ |
| pRet->pPhrase->isNot = 1; |
| } |
| } |
| nConsumed = iEnd; |
| } |
| |
| pModule->xClose(pCursor); |
| } |
| |
| *pnConsumed = nConsumed; |
| *ppExpr = pRet; |
| return rc; |
| } |
| |
| |
/*
** Resize allocation pOrig to nNew bytes using sqlite3_realloc(). If the
** resize fails, pOrig is released so that the caller never has to free
** it. Returns the (possibly moved) allocation, or NULL on failure.
**
** sqlite3_realloc() with a size of zero or less behaves like
** sqlite3_free(pOrig) and returns NULL. In that case pOrig is already
** gone and must not be freed a second time, hence the nNew>0 test.
*/
static void *fts3ReallocOrFree(void *pOrig, int nNew){
  void *pRet = sqlite3_realloc(pOrig, nNew);
  if( !pRet && nNew>0 ){
    sqlite3_free(pOrig);
  }
  return pRet;
}
| |
| /* |
| ** Buffer zInput, length nInput, contains the contents of a quoted string |
| ** that appeared as part of an fts3 query expression. Neither quote character |
| ** is included in the buffer. This function attempts to tokenize the entire |
| ** input buffer and create an Fts3Expr structure of type FTSQUERY_PHRASE |
| ** containing the results. |
| ** |
| ** If successful, SQLITE_OK is returned and *ppExpr set to point at the |
| ** allocated Fts3Expr structure. Otherwise, either SQLITE_NOMEM (out of memory |
| ** error) or SQLITE_ERROR (tokenization error) is returned and *ppExpr set |
| ** to 0. |
| */ |
| static int getNextString( |
| ParseContext *pParse, /* fts3 query parse context */ |
| const char *zInput, int nInput, /* Input string */ |
| Fts3Expr **ppExpr /* OUT: expression */ |
| ){ |
| sqlite3_tokenizer *pTokenizer = pParse->pTokenizer; |
| sqlite3_tokenizer_module const *pModule = pTokenizer->pModule; |
| int rc; |
| Fts3Expr *p = 0; |
| sqlite3_tokenizer_cursor *pCursor = 0; |
| char *zTemp = 0; |
| int nTemp = 0; |
| |
| rc = pModule->xOpen(pTokenizer, zInput, nInput, &pCursor); |
| if( rc==SQLITE_OK ){ |
| int ii; |
| pCursor->pTokenizer = pTokenizer; |
| for(ii=0; rc==SQLITE_OK; ii++){ |
| const char *zToken; |
| int nToken, iBegin, iEnd, iPos; |
| rc = pModule->xNext(pCursor, &zToken, &nToken, &iBegin, &iEnd, &iPos); |
| if( rc==SQLITE_OK ){ |
| int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
| p = fts3ReallocOrFree(p, nByte+ii*sizeof(Fts3PhraseToken)); |
| zTemp = fts3ReallocOrFree(zTemp, nTemp + nToken); |
| if( !p || !zTemp ){ |
| goto no_mem; |
| } |
| if( ii==0 ){ |
| memset(p, 0, nByte); |
| p->pPhrase = (Fts3Phrase *)&p[1]; |
| } |
| p->pPhrase = (Fts3Phrase *)&p[1]; |
| memset(&p->pPhrase->aToken[ii], 0, sizeof(Fts3PhraseToken)); |
| p->pPhrase->nToken = ii+1; |
| p->pPhrase->aToken[ii].n = nToken; |
| memcpy(&zTemp[nTemp], zToken, nToken); |
| nTemp += nToken; |
| if( iEnd<nInput && zInput[iEnd]=='*' ){ |
| p->pPhrase->aToken[ii].isPrefix = 1; |
| }else{ |
| p->pPhrase->aToken[ii].isPrefix = 0; |
| } |
| } |
| } |
| |
| pModule->xClose(pCursor); |
| pCursor = 0; |
| } |
| |
| if( rc==SQLITE_DONE ){ |
| int jj; |
| char *zNew = NULL; |
| int nNew = 0; |
| int nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase); |
| nByte += (p?(p->pPhrase->nToken-1):0) * sizeof(Fts3PhraseToken); |
| p = fts3ReallocOrFree(p, nByte + nTemp); |
| if( !p ){ |
| goto no_mem; |
| } |
| if( zTemp ){ |
| zNew = &(((char *)p)[nByte]); |
| memcpy(zNew, zTemp, nTemp); |
| }else{ |
| memset(p, 0, nByte+nTemp); |
| } |
| p->pPhrase = (Fts3Phrase *)&p[1]; |
| for(jj=0; jj<p->pPhrase->nToken; jj++){ |
| p->pPhrase->aToken[jj].z = &zNew[nNew]; |
| nNew += p->pPhrase->aToken[jj].n; |
| } |
| sqlite3_free(zTemp); |
| p->eType = FTSQUERY_PHRASE; |
| p->pPhrase->iColumn = pParse->iDefaultCol; |
| rc = SQLITE_OK; |
| } |
| |
| *ppExpr = p; |
| return rc; |
| no_mem: |
| |
| if( pCursor ){ |
| pModule->xClose(pCursor); |
| } |
| sqlite3_free(zTemp); |
| sqlite3_free(p); |
| *ppExpr = 0; |
| return SQLITE_NOMEM; |
| } |
| |
| /* |
| ** Function getNextNode(), which is called by fts3ExprParse(), may itself |
| ** call fts3ExprParse(). So this forward declaration is required. |
| */ |
| static int fts3ExprParse(ParseContext *, const char *, int, Fts3Expr **, int *); |
| |
| /* |
| ** The output variable *ppExpr is populated with an allocated Fts3Expr |
| ** structure, or set to 0 if the end of the input buffer is reached. |
| ** |
| ** Returns an SQLite error code. SQLITE_OK if everything works, SQLITE_NOMEM |
| ** if a malloc failure occurs, or SQLITE_ERROR if a parse error is encountered. |
| ** If SQLITE_ERROR is returned, pContext is populated with an error message. |
| */ |
| static int getNextNode( |
| ParseContext *pParse, /* fts3 query parse context */ |
| const char *z, int n, /* Input string */ |
| Fts3Expr **ppExpr, /* OUT: expression */ |
| int *pnConsumed /* OUT: Number of bytes consumed */ |
| ){ |
| static const struct Fts3Keyword { |
| char *z; /* Keyword text */ |
| unsigned char n; /* Length of the keyword */ |
| unsigned char parenOnly; /* Only valid in paren mode */ |
| unsigned char eType; /* Keyword code */ |
| } aKeyword[] = { |
| { "OR" , 2, 0, FTSQUERY_OR }, |
| { "AND", 3, 1, FTSQUERY_AND }, |
| { "NOT", 3, 1, FTSQUERY_NOT }, |
| { "NEAR", 4, 0, FTSQUERY_NEAR } |
| }; |
| int ii; |
| int iCol; |
| int iColLen; |
| int rc; |
| Fts3Expr *pRet = 0; |
| |
| const char *zInput = z; |
| int nInput = n; |
| |
| /* Skip over any whitespace before checking for a keyword, an open or |
| ** close bracket, or a quoted string. |
| */ |
| while( nInput>0 && fts3isspace(*zInput) ){ |
| nInput--; |
| zInput++; |
| } |
| if( nInput==0 ){ |
| return SQLITE_DONE; |
| } |
| |
| /* See if we are dealing with a keyword. */ |
| for(ii=0; ii<(int)(sizeof(aKeyword)/sizeof(struct Fts3Keyword)); ii++){ |
| const struct Fts3Keyword *pKey = &aKeyword[ii]; |
| |
| if( (pKey->parenOnly & ~sqlite3_fts3_enable_parentheses)!=0 ){ |
| continue; |
| } |
| |
| if( nInput>=pKey->n && 0==memcmp(zInput, pKey->z, pKey->n) ){ |
| int nNear = SQLITE_FTS3_DEFAULT_NEAR_PARAM; |
| int nKey = pKey->n; |
| char cNext; |
| |
| /* If this is a "NEAR" keyword, check for an explicit nearness. */ |
| if( pKey->eType==FTSQUERY_NEAR ){ |
| assert( nKey==4 ); |
| if( zInput[4]=='/' && zInput[5]>='0' && zInput[5]<='9' ){ |
| nNear = 0; |
| for(nKey=5; zInput[nKey]>='0' && zInput[nKey]<='9'; nKey++){ |
| nNear = nNear * 10 + (zInput[nKey] - '0'); |
| } |
| } |
| } |
| |
| /* At this point this is probably a keyword. But for that to be true, |
| ** the next byte must contain either whitespace, an open or close |
| ** parenthesis, a quote character, or EOF. |
| */ |
| cNext = zInput[nKey]; |
| if( fts3isspace(cNext) |
| || cNext=='"' || cNext=='(' || cNext==')' || cNext==0 |
| ){ |
| pRet = (Fts3Expr *)fts3MallocZero(sizeof(Fts3Expr)); |
| if( !pRet ){ |
| return SQLITE_NOMEM; |
| } |
| pRet->eType = pKey->eType; |
| pRet->nNear = nNear; |
| *ppExpr = pRet; |
| *pnConsumed = (int)((zInput - z) + nKey); |
| return SQLITE_OK; |
| } |
| |
| /* Turns out that wasn't a keyword after all. This happens if the |
| ** user has supplied a token such as "ORacle". Continue. |
| */ |
| } |
| } |
| |
| /* Check for an open bracket. */ |
| if( sqlite3_fts3_enable_parentheses ){ |
| if( *zInput=='(' ){ |
| int nConsumed; |
| pParse->nNest++; |
| rc = fts3ExprParse(pParse, &zInput[1], nInput-1, ppExpr, &nConsumed); |
| if( rc==SQLITE_OK && !*ppExpr ){ |
| rc = SQLITE_DONE; |
| } |
| *pnConsumed = (int)((zInput - z) + 1 + nConsumed); |
| return rc; |
| } |
| |
| /* Check for a close bracket. */ |
| if( *zInput==')' ){ |
| pParse->nNest--; |
| *pnConsumed = (int)((zInput - z) + 1); |
| return SQLITE_DONE; |
| } |
| } |
| |
| /* See if we are dealing with a quoted phrase. If this is the case, then |
| ** search for the closing quote and pass the whole string to getNextString() |
| ** for processing. This is easy to do, as fts3 has no syntax for escaping |
| ** a quote character embedded in a string. |
| */ |
| if( *zInput=='"' ){ |
| for(ii=1; ii<nInput && zInput[ii]!='"'; ii++); |
| *pnConsumed = (int)((zInput - z) + ii + 1); |
| if( ii==nInput ){ |
| return SQLITE_ERROR; |
| } |
| return getNextString(pParse, &zInput[1], ii-1, ppExpr); |
| } |
| |
| |
| /* If control flows to this point, this must be a regular token, or |
| ** the end of the input. Read a regular token using the sqlite3_tokenizer |
| ** interface. Before doing so, figure out if there is an explicit |
| ** column specifier for the token. |
| ** |
| ** TODO: Strangely, it is not possible to associate a column specifier |
| ** with a quoted phrase, only with a single token. Not sure if this was |
| ** an implementation artifact or an intentional decision when fts3 was |
| ** first implemented. Whichever it was, this module duplicates the |
| ** limitation. |
| */ |
| iCol = pParse->iDefaultCol; |
| iColLen = 0; |
| for(ii=0; ii<pParse->nCol; ii++){ |
| const char *zStr = pParse->azCol[ii]; |
| int nStr = (int)strlen(zStr); |
| if( nInput>nStr && zInput[nStr]==':' |
| && sqlite3_strnicmp(zStr, zInput, nStr)==0 |
| ){ |
| iCol = ii; |
| iColLen = (int)((zInput - z) + nStr + 1); |
| break; |
| } |
| } |
| rc = getNextToken(pParse, iCol, &z[iColLen], n-iColLen, ppExpr, pnConsumed); |
| *pnConsumed += iColLen; |
| return rc; |
| } |
| |
| /* |
| ** The argument is an Fts3Expr structure for a binary operator (any type |
| ** except an FTSQUERY_PHRASE). Return an integer value representing the |
| ** precedence of the operator. Lower values have a higher precedence (i.e. |
| ** group more tightly). For example, in the C language, the == operator |
| ** groups more tightly than ||, and would therefore have a higher precedence. |
| ** |
| ** When using the new fts3 query syntax (when SQLITE_ENABLE_FTS3_PARENTHESIS |
| ** is defined), the order of the operators in precedence from highest to |
| ** lowest is: |
| ** |
| ** NEAR |
| ** NOT |
| ** AND (including implicit ANDs) |
| ** OR |
| ** |
| ** Note that when using the old query syntax, the OR operator has a higher |
| ** precedence than the AND operator. |
| */ |
| static int opPrecedence(Fts3Expr *p){ |
| assert( p->eType!=FTSQUERY_PHRASE ); |
| if( sqlite3_fts3_enable_parentheses ){ |
| return p->eType; |
| }else if( p->eType==FTSQUERY_NEAR ){ |
| return 1; |
| }else if( p->eType==FTSQUERY_OR ){ |
| return 2; |
| } |
| assert( p->eType==FTSQUERY_AND ); |
| return 3; |
| } |
| |
| /* |
| ** Argument ppHead contains a pointer to the current head of a query |
| ** expression tree being parsed. pPrev is the expression node most recently |
| ** inserted into the tree. This function adds pNew, which is always a binary |
| ** operator node, into the expression tree based on the relative precedence |
| ** of pNew and the existing nodes of the tree. This may result in the head |
| ** of the tree changing, in which case *ppHead is set to the new root node. |
| */ |
| static void insertBinaryOperator( |
| Fts3Expr **ppHead, /* Pointer to the root node of a tree */ |
| Fts3Expr *pPrev, /* Node most recently inserted into the tree */ |
| Fts3Expr *pNew /* New binary node to insert into expression tree */ |
| ){ |
| Fts3Expr *pSplit = pPrev; |
| while( pSplit->pParent && opPrecedence(pSplit->pParent)<=opPrecedence(pNew) ){ |
| pSplit = pSplit->pParent; |
| } |
| |
| if( pSplit->pParent ){ |
| assert( pSplit->pParent->pRight==pSplit ); |
| pSplit->pParent->pRight = pNew; |
| pNew->pParent = pSplit->pParent; |
| }else{ |
| *ppHead = pNew; |
| } |
| pNew->pLeft = pSplit; |
| pSplit->pParent = pNew; |
| } |
| |
| /* |
| ** Parse the fts3 query expression found in buffer z, length n. This function |
| ** returns either when the end of the buffer is reached or an unmatched |
| ** closing bracket - ')' - is encountered. |
| ** |
| ** If successful, SQLITE_OK is returned, *ppExpr is set to point to the |
| ** parsed form of the expression and *pnConsumed is set to the number of |
| ** bytes read from buffer z. Otherwise, *ppExpr is set to 0 and SQLITE_NOMEM |
| ** (out of memory error) or SQLITE_ERROR (parse error) is returned. |
| */ |
| static int fts3ExprParse( |
| ParseContext *pParse, /* fts3 query parse context */ |
| const char *z, int n, /* Text of MATCH query */ |
| Fts3Expr **ppExpr, /* OUT: Parsed query structure */ |
| int *pnConsumed /* OUT: Number of bytes consumed */ |
| ){ |
| Fts3Expr *pRet = 0; |
| Fts3Expr *pPrev = 0; |
| Fts3Expr *pNotBranch = 0; /* Only used in legacy parse mode */ |
| int nIn = n; |
| const char *zIn = z; |
| int rc = SQLITE_OK; |
| int isRequirePhrase = 1; |
| |
| while( rc==SQLITE_OK ){ |
| Fts3Expr *p = 0; |
| int nByte = 0; |
| rc = getNextNode(pParse, zIn, nIn, &p, &nByte); |
| if( rc==SQLITE_OK ){ |
| int isPhrase; |
| |
| if( !sqlite3_fts3_enable_parentheses |
| && p->eType==FTSQUERY_PHRASE && p->pPhrase->isNot |
| ){ |
| /* Create an implicit NOT operator. */ |
| Fts3Expr *pNot = fts3MallocZero(sizeof(Fts3Expr)); |
| if( !pNot ){ |
| sqlite3Fts3ExprFree(p); |
| rc = SQLITE_NOMEM; |
| goto exprparse_out; |
| } |
| pNot->eType = FTSQUERY_NOT; |
| pNot->pRight = p; |
| if( pNotBranch ){ |
| pNot->pLeft = pNotBranch; |
| } |
| pNotBranch = pNot; |
| p = pPrev; |
| }else{ |
| int eType = p->eType; |
| assert( eType!=FTSQUERY_PHRASE || !p->pPhrase->isNot ); |
| isPhrase = (eType==FTSQUERY_PHRASE || p->pLeft); |
| |
| /* The isRequirePhrase variable is set to true if a phrase or |
| ** an expression contained in parenthesis is required. If a |
| ** binary operator (AND, OR, NOT or NEAR) is encounted when |
| ** isRequirePhrase is set, this is a syntax error. |
| */ |
| if( !isPhrase && isRequirePhrase ){ |
| sqlite3Fts3ExprFree(p); |
| rc = SQLITE_ERROR; |
| goto exprparse_out; |
| } |
| |
| if( isPhrase && !isRequirePhrase ){ |
| /* Insert an implicit AND operator. */ |
| Fts3Expr *pAnd; |
| assert( pRet && pPrev ); |
| pAnd = fts3MallocZero(sizeof(Fts3Expr)); |
| if( !pAnd ){ |
| sqlite3Fts3ExprFree(p); |
| rc = SQLITE_NOMEM; |
| goto exprparse_out; |
| } |
| pAnd->eType = FTSQUERY_AND; |
| insertBinaryOperator(&pRet, pPrev, pAnd); |
| pPrev = pAnd; |
| } |
| |
| /* This test catches attempts to make either operand of a NEAR |
| ** operator something other than a phrase. For example, either of |
| ** the following: |
| ** |
| ** (bracketed expression) NEAR phrase |
| ** phrase NEAR (bracketed expression) |
| ** |
| ** Return an error in either case. |
| */ |
| if( pPrev && ( |
| (eType==FTSQUERY_NEAR && !isPhrase && pPrev->eType!=FTSQUERY_PHRASE) |
| || (eType!=FTSQUERY_PHRASE && isPhrase && pPrev->eType==FTSQUERY_NEAR) |
| )){ |
| sqlite3Fts3ExprFree(p); |
| rc = SQLITE_ERROR; |
| goto exprparse_out; |
| } |
| |
| if( isPhrase ){ |
| if( pRet ){ |
| assert( pPrev && pPrev->pLeft && pPrev->pRight==0 ); |
| pPrev->pRight = p; |
| p->pParent = pPrev; |
| }else{ |
| pRet = p; |
| } |
| }else{ |
| insertBinaryOperator(&pRet, pPrev, p); |
| } |
| isRequirePhrase = !isPhrase; |
| } |
| assert( nByte>0 ); |
| } |
| assert( rc!=SQLITE_OK || (nByte>0 && nByte<=nIn) ); |
| nIn -= nByte; |
| zIn += nByte; |
| pPrev = p; |
| } |
| |
| if( rc==SQLITE_DONE && pRet && isRequirePhrase ){ |
| rc = SQLITE_ERROR; |
| } |
| |
| if( rc==SQLITE_DONE ){ |
| rc = SQLITE_OK; |
| if( !sqlite3_fts3_enable_parentheses && pNotBranch ){ |
| if( !pRet ){ |
| rc = SQLITE_ERROR; |
| }else{ |
| Fts3Expr *pIter = pNotBranch; |
| while( pIter->pLeft ){ |
| pIter = pIter->pLeft; |
| } |
| pIter->pLeft = pRet; |
| pRet = pNotBranch; |
| } |
| } |
| } |
| *pnConsumed = n - nIn; |
| |
| exprparse_out: |
| if( rc!=SQLITE_OK ){ |
| sqlite3Fts3ExprFree(pRet); |
| sqlite3Fts3ExprFree(pNotBranch); |
| pRet = 0; |
| } |
| *ppExpr = pRet; |
| return rc; |
| } |
| |
| /* |
| ** Parameters z and n contain a pointer to and length of a buffer containing |
| ** an fts3 query expression, respectively. This function attempts to parse the |
| ** query expression and create a tree of Fts3Expr structures representing the |
| ** parsed expression. If successful, *ppExpr is set to point to the head |
| ** of the parsed expression tree and SQLITE_OK is returned. If an error |
| ** occurs, either SQLITE_NOMEM (out-of-memory error) or SQLITE_ERROR (parse |
| ** error) is returned and *ppExpr is set to 0. |
| ** |
| ** If parameter n is a negative number, then z is assumed to point to a |
| ** nul-terminated string and the length is determined using strlen(). |
| ** |
| ** The first parameter, pTokenizer, is passed the fts3 tokenizer module to |
| ** use to normalize query tokens while parsing the expression. The azCol[] |
| ** array, which is assumed to contain nCol entries, should contain the names |
| ** of each column in the target fts3 table, in order from left to right. |
| ** Column names must be nul-terminated strings. |
| ** |
| ** The iDefaultCol parameter should be passed the index of the table column |
| ** that appears on the left-hand-side of the MATCH operator (the default |
| ** column to match against for tokens for which a column name is not explicitly |
| ** specified as part of the query string), or -1 if tokens may by default |
| ** match any table column. |
| */ |
| int sqlite3Fts3ExprParse( |
| sqlite3_tokenizer *pTokenizer, /* Tokenizer module */ |
| char **azCol, /* Array of column names for fts3 table */ |
| int nCol, /* Number of entries in azCol[] */ |
| int iDefaultCol, /* Default column to query */ |
| const char *z, int n, /* Text of MATCH query */ |
| Fts3Expr **ppExpr /* OUT: Parsed query structure */ |
| ){ |
| int nParsed; |
| int rc; |
| ParseContext sParse; |
| sParse.pTokenizer = pTokenizer; |
| sParse.azCol = (const char **)azCol; |
| sParse.nCol = nCol; |
| sParse.iDefaultCol = iDefaultCol; |
| sParse.nNest = 0; |
| if( z==0 ){ |
| *ppExpr = 0; |
| return SQLITE_OK; |
| } |
| if( n<0 ){ |
| n = (int)strlen(z); |
| } |
| rc = fts3ExprParse(&sParse, z, n, ppExpr, &nParsed); |
| |
| /* Check for mismatched parenthesis */ |
| if( rc==SQLITE_OK && sParse.nNest ){ |
| rc = SQLITE_ERROR; |
| sqlite3Fts3ExprFree(*ppExpr); |
| *ppExpr = 0; |
| } |
| |
| return rc; |
| } |
| |
| /* |
| ** Free a parsed fts3 query expression allocated by sqlite3Fts3ExprParse(). |
| */ |
| void sqlite3Fts3ExprFree(Fts3Expr *p){ |
| if( p ){ |
| sqlite3Fts3ExprFree(p->pLeft); |
| sqlite3Fts3ExprFree(p->pRight); |
| sqlite3_free(p->aDoclist); |
| sqlite3_free(p); |
| } |
| } |
| |
| /**************************************************************************** |
| ***************************************************************************** |
| ** Everything after this point is just test code. |
| */ |
| |
| #ifdef SQLITE_TEST |
| |
| #include <stdio.h> |
| |
| /* |
| ** Function to query the hash-table of tokenizers (see README.tokenizers). |
| */ |
| static int queryTestTokenizer( |
| sqlite3 *db, |
| const char *zName, |
| const sqlite3_tokenizer_module **pp |
| ){ |
| int rc; |
| sqlite3_stmt *pStmt; |
| const char zSql[] = "SELECT fts3_tokenizer(?)"; |
| |
| *pp = 0; |
| rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0); |
| if( rc!=SQLITE_OK ){ |
| return rc; |
| } |
| |
| sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC); |
| if( SQLITE_ROW==sqlite3_step(pStmt) ){ |
| if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){ |
| memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp)); |
| } |
| } |
| |
| return sqlite3_finalize(pStmt); |
| } |
| |
| /* |
| ** Return a pointer to a buffer containing a text representation of the |
| ** expression passed as the first argument. The buffer is obtained from |
| ** sqlite3_malloc(). It is the responsibility of the caller to use |
| ** sqlite3_free() to release the memory. If an OOM condition is encountered, |
| ** NULL is returned. |
| ** |
| ** If the second argument is not NULL, then its contents are prepended to |
| ** the returned expression text and then freed using sqlite3_free(). |
| */ |
| static char *exprToString(Fts3Expr *pExpr, char *zBuf){ |
| switch( pExpr->eType ){ |
| case FTSQUERY_PHRASE: { |
| Fts3Phrase *pPhrase = pExpr->pPhrase; |
| int i; |
| zBuf = sqlite3_mprintf( |
| "%zPHRASE %d %d", zBuf, pPhrase->iColumn, pPhrase->isNot); |
| for(i=0; zBuf && i<pPhrase->nToken; i++){ |
| zBuf = sqlite3_mprintf("%z %.*s%s", zBuf, |
| pPhrase->aToken[i].n, pPhrase->aToken[i].z, |
| (pPhrase->aToken[i].isPrefix?"+":"") |
| ); |
| } |
| return zBuf; |
| } |
| |
| case FTSQUERY_NEAR: |
| zBuf = sqlite3_mprintf("%zNEAR/%d ", zBuf, pExpr->nNear); |
| break; |
| case FTSQUERY_NOT: |
| zBuf = sqlite3_mprintf("%zNOT ", zBuf); |
| break; |
| case FTSQUERY_AND: |
| zBuf = sqlite3_mprintf("%zAND ", zBuf); |
| break; |
| case FTSQUERY_OR: |
| zBuf = sqlite3_mprintf("%zOR ", zBuf); |
| break; |
| } |
| |
| if( zBuf ) zBuf = sqlite3_mprintf("%z{", zBuf); |
| if( zBuf ) zBuf = exprToString(pExpr->pLeft, zBuf); |
| if( zBuf ) zBuf = sqlite3_mprintf("%z} {", zBuf); |
| |
| if( zBuf ) zBuf = exprToString(pExpr->pRight, zBuf); |
| if( zBuf ) zBuf = sqlite3_mprintf("%z}", zBuf); |
| |
| return zBuf; |
| } |
| |
| /* |
| ** This is the implementation of a scalar SQL function used to test the |
| ** expression parser. It should be called as follows: |
| ** |
| ** fts3_exprtest(<tokenizer>, <expr>, <column 1>, ...); |
| ** |
| ** The first argument, <tokenizer>, is the name of the fts3 tokenizer used |
| ** to parse the query expression (see README.tokenizers). The second argument |
| ** is the query expression to parse. Each subsequent argument is the name |
| ** of a column of the fts3 table that the query expression may refer to. |
| ** For example: |
| ** |
| ** SELECT fts3_exprtest('simple', 'Bill col2:Bloggs', 'col1', 'col2'); |
| */ |
| static void fts3ExprTest( |
| sqlite3_context *context, |
| int argc, |
| sqlite3_value **argv |
| ){ |
| sqlite3_tokenizer_module const *pModule = 0; |
| sqlite3_tokenizer *pTokenizer = 0; |
| int rc; |
| char **azCol = 0; |
| const char *zExpr; |
| int nExpr; |
| int nCol; |
| int ii; |
| Fts3Expr *pExpr; |
| char *zBuf = 0; |
| sqlite3 *db = sqlite3_context_db_handle(context); |
| |
| if( argc<3 ){ |
| sqlite3_result_error(context, |
| "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1 |
| ); |
| return; |
| } |
| |
| rc = queryTestTokenizer(db, |
| (const char *)sqlite3_value_text(argv[0]), &pModule); |
| if( rc==SQLITE_NOMEM ){ |
| sqlite3_result_error_nomem(context); |
| goto exprtest_out; |
| }else if( !pModule ){ |
| sqlite3_result_error(context, "No such tokenizer module", -1); |
| goto exprtest_out; |
| } |
| |
| rc = pModule->xCreate(0, 0, &pTokenizer); |
| assert( rc==SQLITE_NOMEM || rc==SQLITE_OK ); |
| if( rc==SQLITE_NOMEM ){ |
| sqlite3_result_error_nomem(context); |
| goto exprtest_out; |
| } |
| pTokenizer->pModule = pModule; |
| |
| zExpr = (const char *)sqlite3_value_text(argv[1]); |
| nExpr = sqlite3_value_bytes(argv[1]); |
| nCol = argc-2; |
| azCol = (char **)sqlite3_malloc(nCol*sizeof(char *)); |
| if( !azCol ){ |
| sqlite3_result_error_nomem(context); |
| goto exprtest_out; |
| } |
| for(ii=0; ii<nCol; ii++){ |
| azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]); |
| } |
| |
| rc = sqlite3Fts3ExprParse( |
| pTokenizer, azCol, nCol, nCol, zExpr, nExpr, &pExpr |
| ); |
| if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){ |
| sqlite3_result_error(context, "Error parsing expression", -1); |
| }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){ |
| sqlite3_result_error_nomem(context); |
| }else{ |
| sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT); |
| sqlite3_free(zBuf); |
| } |
| |
| sqlite3Fts3ExprFree(pExpr); |
| |
| exprtest_out: |
| if( pModule && pTokenizer ){ |
| rc = pModule->xDestroy(pTokenizer); |
| } |
| sqlite3_free(azCol); |
| } |
| |
| /* |
| ** Register the query expression parser test function fts3_exprtest() |
| ** with database connection db. |
| */ |
| int sqlite3Fts3ExprInitTestInterface(sqlite3* db){ |
| return sqlite3_create_function( |
| db, "fts3_exprtest", -1, SQLITE_UTF8, 0, fts3ExprTest, 0, 0 |
| ); |
| } |
| |
| #endif |
| #endif /* !defined(SQLITE_CORE) || defined(SQLITE_ENABLE_FTS3) */ |