sql >> Database teknologi >  >> RDS >> Mysql

parsing af brugerskrevne fuldtekstsøgningsforespørgsler ind i WHERE-klausulen i MySQL ved hjælp af PHP

Følgende kode består af klasserne Tokenizer, Token og QueryBuilder. Det er nok ikke den mest elegante løsning nogensinde, men den gør faktisk, hvad du bad om:

<?
// QueryBuilder Grammar:
// =====================
// SearchRule       := SimpleSearchRule { KeyWord }
// SimpleSearchRule := Expression | SimpleSearchRule { 'OR' Expression }
// Expression       := SimpleExpression | Expression { 'AND' SimpleExpression }
// SimpleExpression := '(' SimpleSearchRule ')' | FieldExpression

$input = 'from:me AND (to:john OR to:jenny) dinner party';

$fieldMapping = array(
    'id' => 'id',
    'from' => 'from',
    'to' => 'to',
    'title' => 'title',
    'description' => 'description',
    'time_created' => 'time_created'
);
$fullTextFields = array('title','description');

$qb = new QueryBuilder($fieldMapping, $fullTextFields);
try {
    echo $qb->parseSearchRule($input);
} catch(Exception $error) {
    echo 'Error occurred while parsing search query: <br/>'.$error->getMessage();
}

class Token {
    const   KEYWORD = 'KEYWORD',
            OPEN_PAR='OPEN_PAR',
            CLOSE_PAR='CLOSE_PAR',
            FIELD='FIELD',
            AND_OP='AND_OP',
            OR_OP='OR_OP';
    public $type;
    public $chars;
    public $position;

    function __construct($type,$chars,$position) {
        $this->type = $type;
        $this->chars = $chars;
        $this->position = $position;
    }

    function __toString() {
        return 'Token[ type='.$this->type.', chars='.$this->chars.', position='.$this->position.' ]';
    }
}

class Tokenizer {
    private $tokens = array();
    private $input;
    private $currentPosition;

    function __construct($input) {
        $this->input = trim($input);
        $this->currentPosition = 0;
    }

    /**
     * @return Token
     */
    function getToken() {
        if(count($this->tokens)==0) {
            $token = $this->nextToken();
            if($token==null) {
                return null;
            }
            array_push($this->tokens, $token);
        }
        return $this->tokens[0];
    }

    function consumeToken() {
        $token = $this->getToken();
        if($token==null) {
            return null;
        }
        array_shift($this->tokens);
        return $token;
    }

    protected function nextToken() {
        $reservedCharacters = '\:\s\(\)';
        $fieldExpr = '/^([^'.$reservedCharacters.']+)\:([^'.$reservedCharacters.']+)/';
        $keyWord = '/^([^'.$reservedCharacters.']+)/';
        $andOperator = '/^AND\s/';
        $orOperator = '/^OR\s/';
        // Remove whitespaces ..
        $whiteSpaces = '/^\s+/';
        $remaining = substr($this->input,$this->currentPosition);
        if(preg_match($whiteSpaces, $remaining, $matches)) {
            $this->currentPosition += strlen($matches[0]);
            $remaining = substr($this->input,$this->currentPosition);
        }
        if($remaining=='') {
            return null;
        }
        switch(substr($remaining,0,1)) {
            case '(':
                return new Token(Token::OPEN_PAR,'(',$this->currentPosition++);
            case ')':
                return new Token(Token::CLOSE_PAR,')',$this->currentPosition++);
        }
        if(preg_match($fieldExpr, $remaining, $matches)) {
            $token = new Token(Token::FIELD, $matches[0], $this->currentPosition);
            $this->currentPosition += strlen($matches[0]);
        } else if(preg_match($andOperator, $remaining, $matches)) {
            $token = new Token(Token::AND_OP, 'AND', $this->currentPosition);
            $this->currentPosition += 3;
        } else if(preg_match($orOperator, $remaining, $matches)) {
            $token = new Token(Token::OR_OP, 'OR', $this->currentPosition);
            $this->currentPosition += 2;
        } else if(preg_match($keyWord, $remaining, $matches)) {
            $token = new Token(Token::KEYWORD, $matches[0], $this->currentPosition);
            $this->currentPosition += strlen($matches[0]);
        } else throw new Exception('Unable to tokenize: '.$remaining);
        return $token;
    }
}

class QueryBuilder {
    private $fieldMapping;
    private $fulltextFields;

    function __construct($fieldMapping, $fulltextFields) {
        $this->fieldMapping = $fieldMapping;
        $this->fulltextFields = $fulltextFields;
    }

    function parseSearchRule($input) {
        $t = new Tokenizer($input);
        $token = $t->getToken();
        if($token==null) {
            return '';
        }
        $token = $t->getToken();
        if($token->type!=Token::KEYWORD) {
            $searchRule = $this->parseSimpleSearchRule($t);
        } else {
            $searchRule = '';
        }
        $keywords = '';
        while($token = $t->consumeToken()) {
            if($token->type!=Token::KEYWORD) {
                throw new Exception('Only keywords allowed at end of search rule.');
            }
            if($keywords!='') {
                $keywords .= ' ';
            }
            $keywords .= $token->chars;
        }
        if($keywords!='') {
            $matchClause = 'MATCH (`'.(implode('`,`',$this->fulltextFields)).'`) AGAINST (';
            $keywords = $matchClause.'\''.mysql_real_escape_string($keywords).'\' IN BOOLEAN MODE)';
            if($searchRule=='') {
                $searchRule = $keywords;
            } else {
                $searchRule = '('.$searchRule.') AND ('.$keywords.')';
            }
        }
        return $searchRule;
    }

    protected function parseSimpleSearchRule(Tokenizer $t) {
        $expressions = array();
        do {
            $repeat = false;
            $expressions[] = $this->parseExpression($t);
            $token = $t->getToken();
            if($token->type==Token::OR_OP) {
                $t->consumeToken();
                $repeat = true;
            }
        } while($repeat);
        return implode(' OR ', $expressions);
    }

    protected function parseExpression(Tokenizer $t) {
        $expressions = array();
        do {
            $repeat = false;
            $expressions[] = $this->parseSimpleExpression($t);
            $token = $t->getToken();
            if($token->type==Token::AND_OP) {
                $t->consumeToken();
                $repeat = true;
            }
        } while($repeat);
        return implode(' AND ', $expressions);
    }

    protected function parseSimpleExpression(Tokenizer $t) {
        $token = $t->consumeToken();
        if($token->type==Token::OPEN_PAR) {
            $spr = $this->parseSimpleSearchRule($t);
            $token = $t->consumeToken();
            if($token==null || $token->type!=Token::CLOSE_PAR) {
                throw new Exception('Expected closing parenthesis, found: '.$token->chars);
            }
            return '('.$spr.')';
        } else if($token->type==Token::FIELD) {
            $fieldVal = explode(':', $token->chars,2);
            if(isset($this->fieldMapping[$fieldVal[0]])) {
                return '`'.$this->fieldMapping[$fieldVal[0]].'` = \''.mysql_real_escape_string($fieldVal[1]).'\'';
            }
            throw new Exception('Unknown field selected: '.$token->chars);
        } else {
            throw new Exception('Expected opening parenthesis or field-expression, found: '.$token->chars);
        }
    }
}
?>

En mere korrekt løsning ville først bygge et parse-træ og derefter transformere det til en forespørgsel efter lidt yderligere analyse.



  1. Parse CSV og eksporter til Mysql-database i Grails

  2. SQL Server konverterer varbinary til streng

  3. mysql:hvordan afkortes længden af ​​et felt

  4. SQLSTATE[HY000] [1040] For mange forbindelser