<?php
// Notes (from the source file: oql-lexer.plex) - Romain
//
// The strval rule is a little bit cryptic.
// This is due to both a bug in the lexer generator and the complexity of our needs.
// The rule means: either a string quoted with ", or a string quoted with '
//   a literal " (resp. ') must be escaped by a \
//   a \ must be escaped by an additional \
//
// Here are the issues and limitations found in the lexer generator:
// * Matching single quotes is an issue, because regexps are not correctly escaped (and the ESC code is itself escaped)
//   Workaround: insert '.chr(39).' which will be a real ' in the end
// * Matching an alternate regexp is an issue because you must specify "|^...."
//   and the regexp parser will not accept that syntax
//   Workaround: insert '.chr(94).' which will be a real ^
//
// Let's look at an overview of the regexp. We have:
// 1) The strval rule in the lexer definition
//      /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
// 2) which becomes the following PHP expression in the lexer
//    (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
//      $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
//
// To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
//
//
// Now, let's explain how the regexp has been designed.
// Here is a simplified version, dealing with single quotes only, and based on the assumption that the lexer generator has been fixed!
// The strval rule in the lexer definition
//      /'([^\\']*(\\')*(\\\\)*)*'/
// This means: anything made of \\ or \' or any other char except a standalone ' or \
// In other words, ' or \ can never appear without a preceding \
//
/**
 * Raw OQL lexer definition, meant to be processed by the lex2php lexer generator.
 *
 * The two "lex2php" comment blocks below are NOT documentation: they are the
 * generator's input. The first block binds the generator variables to the
 * properties of this class, requests longest-match mode (%matchlongest 1) and
 * declares the token patterns. The second block gives, for each pattern, the
 * PHP action that sets the OQLParser token id. Do not reformat or annotate
 * their content.
 *
 * Pattern notes:
 * - whitespace also accepts "\r", so that input using CRLF line endings is
 *   tokenized instead of being reported as unexpected input.
 * - numval is wrapped in a group, like name. The generated expression is
 *   anchored by a single leading "^" which, on a bare top-level alternation,
 *   binds to the first alternative only (the issue that strval works around
 *   with chr(94)). The hexadecimal alternative comes first because PCRE tries
 *   alternatives from left to right: with the decimal alternative first,
 *   "0x1F" was cut after its leading "0".
 * - strval is deliberately left untouched (see the notes at the top of the file).
 */
class OQLLexerRaw
{
    protected $data;  // input string (bound to %input)
    public $token;    // token id (bound to %token)
    public $value;    // token string representation (bound to %value)
    protected $line;  // current line (bound to %line)
    protected $count; // position counter in the input (bound to %counter)

    /**
     * @param string $data The OQL text to tokenize
     */
    public function __construct($data)
    {
        $this->data = $data;
        $this->count = 0;
        $this->line = 1;
    }

/*!lex2php
%input $this->data
%counter $this->count
%token $this->token
%value $this->value
%line $this->line
%matchlongest 1
whitespace = /[ \t\n\r]+/
select = "SELECT"
as_alias = "AS"
where = "WHERE"
join = "JOIN"
on = "ON"
coma = ","
par_open = "("
par_close = ")"
math_div = "/"
math_mult = "*"
math_plus = "+"
math_minus = "-"
log_and = "AND"
log_or = "OR"
eq = "="
not_eq = "!="
gt = ">"
lt = "<"
ge = ">="
le = "<="
like = "LIKE"
not_like = "NOT LIKE"
in = "IN"
not_in = "NOT IN"
interval = "INTERVAL"
f_if = "IF"
f_elt = "ELT"
f_coalesce = "COALESCE"
f_concat = "CONCAT"
f_substr = "SUBSTR"
f_trim = "TRIM"
f_date = "DATE"
f_date_format = "DATE_FORMAT"
f_current_date = "CURRENT_DATE"
f_now = "NOW"
f_time = "TIME"
f_to_days = "TO_DAYS"
f_from_days = "FROM_DAYS"
f_year = "YEAR"
f_month = "MONTH"
f_day = "DAY"
f_date_add = "DATE_ADD"
f_date_sub = "DATE_SUB"
f_round = "ROUND"
f_floor = "FLOOR"
numval = /(0x[0-9a-fA-F]+|[0-9]+)/
strval = /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
name = /([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/
dot = "."
*/

/*!lex2php
whitespace {
    return false;
}
select {
    $this->token = OQLParser::SELECT;
}
as_alias {
    $this->token = OQLParser::AS_ALIAS;
}
where {
    $this->token = OQLParser::WHERE;
}
join {
    $this->token = OQLParser::JOIN;
}
on {
    $this->token = OQLParser::ON;
}
math_div {
    $this->token = OQLParser::MATH_DIV;
}
math_mult {
    $this->token = OQLParser::MATH_MULT;
}
math_plus {
    $this->token = OQLParser::MATH_PLUS;
}
math_minus {
    $this->token = OQLParser::MATH_MINUS;
}
log_and {
    $this->token = OQLParser::LOG_AND;
}
log_or {
    $this->token = OQLParser::LOG_OR;
}
coma {
    $this->token = OQLParser::COMA;
}
par_open {
    $this->token = OQLParser::PAR_OPEN;
}
par_close {
    $this->token = OQLParser::PAR_CLOSE;
}
eq {
    $this->token = OQLParser::EQ;
}
not_eq {
    $this->token = OQLParser::NOT_EQ;
}
gt {
    $this->token = OQLParser::GT;
}
lt {
    $this->token = OQLParser::LT;
}
ge {
    $this->token = OQLParser::GE;
}
le {
    $this->token = OQLParser::LE;
}
like {
    $this->token = OQLParser::LIKE;
}
not_like {
    $this->token = OQLParser::NOT_LIKE;
}
in {
    $this->token = OQLParser::IN;
}
not_in {
    $this->token = OQLParser::NOT_IN;
}
interval {
    $this->token = OQLParser::INTERVAL;
}
f_if {
    $this->token = OQLParser::F_IF;
}
f_elt {
    $this->token = OQLParser::F_ELT;
}
f_coalesce {
    $this->token = OQLParser::F_COALESCE;
}
f_concat {
    $this->token = OQLParser::F_CONCAT;
}
f_substr {
    $this->token = OQLParser::F_SUBSTR;
}
f_trim {
    $this->token = OQLParser::F_TRIM;
}
f_date {
    $this->token = OQLParser::F_DATE;
}
f_date_format {
    $this->token = OQLParser::F_DATE_FORMAT;
}
f_current_date {
    $this->token = OQLParser::F_CURRENT_DATE;
}
f_now {
    $this->token = OQLParser::F_NOW;
}
f_time {
    $this->token = OQLParser::F_TIME;
}
f_to_days {
    $this->token = OQLParser::F_TO_DAYS;
}
f_from_days {
    $this->token = OQLParser::F_FROM_DAYS;
}
f_year {
    $this->token = OQLParser::F_YEAR;
}
f_month {
    $this->token = OQLParser::F_MONTH;
}
f_day {
    $this->token = OQLParser::F_DAY;
}
f_date_add {
    $this->token = OQLParser::F_DATE_ADD;
}
f_date_sub {
    $this->token = OQLParser::F_DATE_SUB;
}
f_round {
    $this->token = OQLParser::F_ROUND;
}
f_floor {
    $this->token = OQLParser::F_FLOOR;
}
numval {
    $this->token = OQLParser::NUMVAL;
}
strval {
    $this->token = OQLParser::STRVAL;
}
name {
    $this->token = OQLParser::NAME;
}
dot {
    $this->token = OQLParser::DOT;
}
*/
}
/**
 * Leading text of the exception messages that OQLLexer::yylex() recognizes
 * and converts into an OQLLexerException.
 */
const UNEXPECTED_INPUT_AT_LINE = 'Unexpected input at line';
/**
 * Exception reporting a lexical error in an OQL text.
 *
 * Specialization of OQLException whose issue label is always "Syntax error".
 */
class OQLLexerException extends OQLException
{
    /**
     * @param string $sInput      The text being tokenized
     * @param mixed  $iLine       Line reported for the error
     * @param mixed  $iCol        Position reported for the error
     * @param string $sUnexpected The input that could not be tokenized
     */
    public function __construct($sInput, $iLine, $iCol, $sUnexpected)
    {
        // The issue label is fixed; everything else is forwarded untouched.
        $sIssue = "Syntax error";

        parent::__construct($sIssue, $sInput, $iLine, $iCol, $sUnexpected);
    }
}
/**
 * OQL lexer: adds token position reporting to OQLLexerRaw and converts the
 * "unexpected input" failures of the underlying lexer into OQLLexerException.
 */
class OQLLexer extends OQLLexerRaw
{
    /**
     * Position at which the current token starts.
     *
     * Computed as the position counter minus the byte length of the current
     * token text, and never lower than 0.
     *
     * @return int
     */
    public function getTokenPos()
    {
        // $this->value is null until a token has been read; passing null to
        // strlen() is deprecated since PHP 8.1, hence the explicit cast.
        return max(0, $this->count - strlen((string) $this->value));
    }

    /**
     * Runs the underlying lexer and returns its result unchanged.
     *
     * @return mixed Whatever parent::yylex() returns
     *
     * @throws OQLLexerException When the underlying lexer fails with a message of
     *                           the form UNEXPECTED_INPUT_AT_LINE . '<line>: <text>'
     * @throws Exception         Any other exception is forwarded as is
     */
    public function yylex()
    {
        try
        {
            return parent::yylex();
        }
        catch (Exception $e)
        {
            $sMessage = $e->getMessage();
            $iPrefixLength = strlen(UNEXPECTED_INPUT_AT_LINE);

            // Strict, binary-safe prefix test (no loose string comparison).
            if (strncmp($sMessage, UNEXPECTED_INPUT_AT_LINE, $iPrefixLength) === 0)
            {
                // What follows the prefix is expected to look like "<line>: <unexpected text>".
                $sLineAndChar = substr($sMessage, $iPrefixLength);
                if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
                {
                    // The capture is a digit-only string: hand it over as a real integer.
                    $iLine = (int) $aMatches[1];
                    $sUnexpected = $aMatches[2];
                    throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
                }
            }

            // Default: forward the exception
            throw $e;
        }
    }
}
?>