oql-lexer.plex 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305
  1. <?php
  2. // Notes (from the source file: oql-lexer.plex) - Romain
  3. //
  4. // The strval rule is a little bit cryptic.
  5. // This is due to both a bug in the lexer generator and the complexity of our needs.
  6. // The rule means: either a string quoted with ", or a string quoted with '.
  7. // A literal " (resp. ') must be escaped by a \
  8. // and a literal \ must be escaped by an additional \
  9. //
  10. // Here are the issues and limitations found in the lexer generator:
  11. // * Matching single quotes is an issue, because regexps are not correctly escaped (and the escape character is itself escaped)
  12. // Workaround: insert '.chr(39).' which will be a real ' in the end
  13. // * Matching an alternation is an issue, because every alternative must be anchored (i.e. you must specify "|^....")
  14. // and the regexp parser will not accept that syntax
  15. // Workaround: insert '.chr(94).' which will be a real ^
  16. //
  17. // Let's analyze an overview of the regexp, we have
  18. // 1) The strval rule in the lexer definition
  19. // /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  20. // 2) Becomes the php expression in the lexer
  21. // (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
  22. // $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
  23. //
  24. // To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
  25. //
  26. //
  27. // Now, let's explain how the regexp has been designed.
  28. // Here is a simplified version, dealing with simple quotes, and based on the assumption that the lexer generator has been fixed!
  29. // The strval rule in the lexer definition
  30. // /'([^\\']*(\\')*(\\\\)*)*'/
  31. // This means anything containing \\ or \' or any other char but a standalone ' or \
  32. // This means ' or \ could not be found without a preceding \
  33. //
  /**
   * Raw OQL lexer definition.
   *
   * The two "lex2php" comment blocks in this class are not ordinary comments:
   * they are the input of the lexer generator (see the notes at the top of
   * this file). The first block binds the generator to the properties below
   * and declares one pattern per token; the second block attaches to each
   * pattern the PHP code that sets $this->token to the matching OQLParser
   * constant.
   */
  class OQLLexerRaw
  {
      protected $data;  // input string (generator %input)
      public $token;    // token id (generator %token), null until a token is read
      public $value;    // token string representation (generator %value)
      protected $line;  // current line (generator %line), starts at 1
      protected $count; // current position in $data (generator %counter), starts at 0

      /**
       * @param string $data The OQL text to tokenize
       */
      public function __construct($data)
      {
          $this->data = $data;
          $this->count = 0;
          $this->line = 1;
      }

      // Pattern declarations.
      //
      // * whitespace also accepts \r so that queries using CRLF line endings
      //   are not rejected as unexpected input.
      // * numval wraps its alternation in a group, like the name rule does:
      //   per the notes at the top of this file, in longest-match mode the
      //   generator only anchors the beginning of the whole pattern, so an
      //   ungrouped second alternative could match anywhere in the remaining
      //   input instead of at the current position.
      //   NOTE(review): with this ordering [0-9]+ matches the leading 0 of an
      //   input such as 0x1F, so the hexadecimal alternative looks unreachable.
      //   The order is kept to preserve the existing token stream; confirm
      //   whether hexadecimal literals are meant to be supported.
      // * strval: see the notes at the top of this file.
      /*!lex2php
      %input $this->data
      %counter $this->count
      %token $this->token
      %value $this->value
      %line $this->line
      %matchlongest 1
      whitespace = /[ \t\n\r]+/
      select = "SELECT"
      as_alias = "AS"
      where = "WHERE"
      join = "JOIN"
      on = "ON"
      coma = ","
      par_open = "("
      par_close = ")"
      math_div = "/"
      math_mult = "*"
      math_plus = "+"
      math_minus = "-"
      log_and = "AND"
      log_or = "OR"
      eq = "="
      not_eq = "!="
      gt = ">"
      lt = "<"
      ge = ">="
      le = "<="
      like = "LIKE"
      not_like = "NOT LIKE"
      in = "IN"
      not_in = "NOT IN"
      interval = "INTERVAL"
      f_if = "IF"
      f_elt = "ELT"
      f_coalesce = "COALESCE"
      f_concat = "CONCAT"
      f_substr = "SUBSTR"
      f_trim = "TRIM"
      f_date = "DATE"
      f_date_format = "DATE_FORMAT"
      f_current_date = "CURRENT_DATE"
      f_now = "NOW"
      f_time = "TIME"
      f_to_days = "TO_DAYS"
      f_from_days = "FROM_DAYS"
      f_year = "YEAR"
      f_month = "MONTH"
      f_day = "DAY"
      f_date_add = "DATE_ADD"
      f_date_sub = "DATE_SUB"
      f_round = "ROUND"
      f_floor = "FLOOR"
      numval = /([0-9]+|0x[0-9a-fA-F]+)/
      strval = /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
      name = /([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/
      dot = "."
      */

      // Token actions: one entry per pattern declared above.
      // The whitespace action returns false instead of setting a token
      // (presumably this makes the generated lexer skip the match - confirm
      // against the lexer generator documentation).
      /*!lex2php
      whitespace {
          return false;
      }
      select {
          $this->token = OQLParser::SELECT;
      }
      as_alias {
          $this->token = OQLParser::AS_ALIAS;
      }
      where {
          $this->token = OQLParser::WHERE;
      }
      join {
          $this->token = OQLParser::JOIN;
      }
      on {
          $this->token = OQLParser::ON;
      }
      math_div {
          $this->token = OQLParser::MATH_DIV;
      }
      math_mult {
          $this->token = OQLParser::MATH_MULT;
      }
      math_plus {
          $this->token = OQLParser::MATH_PLUS;
      }
      math_minus {
          $this->token = OQLParser::MATH_MINUS;
      }
      log_and {
          $this->token = OQLParser::LOG_AND;
      }
      log_or {
          $this->token = OQLParser::LOG_OR;
      }
      coma {
          $this->token = OQLParser::COMA;
      }
      par_open {
          $this->token = OQLParser::PAR_OPEN;
      }
      par_close {
          $this->token = OQLParser::PAR_CLOSE;
      }
      eq {
          $this->token = OQLParser::EQ;
      }
      not_eq {
          $this->token = OQLParser::NOT_EQ;
      }
      gt {
          $this->token = OQLParser::GT;
      }
      lt {
          $this->token = OQLParser::LT;
      }
      ge {
          $this->token = OQLParser::GE;
      }
      le {
          $this->token = OQLParser::LE;
      }
      like {
          $this->token = OQLParser::LIKE;
      }
      not_like {
          $this->token = OQLParser::NOT_LIKE;
      }
      in {
          $this->token = OQLParser::IN;
      }
      not_in {
          $this->token = OQLParser::NOT_IN;
      }
      interval {
          $this->token = OQLParser::INTERVAL;
      }
      f_if {
          $this->token = OQLParser::F_IF;
      }
      f_elt {
          $this->token = OQLParser::F_ELT;
      }
      f_coalesce {
          $this->token = OQLParser::F_COALESCE;
      }
      f_concat {
          $this->token = OQLParser::F_CONCAT;
      }
      f_substr {
          $this->token = OQLParser::F_SUBSTR;
      }
      f_trim {
          $this->token = OQLParser::F_TRIM;
      }
      f_date {
          $this->token = OQLParser::F_DATE;
      }
      f_date_format {
          $this->token = OQLParser::F_DATE_FORMAT;
      }
      f_current_date {
          $this->token = OQLParser::F_CURRENT_DATE;
      }
      f_now {
          $this->token = OQLParser::F_NOW;
      }
      f_time {
          $this->token = OQLParser::F_TIME;
      }
      f_to_days {
          $this->token = OQLParser::F_TO_DAYS;
      }
      f_from_days {
          $this->token = OQLParser::F_FROM_DAYS;
      }
      f_year {
          $this->token = OQLParser::F_YEAR;
      }
      f_month {
          $this->token = OQLParser::F_MONTH;
      }
      f_day {
          $this->token = OQLParser::F_DAY;
      }
      f_date_add {
          $this->token = OQLParser::F_DATE_ADD;
      }
      f_date_sub {
          $this->token = OQLParser::F_DATE_SUB;
      }
      f_round {
          $this->token = OQLParser::F_ROUND;
      }
      f_floor {
          $this->token = OQLParser::F_FLOOR;
      }
      numval {
          $this->token = OQLParser::NUMVAL;
      }
      strval {
          $this->token = OQLParser::STRVAL;
      }
      name {
          $this->token = OQLParser::NAME;
      }
      dot {
          $this->token = OQLParser::DOT;
      }
      */
  }
  // Message prefix that OQLLexer::yylex() looks for at the start of a caught
  // exception message in order to recognise an "unexpected input" failure.
  const UNEXPECTED_INPUT_AT_LINE = 'Unexpected input at line';
  /**
   * Exception thrown by OQLLexer::yylex() when the input cannot be tokenized.
   *
   * It only forwards its arguments to OQLException, prepending the fixed
   * text "Syntax error" as the first constructor argument.
   */
  class OQLLexerException extends OQLException
  {
      /**
       * @param mixed $sInput      The text being tokenized
       * @param mixed $iLine       Line number reported for the failure
       * @param mixed $iCol        Position reported for the failure
       * @param mixed $sUnexpected The input that could not be matched
       */
      public function __construct($sInput, $iLine, $iCol, $sUnexpected)
      {
          $sIssue = "Syntax error";
          parent::__construct($sIssue, $sInput, $iLine, $iCol, $sUnexpected);
      }
  }
  /**
   * OQL lexer: OQLLexerRaw plus token position reporting and conversion of
   * "unexpected input" failures into OQLLexerException.
   */
  class OQLLexer extends OQLLexerRaw
  {
      /**
       * Position of the current token: the lexer counter minus the length of
       * the current token value, never less than 0.
       *
       * @return int
       */
      public function getTokenPos()
      {
          // $this->value has no initial value, so it may still be null here;
          // the cast avoids passing null to strlen() (deprecated since PHP 8.1).
          return max(0, $this->count - strlen((string) $this->value));
      }

      /**
       * Runs the parent yylex() and returns its result.
       *
       * An exception whose message starts with UNEXPECTED_INPUT_AT_LINE and
       * continues with "<line>: <text>" is replaced by an OQLLexerException
       * carrying the input, the line, the current counter and the unexpected
       * text. Any other exception is rethrown unchanged.
       *
       * @return mixed Whatever the parent yylex() returns
       * @throws OQLLexerException When the parent reports unexpected input
       * @throws Exception Any other exception raised by the parent
       */
      public function yylex()
      {
          try
          {
              return parent::yylex();
          }
          catch (Exception $e)
          {
              $sMessage = $e->getMessage();
              $iPrefixLength = strlen(UNEXPECTED_INPUT_AT_LINE);
              if (strncmp($sMessage, UNEXPECTED_INPUT_AT_LINE, $iPrefixLength) === 0)
              {
                  // What follows the prefix is expected to look like "12: x"
                  $sLineAndChar = (string) substr($sMessage, $iPrefixLength);
                  if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
                  {
                      $iLine = (int) $aMatches[1];
                      $sUnexpected = $aMatches[2];
                      throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
                  }
              }
              // Default: forward the exception
              throw $e;
          }
      }
  }
  296. ?>