oql-lexer.plex 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444
  1. <?php
  2. // Copyright (C) 2010-2015 Combodo SARL
  3. //
  4. // This file is part of iTop.
  5. //
  6. // iTop is free software; you can redistribute it and/or modify
  7. // it under the terms of the GNU Affero General Public License as published by
  8. // the Free Software Foundation, either version 3 of the License, or
  9. // (at your option) any later version.
  10. //
  11. // iTop is distributed in the hope that it will be useful,
  12. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  13. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  14. // GNU Affero General Public License for more details.
  15. //
  16. // You should have received a copy of the GNU Affero General Public License
  17. // along with iTop. If not, see <http://www.gnu.org/licenses/>
  18. /**
  * OQL lexical analyzer (tokenizer): splits an OQL query into OQLParser tokens, to be run ahead of the syntax analysis
  20. *
  21. * @copyright Copyright (C) 2010-2015 Combodo SARL
  22. * @license http://opensource.org/licenses/AGPL-3.0
  23. */
  24. // Notes (from the source file: oql-lexer.plex) - Romain
  25. //
  26. // The strval rule is a little bit cryptic.
  27. // This is due to both a bug in the lexer generator and the complexity of our need
  28. // The rule means: either a quoted string with ", or a quoted string with '
  29. // literal " (resp. ') must be escaped by a \
  30. // \ must be escaped by an additional \
  31. //
  // Here are the issues and limitations found in the lexer generator:
  33. // * Matching simple quotes is an issue, because regexp are not correctly escaped (and the ESC code is escaped itself)
  34. // Workaround: insert '.chr(39).' which will be a real ' in the end
  35. // * Matching an alternate regexp is an issue because you must specify "|^...."
  36. // and the regexp parser will not accept that syntax
  37. // Workaround: insert '.chr(94).' which will be a real ^
  38. //
  39. // Let's analyze an overview of the regexp, we have
  40. // 1) The strval rule in the lexer definition
  41. // /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  42. // 2) Becomes the php expression in the lexer
  43. // (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
  44. // $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
  45. //
  46. // To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
  47. //
  48. //
  49. // Now, let's explain how the regexp has been designed.
  50. // Here is a simplified version, dealing with simple quotes, and based on the assumption that the lexer generator has been fixed!
  51. // The strval rule in the lexer definition
  52. // /'([^\\']*(\\')*(\\\\)*)*'/
  53. // This means anything containing \\ or \' or any other char but a standalone ' or \
  54. // This means ' or \ could not be found without a preceding \
  55. //
  /**
   * Raw OQL lexer definition.
   *
   * Apart from the constructor, the body of this class is made of two lex2php
   * sections that are input for the lexer generator:
   *  - the first one binds the generator slots (input, counter, token, value, line)
   *    to the properties below and names every pattern;
   *  - the second one attaches an action to each named pattern.
   * The order and the exact text of the patterns matter, see the notes at the top
   * of this file before changing any of them.
   */
  class OQLLexerRaw
  {
      protected $data;  // input string
      public $token;    // token id
      public $value;    // token string representation
      protected $line;  // current line
      protected $count; // current column

      /**
       * Stores the text to analyze and resets the position: counter at 0, line at 1.
       * $token and $value are left unset here.
       *
       * @param string $data input string
       */
      function __construct($data)
      {
          $this->data = $data;
          $this->count = 0;
          $this->line = 1;
      }

      // Declaration section: generator settings (longest match is requested), then
      // one named pattern per line. NOTE(review): quoted patterns are presumably
      // literal keywords/operators and slash-delimited ones regular expressions;
      // confirm against the lexer generator documentation.
      /*!lex2php
      %input $this->data
      %counter $this->count
      %token $this->token
      %value $this->value
      %line $this->line
      %matchlongest 1
      whitespace = /[ \t\n\r]+/
      union = "UNION"
      select = "SELECT"
      from = "FROM"
      as_alias = "AS"
      where = "WHERE"
      join = "JOIN"
      on = "ON"
      coma = ","
      par_open = "("
      par_close = ")"
      math_div = "/"
      math_mult = "*"
      math_plus = "+"
      math_minus = "-"
      log_and = "AND"
      log_or = "OR"
      bitwise_and = "&"
      bitwise_or = "|"
      bitwise_xor = "^"
      bitwise_leftshift = "<<"
      bitwise_rightshift = ">>"
      regexp = "REGEXP"
      eq = "="
      not_eq = "!="
      gt = ">"
      lt = "<"
      ge = ">="
      le = "<="
      like = "LIKE"
      not_like = "NOT LIKE"
      in = "IN"
      not_in = "NOT IN"
      interval = "INTERVAL"
      f_if = "IF"
      f_elt = "ELT"
      f_coalesce = "COALESCE"
      f_isnull = "ISNULL"
      f_concat = "CONCAT"
      f_substr = "SUBSTR"
      f_trim = "TRIM"
      f_date = "DATE"
      f_date_format = "DATE_FORMAT"
      f_current_date = "CURRENT_DATE"
      f_now = "NOW"
      f_time = "TIME"
      f_to_days = "TO_DAYS"
      f_from_days = "FROM_DAYS"
      f_year = "YEAR"
      f_month = "MONTH"
      f_day = "DAY"
      f_hour = "HOUR"
      f_minute = "MINUTE"
      f_second = "SECOND"
      f_date_add = "DATE_ADD"
      f_date_sub = "DATE_SUB"
      f_round = "ROUND"
      f_floor = "FLOOR"
      f_inet_aton = "INET_ATON"
      f_inet_ntoa = "INET_NTOA"
      below = "BELOW"
      below_strict = "BELOW STRICT"
      not_below = "NOT BELOW"
      not_below_strict = "NOT BELOW STRICT"
      above = "ABOVE"
      above_strict = "ABOVE STRICT"
      not_above = "NOT ABOVE"
      not_above_strict = "NOT ABOVE STRICT"
      //
      // WARNING: there seems to be a bug in the Lexer about matching the longest pattern
      // when there are alternates in the regexp.
      //
      // For instance:
      // numval = /[0-9]+|0x[0-9a-fA-F]+/
      // Does not work: SELECT Toto WHERE name = 'Text0xCTest' => Fails because 0xC is recognized as a numval (inside the string) instead of a strval !!
      //
      // Inserting a ^ after the alternate (see comment at the top of this file) does not work either
      // numval = /[0-9]+|'.chr(94).'0x[0-9a-fA-F]+/
      // SELECT Toto WHERE name = 'Text0xCTest' => works but
      // SELECT Toto WHERE id = 0xC => does not work, 'xC' is found as a name (apparently 0 is recognized as a numval and the remaining is a name !)
      //
      // numval = /([0-9]+|0x[0-9a-fA-F]+)/
      // Does not work either, the hexadecimal numbers are not matched properly
      // Anyhow let's distinguish the hexadecimal values from decimal integers, hex numbers will be stored as strings
      // and passed as-is to MySQL which enables us to pass 64-bit values without messing with them in PHP
      //
      hexval = /(0x[0-9a-fA-F]+)/
      numval = /([0-9]+)/
      strval = /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
      name = /([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/
      varname = /:([_a-zA-Z][_a-zA-Z0-9]*->[_a-zA-Z][_a-zA-Z0-9]*|[_a-zA-Z][_a-zA-Z0-9]*)/
      dot = "."
      */

      // Rule section: one action per named pattern. Every action stores the matching
      // OQLParser constant in $this->token, except "whitespace" which returns false.
      // NOTE(review): returning false presumably makes the generated lexer discard
      // the match and move on; confirm against the lexer generator documentation.
      /*!lex2php
      whitespace {
          return false;
      }
      union {
          $this->token = OQLParser::UNION;
      }
      select {
          $this->token = OQLParser::SELECT;
      }
      from {
          $this->token = OQLParser::FROM;
      }
      as_alias {
          $this->token = OQLParser::AS_ALIAS;
      }
      where {
          $this->token = OQLParser::WHERE;
      }
      join {
          $this->token = OQLParser::JOIN;
      }
      on {
          $this->token = OQLParser::ON;
      }
      math_div {
          $this->token = OQLParser::MATH_DIV;
      }
      math_mult {
          $this->token = OQLParser::MATH_MULT;
      }
      math_plus {
          $this->token = OQLParser::MATH_PLUS;
      }
      math_minus {
          $this->token = OQLParser::MATH_MINUS;
      }
      log_and {
          $this->token = OQLParser::LOG_AND;
      }
      log_or {
          $this->token = OQLParser::LOG_OR;
      }
      bitwise_or {
          $this->token = OQLParser::BITWISE_OR;
      }
      bitwise_and {
          $this->token = OQLParser::BITWISE_AND;
      }
      bitwise_xor {
          $this->token = OQLParser::BITWISE_XOR;
      }
      bitwise_leftshift {
          $this->token = OQLParser::BITWISE_LEFT_SHIFT;
      }
      bitwise_rightshift {
          $this->token = OQLParser::BITWISE_RIGHT_SHIFT;
      }
      coma {
          $this->token = OQLParser::COMA;
      }
      par_open {
          $this->token = OQLParser::PAR_OPEN;
      }
      par_close {
          $this->token = OQLParser::PAR_CLOSE;
      }
      regexp {
          $this->token = OQLParser::REGEXP;
      }
      eq {
          $this->token = OQLParser::EQ;
      }
      not_eq {
          $this->token = OQLParser::NOT_EQ;
      }
      gt {
          $this->token = OQLParser::GT;
      }
      lt {
          $this->token = OQLParser::LT;
      }
      ge {
          $this->token = OQLParser::GE;
      }
      le {
          $this->token = OQLParser::LE;
      }
      like {
          $this->token = OQLParser::LIKE;
      }
      not_like {
          $this->token = OQLParser::NOT_LIKE;
      }
      in {
          $this->token = OQLParser::IN;
      }
      not_in {
          $this->token = OQLParser::NOT_IN;
      }
      interval {
          $this->token = OQLParser::INTERVAL;
      }
      f_if {
          $this->token = OQLParser::F_IF;
      }
      f_elt {
          $this->token = OQLParser::F_ELT;
      }
      f_coalesce {
          $this->token = OQLParser::F_COALESCE;
      }
      f_isnull {
          $this->token = OQLParser::F_ISNULL;
      }
      f_concat {
          $this->token = OQLParser::F_CONCAT;
      }
      f_substr {
          $this->token = OQLParser::F_SUBSTR;
      }
      f_trim {
          $this->token = OQLParser::F_TRIM;
      }
      f_date {
          $this->token = OQLParser::F_DATE;
      }
      f_date_format {
          $this->token = OQLParser::F_DATE_FORMAT;
      }
      f_current_date {
          $this->token = OQLParser::F_CURRENT_DATE;
      }
      f_now {
          $this->token = OQLParser::F_NOW;
      }
      f_time {
          $this->token = OQLParser::F_TIME;
      }
      f_to_days {
          $this->token = OQLParser::F_TO_DAYS;
      }
      f_from_days {
          $this->token = OQLParser::F_FROM_DAYS;
      }
      f_year {
          $this->token = OQLParser::F_YEAR;
      }
      f_month {
          $this->token = OQLParser::F_MONTH;
      }
      f_day {
          $this->token = OQLParser::F_DAY;
      }
      f_hour {
          $this->token = OQLParser::F_HOUR;
      }
      f_minute {
          $this->token = OQLParser::F_MINUTE;
      }
      f_second {
          $this->token = OQLParser::F_SECOND;
      }
      f_date_add {
          $this->token = OQLParser::F_DATE_ADD;
      }
      f_date_sub {
          $this->token = OQLParser::F_DATE_SUB;
      }
      f_round {
          $this->token = OQLParser::F_ROUND;
      }
      f_floor {
          $this->token = OQLParser::F_FLOOR;
      }
      f_inet_aton {
          $this->token = OQLParser::F_INET_ATON;
      }
      f_inet_ntoa {
          $this->token = OQLParser::F_INET_NTOA;
      }
      below {
          $this->token = OQLParser::BELOW;
      }
      below_strict {
          $this->token = OQLParser::BELOW_STRICT;
      }
      not_below {
          $this->token = OQLParser::NOT_BELOW;
      }
      not_below_strict {
          $this->token = OQLParser::NOT_BELOW_STRICT;
      }
      above {
          $this->token = OQLParser::ABOVE;
      }
      above_strict {
          $this->token = OQLParser::ABOVE_STRICT;
      }
      not_above {
          $this->token = OQLParser::NOT_ABOVE;
      }
      not_above_strict {
          $this->token = OQLParser::NOT_ABOVE_STRICT;
      }
      hexval {
          $this->token = OQLParser::HEXVAL;
      }
      numval {
          $this->token = OQLParser::NUMVAL;
      }
      strval {
          $this->token = OQLParser::STRVAL;
      }
      name {
          $this->token = OQLParser::NAME;
      }
      varname {
          $this->token = OQLParser::VARNAME;
      }
      dot {
          $this->token = OQLParser::DOT;
      }
      */
  }
  // Message prefix that OQLLexer::yylex() looks for in a caught exception in order
  // to turn it into an OQLLexerException (the remainder of the message is parsed as
  // "<line>: <unexpected text>").
  // NOTE(review): presumably this has to stay in sync with the message thrown by the
  // generated lexer; confirm against the generator output before changing it.
  define('UNEXPECTED_INPUT_AT_LINE', 'Unexpected input at line');
  /**
   * Exception thrown by OQLLexer::yylex() when the input cannot be tokenized.
   *
   * It is an OQLException whose issue label is always "Syntax error"; every
   * other piece of information is forwarded untouched to the parent class.
   */
  class OQLLexerException extends OQLException
  {
      /**
       * @param string $sInput      the text that was being analyzed
       * @param mixed  $iLine       line at which the problem was reported
       * @param mixed  $iCol        position of the lexer when the problem was reported
       * @param string $sUnexpected the piece of input that could not be matched
       */
      public function __construct($sInput, $iLine, $iCol, $sUnexpected)
      {
          // Fixed issue label for every lexer failure
          $sIssue = "Syntax error";

          parent::__construct($sIssue, $sInput, $iLine, $iCol, $sUnexpected);
      }
  }
  /**
   * OQL lexer to be used by callers.
   *
   * Adds two things on top of OQLLexerRaw: the position of the current token,
   * and the conversion of "unexpected input" failures into OQLLexerException.
   */
  class OQLLexer extends OQLLexerRaw
  {
      /**
       * Offset at which the current token starts: the counter minus the length
       * of the token text, never below 0.
       *
       * @return int
       */
      public function getTokenPos()
      {
          // $value is still null until a first token has been read (the constructor
          // does not initialize it); passing null to strlen() is deprecated since PHP 8.1
          return max(0, $this->count - strlen((string) $this->value));
      }

      /**
       * Runs the parent yylex() and rewrites its "unexpected input" failure.
       *
       * An exception whose message is UNEXPECTED_INPUT_AT_LINE followed by
       * "<line>: <text>" is replaced by an OQLLexerException carrying the input,
       * the line, the current counter and the unexpected text. Any other
       * exception is forwarded unchanged.
       *
       * @return mixed whatever the parent yylex() returns
       * @throws OQLLexerException when the failure message has the expected format
       * @throws Exception any other failure raised by the parent yylex()
       */
      public function yylex()
      {
          try
          {
              return parent::yylex();
          }
          catch (Exception $e)
          {
              $sMessage = $e->getMessage();
              $iPrefixLength = strlen(UNEXPECTED_INPUT_AT_LINE);
              // Strict prefix test (no loose string comparison involved)
              if (strncmp($sMessage, UNEXPECTED_INPUT_AT_LINE, $iPrefixLength) === 0)
              {
                  $sLineAndChar = substr($sMessage, $iPrefixLength);
                  if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
                  {
                      // The captured line is made of digits only: hand it over as an integer
                      $iLine = (int) $aMatches[1];
                      $sUnexpected = $aMatches[2];
                      throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
                  }
              }
              // Default: forward the exception
              throw $e;
          }
      }
  }
  432. ?>