oql-lexer.php 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564
  1. <?php
  2. // Notes (from the source file: oql-lexer.plex) - Romain
  3. //
  4. // The strval rule is a little bit cryptic.
  5. // This is due to both a bug in the lexer generator and the complexity of our need
  6. // The rule means: either a quoted string with ", or a quoted string with '
  7. // literal " (resp. ') must be escaped by a \
  8. // \ must be escaped by an additional \
  9. //
// Here are the issues and limitations found in the lexer generator:
// * Matching single quotes is an issue, because regexps are not correctly escaped (and the escape code is itself escaped)
//   Workaround: insert '.chr(39).' which will be a real ' in the end
// * Matching an alternate regexp is an issue because you must specify "|^...."
//   and the regexp parser will not accept that syntax
//   Workaround: insert '.chr(94).' which will be a real ^
  16. //
  17. // Let's analyze an overview of the regexp, we have
  18. // 1) The strval rule in the lexer definition
  19. // /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  20. // 2) Becomes the php expression in the lexer
  21. // (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
  22. // $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
  23. //
  24. // To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
  25. //
  26. //
// Now, let's explain how the regexp has been designed.
// Here is a simplified version, dealing with single quotes only, and based on the assumption that the lexer generator has been fixed!
// The strval rule in the lexer definition
// /'([^\\']*(\\')*(\\\\)*)*'/
// This matches any sequence made of \\, \' or any character other than a standalone ' or \
// In other words, ' and \ can only appear when preceded by a \
  33. //
  34. class OQLLexerRaw
  35. {
  36. protected $data; // input string
  37. public $token; // token id
  38. public $value; // token string representation
  39. protected $line; // current line
  40. protected $count; // current column
  41. function __construct($data)
  42. {
  43. $this->data = $data;
  44. $this->count = 0;
  45. $this->line = 1;
  46. }
  47. private $_yy_state = 1;
  48. private $_yy_stack = array();
  49. function yylex()
  50. {
  51. return $this->{'yylex' . $this->_yy_state}();
  52. }
  53. function yypushstate($state)
  54. {
  55. array_push($this->_yy_stack, $this->_yy_state);
  56. $this->_yy_state = $state;
  57. }
  58. function yypopstate()
  59. {
  60. $this->_yy_state = array_pop($this->_yy_stack);
  61. }
  62. function yybegin($state)
  63. {
  64. $this->_yy_state = $state;
  65. }
  66. function yylex1()
  67. {
  68. if ($this->count >= strlen($this->data)) {
  69. return false; // end of input
  70. }
  71. do {
  72. $rules = array(
  73. '/^[ \t\n\r]+/',
  74. '/^SELECT/',
  75. '/^FROM/',
  76. '/^AS/',
  77. '/^WHERE/',
  78. '/^JOIN/',
  79. '/^ON/',
  80. '/^\//',
  81. '/^\\*/',
  82. '/^\\+/',
  83. '/^-/',
  84. '/^AND/',
  85. '/^OR/',
  86. '/^,/',
  87. '/^\\(/',
  88. '/^\\)/',
  89. '/^=/',
  90. '/^!=/',
  91. '/^>/',
  92. '/^</',
  93. '/^>=/',
  94. '/^<=/',
  95. '/^LIKE/',
  96. '/^NOT LIKE/',
  97. '/^IN/',
  98. '/^NOT IN/',
  99. '/^INTERVAL/',
  100. '/^IF/',
  101. '/^ELT/',
  102. '/^COALESCE/',
  103. '/^CONCAT/',
  104. '/^SUBSTR/',
  105. '/^TRIM/',
  106. '/^DATE/',
  107. '/^DATE_FORMAT/',
  108. '/^CURRENT_DATE/',
  109. '/^NOW/',
  110. '/^TIME/',
  111. '/^TO_DAYS/',
  112. '/^FROM_DAYS/',
  113. '/^YEAR/',
  114. '/^MONTH/',
  115. '/^DAY/',
  116. '/^HOUR/',
  117. '/^MINUTE/',
  118. '/^SECOND/',
  119. '/^DATE_ADD/',
  120. '/^DATE_SUB/',
  121. '/^ROUND/',
  122. '/^FLOOR/',
  123. '/^INET_ATON/',
  124. '/^INET_NTOA/',
  125. '/^[0-9]+|0x[0-9a-fA-F]+/',
  126. '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/',
  127. '/^([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/',
  128. '/^:([_a-zA-Z][_a-zA-Z0-9]*->[_a-zA-Z][_a-zA-Z0-9]*|[_a-zA-Z][_a-zA-Z0-9]*)/',
  129. '/^\\./',
  130. );
  131. $match = false;
  132. foreach ($rules as $index => $rule) {
  133. if (preg_match($rule, substr($this->data, $this->count), $yymatches)) {
  134. if ($match) {
  135. if (strlen($yymatches[0]) > strlen($match[0][0])) {
  136. $match = array($yymatches, $index); // matches, token
  137. }
  138. } else {
  139. $match = array($yymatches, $index);
  140. }
  141. }
  142. }
  143. if (!$match) {
  144. throw new Exception('Unexpected input at line' . $this->line .
  145. ': ' . $this->data[$this->count]);
  146. }
  147. $this->token = $match[1];
  148. $this->value = $match[0][0];
  149. $yysubmatches = $match[0];
  150. array_shift($yysubmatches);
  151. if (!$yysubmatches) {
  152. $yysubmatches = array();
  153. }
  154. $r = $this->{'yy_r1_' . $this->token}($yysubmatches);
  155. if ($r === null) {
  156. $this->count += strlen($this->value);
  157. $this->line += substr_count($this->value, "\n");
  158. // accept this token
  159. return true;
  160. } elseif ($r === true) {
  161. // we have changed state
  162. // process this token in the new state
  163. return $this->yylex();
  164. } elseif ($r === false) {
  165. $this->count += strlen($this->value);
  166. $this->line += substr_count($this->value, "\n");
  167. if ($this->count >= strlen($this->data)) {
  168. return false; // end of input
  169. }
  170. // skip this token
  171. continue;
  172. } else {
  173. $yy_yymore_patterns = array_slice($rules, $this->token, true);
  174. // yymore is needed
  175. do {
  176. if (!isset($yy_yymore_patterns[$this->token])) {
  177. throw new Exception('cannot do yymore for the last token');
  178. }
  179. $match = false;
  180. foreach ($yy_yymore_patterns[$this->token] as $index => $rule) {
  181. if (preg_match('/' . $rule . '/',
  182. substr($this->data, $this->count), $yymatches)) {
  183. $yymatches = array_filter($yymatches, 'strlen'); // remove empty sub-patterns
  184. if ($match) {
  185. if (strlen($yymatches[0]) > strlen($match[0][0])) {
  186. $match = array($yymatches, $index); // matches, token
  187. }
  188. } else {
  189. $match = array($yymatches, $index);
  190. }
  191. }
  192. }
  193. if (!$match) {
  194. throw new Exception('Unexpected input at line' . $this->line .
  195. ': ' . $this->data[$this->count]);
  196. }
  197. $this->token = $match[1];
  198. $this->value = $match[0][0];
  199. $yysubmatches = $match[0];
  200. array_shift($yysubmatches);
  201. if (!$yysubmatches) {
  202. $yysubmatches = array();
  203. }
  204. $this->line = substr_count($this->value, "\n");
  205. $r = $this->{'yy_r1_' . $this->token}();
  206. } while ($r !== null || !$r);
  207. if ($r === true) {
  208. // we have changed state
  209. // process this token in the new state
  210. return $this->yylex();
  211. } else {
  212. // accept
  213. $this->count += strlen($this->value);
  214. $this->line += substr_count($this->value, "\n");
  215. return true;
  216. }
  217. }
  218. } while (true);
  219. } // end function
  220. function yy_r1_0($yy_subpatterns)
  221. {
  222. return false;
  223. }
  224. function yy_r1_1($yy_subpatterns)
  225. {
  226. $this->token = OQLParser::SELECT;
  227. }
  228. function yy_r1_2($yy_subpatterns)
  229. {
  230. $this->token = OQLParser::FROM;
  231. }
  232. function yy_r1_3($yy_subpatterns)
  233. {
  234. $this->token = OQLParser::AS_ALIAS;
  235. }
  236. function yy_r1_4($yy_subpatterns)
  237. {
  238. $this->token = OQLParser::WHERE;
  239. }
  240. function yy_r1_5($yy_subpatterns)
  241. {
  242. $this->token = OQLParser::JOIN;
  243. }
  244. function yy_r1_6($yy_subpatterns)
  245. {
  246. $this->token = OQLParser::ON;
  247. }
  248. function yy_r1_7($yy_subpatterns)
  249. {
  250. $this->token = OQLParser::MATH_DIV;
  251. }
  252. function yy_r1_8($yy_subpatterns)
  253. {
  254. $this->token = OQLParser::MATH_MULT;
  255. }
  256. function yy_r1_9($yy_subpatterns)
  257. {
  258. $this->token = OQLParser::MATH_PLUS;
  259. }
  260. function yy_r1_10($yy_subpatterns)
  261. {
  262. $this->token = OQLParser::MATH_MINUS;
  263. }
  264. function yy_r1_11($yy_subpatterns)
  265. {
  266. $this->token = OQLParser::LOG_AND;
  267. }
  268. function yy_r1_12($yy_subpatterns)
  269. {
  270. $this->token = OQLParser::LOG_OR;
  271. }
  272. function yy_r1_13($yy_subpatterns)
  273. {
  274. $this->token = OQLParser::COMA;
  275. }
  276. function yy_r1_14($yy_subpatterns)
  277. {
  278. $this->token = OQLParser::PAR_OPEN;
  279. }
  280. function yy_r1_15($yy_subpatterns)
  281. {
  282. $this->token = OQLParser::PAR_CLOSE;
  283. }
  284. function yy_r1_16($yy_subpatterns)
  285. {
  286. $this->token = OQLParser::EQ;
  287. }
  288. function yy_r1_17($yy_subpatterns)
  289. {
  290. $this->token = OQLParser::NOT_EQ;
  291. }
  292. function yy_r1_18($yy_subpatterns)
  293. {
  294. $this->token = OQLParser::GT;
  295. }
  296. function yy_r1_19($yy_subpatterns)
  297. {
  298. $this->token = OQLParser::LT;
  299. }
  300. function yy_r1_20($yy_subpatterns)
  301. {
  302. $this->token = OQLParser::GE;
  303. }
  304. function yy_r1_21($yy_subpatterns)
  305. {
  306. $this->token = OQLParser::LE;
  307. }
  308. function yy_r1_22($yy_subpatterns)
  309. {
  310. $this->token = OQLParser::LIKE;
  311. }
  312. function yy_r1_23($yy_subpatterns)
  313. {
  314. $this->token = OQLParser::NOT_LIKE;
  315. }
  316. function yy_r1_24($yy_subpatterns)
  317. {
  318. $this->token = OQLParser::IN;
  319. }
  320. function yy_r1_25($yy_subpatterns)
  321. {
  322. $this->token = OQLParser::NOT_IN;
  323. }
  324. function yy_r1_26($yy_subpatterns)
  325. {
  326. $this->token = OQLParser::INTERVAL;
  327. }
  328. function yy_r1_27($yy_subpatterns)
  329. {
  330. $this->token = OQLParser::F_IF;
  331. }
  332. function yy_r1_28($yy_subpatterns)
  333. {
  334. $this->token = OQLParser::F_ELT;
  335. }
  336. function yy_r1_29($yy_subpatterns)
  337. {
  338. $this->token = OQLParser::F_COALESCE;
  339. }
  340. function yy_r1_30($yy_subpatterns)
  341. {
  342. $this->token = OQLParser::F_CONCAT;
  343. }
  344. function yy_r1_31($yy_subpatterns)
  345. {
  346. $this->token = OQLParser::F_SUBSTR;
  347. }
  348. function yy_r1_32($yy_subpatterns)
  349. {
  350. $this->token = OQLParser::F_TRIM;
  351. }
  352. function yy_r1_33($yy_subpatterns)
  353. {
  354. $this->token = OQLParser::F_DATE;
  355. }
  356. function yy_r1_34($yy_subpatterns)
  357. {
  358. $this->token = OQLParser::F_DATE_FORMAT;
  359. }
  360. function yy_r1_35($yy_subpatterns)
  361. {
  362. $this->token = OQLParser::F_CURRENT_DATE;
  363. }
  364. function yy_r1_36($yy_subpatterns)
  365. {
  366. $this->token = OQLParser::F_NOW;
  367. }
  368. function yy_r1_37($yy_subpatterns)
  369. {
  370. $this->token = OQLParser::F_TIME;
  371. }
  372. function yy_r1_38($yy_subpatterns)
  373. {
  374. $this->token = OQLParser::F_TO_DAYS;
  375. }
  376. function yy_r1_39($yy_subpatterns)
  377. {
  378. $this->token = OQLParser::F_FROM_DAYS;
  379. }
  380. function yy_r1_40($yy_subpatterns)
  381. {
  382. $this->token = OQLParser::F_YEAR;
  383. }
  384. function yy_r1_41($yy_subpatterns)
  385. {
  386. $this->token = OQLParser::F_MONTH;
  387. }
  388. function yy_r1_42($yy_subpatterns)
  389. {
  390. $this->token = OQLParser::F_DAY;
  391. }
  392. function yy_r1_43($yy_subpatterns)
  393. {
  394. $this->token = OQLParser::F_HOUR;
  395. }
  396. function yy_r1_44($yy_subpatterns)
  397. {
  398. $this->token = OQLParser::F_MINUTE;
  399. }
  400. function yy_r1_45($yy_subpatterns)
  401. {
  402. $this->token = OQLParser::F_SECOND;
  403. }
  404. function yy_r1_46($yy_subpatterns)
  405. {
  406. $this->token = OQLParser::F_DATE_ADD;
  407. }
  408. function yy_r1_47($yy_subpatterns)
  409. {
  410. $this->token = OQLParser::F_DATE_SUB;
  411. }
  412. function yy_r1_48($yy_subpatterns)
  413. {
  414. $this->token = OQLParser::F_ROUND;
  415. }
  416. function yy_r1_49($yy_subpatterns)
  417. {
  418. $this->token = OQLParser::F_FLOOR;
  419. }
  420. function yy_r1_50($yy_subpatterns)
  421. {
  422. $this->token = OQLParser::F_INET_ATON;
  423. }
  424. function yy_r1_51($yy_subpatterns)
  425. {
  426. $this->token = OQLParser::F_INET_NTOA;
  427. }
  428. function yy_r1_52($yy_subpatterns)
  429. {
  430. $this->token = OQLParser::NUMVAL;
  431. }
  432. function yy_r1_53($yy_subpatterns)
  433. {
  434. $this->token = OQLParser::STRVAL;
  435. }
  436. function yy_r1_54($yy_subpatterns)
  437. {
  438. $this->token = OQLParser::NAME;
  439. }
  440. function yy_r1_55($yy_subpatterns)
  441. {
  442. $this->token = OQLParser::VARNAME;
  443. }
  444. function yy_r1_56($yy_subpatterns)
  445. {
  446. $this->token = OQLParser::DOT;
  447. }
  448. }
// Prefix of the exception message thrown by OQLLexerRaw::yylex1() when no rule matches.
// OQLLexer::yylex() relies on it to recognize that exception and to extract the
// line number and the offending character: both strings must stay identical.
define('UNEXPECTED_INPUT_AT_LINE', 'Unexpected input at line');
  450. class OQLLexerException extends OQLException
  451. {
  452. public function __construct($sInput, $iLine, $iCol, $sUnexpected)
  453. {
  454. parent::__construct("Syntax error", $sInput, $iLine, $iCol, $sUnexpected);
  455. }
  456. }
  457. class OQLLexer extends OQLLexerRaw
  458. {
  459. public function getTokenPos()
  460. {
  461. return max(0, $this->count - strlen($this->value));
  462. }
  463. function yylex()
  464. {
  465. try
  466. {
  467. return parent::yylex();
  468. }
  469. catch (Exception $e)
  470. {
  471. $sMessage = $e->getMessage();
  472. if (substr($sMessage, 0, strlen(UNEXPECTED_INPUT_AT_LINE)) == UNEXPECTED_INPUT_AT_LINE)
  473. {
  474. $sLineAndChar = substr($sMessage, strlen(UNEXPECTED_INPUT_AT_LINE));
  475. if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
  476. {
  477. $iLine = $aMatches[1];
  478. $sUnexpected = $aMatches[2];
  479. throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
  480. }
  481. }
  482. // Default: forward the exception
  483. throw $e;
  484. }
  485. }
  486. }
  487. ?>