oql-lexer.php 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588
  1. <?php
  2. // Copyright (C) 2010 Combodo SARL
  3. //
  4. // This program is free software; you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation; version 3 of the License.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. /**
  17. * OQL lexical analyzer (tokenizer), to be run before the syntax analyzer (parser)
  18. *
  19. * @author Erwan Taloc <erwan.taloc@combodo.com>
  20. * @author Romain Quetiez <romain.quetiez@combodo.com>
  21. * @author Denis Flaven <denis.flaven@combodo.com>
  22. * @license http://www.opensource.org/licenses/gpl-3.0.html LGPL
  23. */
  24. // Notes (from the source file: oql-lexer.plex) - Romain
  25. //
  26. // The strval rule is a little bit cryptic.
  27. // This is due to both a bug in the lexer generator and the complexity of our need
  28. // The rule means: either a quoted string with ", or a quoted string with '
  29. // literal " (resp. ') must be escaped by a \
  30. // \ must be escaped by an additional \
  31. //
  32. // Here are the issues and limitations found in the lexer generator:
  33. // * Matching simple quotes is an issue, because regexp are not correctly escaped (and the ESC code is escaped itself)
  34. // Workaround: insert '.chr(39).' which will be a real ' in the end
  35. // * Matching an alternate regexp is an issue because you must specify "|^...."
  36. // and the regexp parser will not accept that syntax
  37. // Workaround: insert '.chr(94).' which will be a real ^
  38. //
  39. // Let's analyze an overview of the regexp, we have
  40. // 1) The strval rule in the lexer definition
  41. // /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  42. // 2) Becomes the php expression in the lexer
  43. // (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
  44. // $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
  45. //
  46. // To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
  47. //
  48. //
  49. // Now, let's explain how the regexp has been designed.
  50. // Here is a simplified version, dealing with simple quotes, and based on the assumption that the lexer generator has been fixed!
  51. // The strval rule in the lexer definition
  52. // /'([^\\']*(\\')*(\\\\)*)*'/
  53. // This means anything containing \\ or \' or any other char but a standalone ' or \
  54. // This means ' or \ could not be found without a preceding \
  55. //
  56. class OQLLexerRaw
  57. {
  58. protected $data; // input string
  59. public $token; // token id
  60. public $value; // token string representation
  61. protected $line; // current line
  62. protected $count; // current column
  63. function __construct($data)
  64. {
  65. $this->data = $data;
  66. $this->count = 0;
  67. $this->line = 1;
  68. }
  69. private $_yy_state = 1;
  70. private $_yy_stack = array();
  71. function yylex()
  72. {
  73. return $this->{'yylex' . $this->_yy_state}();
  74. }
  75. function yypushstate($state)
  76. {
  77. array_push($this->_yy_stack, $this->_yy_state);
  78. $this->_yy_state = $state;
  79. }
  80. function yypopstate()
  81. {
  82. $this->_yy_state = array_pop($this->_yy_stack);
  83. }
  84. function yybegin($state)
  85. {
  86. $this->_yy_state = $state;
  87. }
  88. function yylex1()
  89. {
  90. if ($this->count >= strlen($this->data)) {
  91. return false; // end of input
  92. }
  93. do {
  94. $rules = array(
  95. '/^[ \t\n\r]+/',
  96. '/^SELECT/',
  97. '/^FROM/',
  98. '/^AS/',
  99. '/^WHERE/',
  100. '/^JOIN/',
  101. '/^ON/',
  102. '/^\//',
  103. '/^\\*/',
  104. '/^\\+/',
  105. '/^-/',
  106. '/^AND/',
  107. '/^OR/',
  108. '/^,/',
  109. '/^\\(/',
  110. '/^\\)/',
  111. '/^=/',
  112. '/^!=/',
  113. '/^>/',
  114. '/^</',
  115. '/^>=/',
  116. '/^<=/',
  117. '/^LIKE/',
  118. '/^NOT LIKE/',
  119. '/^IN/',
  120. '/^NOT IN/',
  121. '/^INTERVAL/',
  122. '/^IF/',
  123. '/^ELT/',
  124. '/^COALESCE/',
  125. '/^CONCAT/',
  126. '/^SUBSTR/',
  127. '/^TRIM/',
  128. '/^DATE/',
  129. '/^DATE_FORMAT/',
  130. '/^CURRENT_DATE/',
  131. '/^NOW/',
  132. '/^TIME/',
  133. '/^TO_DAYS/',
  134. '/^FROM_DAYS/',
  135. '/^YEAR/',
  136. '/^MONTH/',
  137. '/^DAY/',
  138. '/^HOUR/',
  139. '/^MINUTE/',
  140. '/^SECOND/',
  141. '/^DATE_ADD/',
  142. '/^DATE_SUB/',
  143. '/^ROUND/',
  144. '/^FLOOR/',
  145. '/^INET_ATON/',
  146. '/^INET_NTOA/',
  147. '/^[0-9]+|0x[0-9a-fA-F]+/',
  148. '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/',
  149. '/^([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/',
  150. '/^:([_a-zA-Z][_a-zA-Z0-9]*->[_a-zA-Z][_a-zA-Z0-9]*|[_a-zA-Z][_a-zA-Z0-9]*)/',
  151. '/^\\./',
  152. );
  153. $match = false;
  154. foreach ($rules as $index => $rule) {
  155. if (preg_match($rule, substr($this->data, $this->count), $yymatches)) {
  156. if ($match) {
  157. if (strlen($yymatches[0]) > strlen($match[0][0])) {
  158. $match = array($yymatches, $index); // matches, token
  159. }
  160. } else {
  161. $match = array($yymatches, $index);
  162. }
  163. }
  164. }
  165. if (!$match) {
  166. throw new Exception('Unexpected input at line' . $this->line .
  167. ': ' . $this->data[$this->count]);
  168. }
  169. $this->token = $match[1];
  170. $this->value = $match[0][0];
  171. $yysubmatches = $match[0];
  172. array_shift($yysubmatches);
  173. if (!$yysubmatches) {
  174. $yysubmatches = array();
  175. }
  176. $r = $this->{'yy_r1_' . $this->token}($yysubmatches);
  177. if ($r === null) {
  178. $this->count += strlen($this->value);
  179. $this->line += substr_count($this->value, "\n");
  180. // accept this token
  181. return true;
  182. } elseif ($r === true) {
  183. // we have changed state
  184. // process this token in the new state
  185. return $this->yylex();
  186. } elseif ($r === false) {
  187. $this->count += strlen($this->value);
  188. $this->line += substr_count($this->value, "\n");
  189. if ($this->count >= strlen($this->data)) {
  190. return false; // end of input
  191. }
  192. // skip this token
  193. continue;
  194. } else {
  195. $yy_yymore_patterns = array_slice($rules, $this->token, true);
  196. // yymore is needed
  197. do {
  198. if (!isset($yy_yymore_patterns[$this->token])) {
  199. throw new Exception('cannot do yymore for the last token');
  200. }
  201. $match = false;
  202. foreach ($yy_yymore_patterns[$this->token] as $index => $rule) {
  203. if (preg_match('/' . $rule . '/',
  204. substr($this->data, $this->count), $yymatches)) {
  205. $yymatches = array_filter($yymatches, 'strlen'); // remove empty sub-patterns
  206. if ($match) {
  207. if (strlen($yymatches[0]) > strlen($match[0][0])) {
  208. $match = array($yymatches, $index); // matches, token
  209. }
  210. } else {
  211. $match = array($yymatches, $index);
  212. }
  213. }
  214. }
  215. if (!$match) {
  216. throw new Exception('Unexpected input at line' . $this->line .
  217. ': ' . $this->data[$this->count]);
  218. }
  219. $this->token = $match[1];
  220. $this->value = $match[0][0];
  221. $yysubmatches = $match[0];
  222. array_shift($yysubmatches);
  223. if (!$yysubmatches) {
  224. $yysubmatches = array();
  225. }
  226. $this->line = substr_count($this->value, "\n");
  227. $r = $this->{'yy_r1_' . $this->token}();
  228. } while ($r !== null || !$r);
  229. if ($r === true) {
  230. // we have changed state
  231. // process this token in the new state
  232. return $this->yylex();
  233. } else {
  234. // accept
  235. $this->count += strlen($this->value);
  236. $this->line += substr_count($this->value, "\n");
  237. return true;
  238. }
  239. }
  240. } while (true);
  241. } // end function
  242. function yy_r1_0($yy_subpatterns)
  243. {
  244. return false;
  245. }
  246. function yy_r1_1($yy_subpatterns)
  247. {
  248. $this->token = OQLParser::SELECT;
  249. }
  250. function yy_r1_2($yy_subpatterns)
  251. {
  252. $this->token = OQLParser::FROM;
  253. }
  254. function yy_r1_3($yy_subpatterns)
  255. {
  256. $this->token = OQLParser::AS_ALIAS;
  257. }
  258. function yy_r1_4($yy_subpatterns)
  259. {
  260. $this->token = OQLParser::WHERE;
  261. }
  262. function yy_r1_5($yy_subpatterns)
  263. {
  264. $this->token = OQLParser::JOIN;
  265. }
  266. function yy_r1_6($yy_subpatterns)
  267. {
  268. $this->token = OQLParser::ON;
  269. }
  270. function yy_r1_7($yy_subpatterns)
  271. {
  272. $this->token = OQLParser::MATH_DIV;
  273. }
  274. function yy_r1_8($yy_subpatterns)
  275. {
  276. $this->token = OQLParser::MATH_MULT;
  277. }
  278. function yy_r1_9($yy_subpatterns)
  279. {
  280. $this->token = OQLParser::MATH_PLUS;
  281. }
  282. function yy_r1_10($yy_subpatterns)
  283. {
  284. $this->token = OQLParser::MATH_MINUS;
  285. }
  286. function yy_r1_11($yy_subpatterns)
  287. {
  288. $this->token = OQLParser::LOG_AND;
  289. }
  290. function yy_r1_12($yy_subpatterns)
  291. {
  292. $this->token = OQLParser::LOG_OR;
  293. }
  294. function yy_r1_13($yy_subpatterns)
  295. {
  296. $this->token = OQLParser::COMA;
  297. }
  298. function yy_r1_14($yy_subpatterns)
  299. {
  300. $this->token = OQLParser::PAR_OPEN;
  301. }
  302. function yy_r1_15($yy_subpatterns)
  303. {
  304. $this->token = OQLParser::PAR_CLOSE;
  305. }
  306. function yy_r1_16($yy_subpatterns)
  307. {
  308. $this->token = OQLParser::EQ;
  309. }
  310. function yy_r1_17($yy_subpatterns)
  311. {
  312. $this->token = OQLParser::NOT_EQ;
  313. }
  314. function yy_r1_18($yy_subpatterns)
  315. {
  316. $this->token = OQLParser::GT;
  317. }
  318. function yy_r1_19($yy_subpatterns)
  319. {
  320. $this->token = OQLParser::LT;
  321. }
  322. function yy_r1_20($yy_subpatterns)
  323. {
  324. $this->token = OQLParser::GE;
  325. }
  326. function yy_r1_21($yy_subpatterns)
  327. {
  328. $this->token = OQLParser::LE;
  329. }
  330. function yy_r1_22($yy_subpatterns)
  331. {
  332. $this->token = OQLParser::LIKE;
  333. }
  334. function yy_r1_23($yy_subpatterns)
  335. {
  336. $this->token = OQLParser::NOT_LIKE;
  337. }
  338. function yy_r1_24($yy_subpatterns)
  339. {
  340. $this->token = OQLParser::IN;
  341. }
  342. function yy_r1_25($yy_subpatterns)
  343. {
  344. $this->token = OQLParser::NOT_IN;
  345. }
  346. function yy_r1_26($yy_subpatterns)
  347. {
  348. $this->token = OQLParser::INTERVAL;
  349. }
  350. function yy_r1_27($yy_subpatterns)
  351. {
  352. $this->token = OQLParser::F_IF;
  353. }
  354. function yy_r1_28($yy_subpatterns)
  355. {
  356. $this->token = OQLParser::F_ELT;
  357. }
  358. function yy_r1_29($yy_subpatterns)
  359. {
  360. $this->token = OQLParser::F_COALESCE;
  361. }
  362. function yy_r1_30($yy_subpatterns)
  363. {
  364. $this->token = OQLParser::F_CONCAT;
  365. }
  366. function yy_r1_31($yy_subpatterns)
  367. {
  368. $this->token = OQLParser::F_SUBSTR;
  369. }
  370. function yy_r1_32($yy_subpatterns)
  371. {
  372. $this->token = OQLParser::F_TRIM;
  373. }
  374. function yy_r1_33($yy_subpatterns)
  375. {
  376. $this->token = OQLParser::F_DATE;
  377. }
  378. function yy_r1_34($yy_subpatterns)
  379. {
  380. $this->token = OQLParser::F_DATE_FORMAT;
  381. }
  382. function yy_r1_35($yy_subpatterns)
  383. {
  384. $this->token = OQLParser::F_CURRENT_DATE;
  385. }
  386. function yy_r1_36($yy_subpatterns)
  387. {
  388. $this->token = OQLParser::F_NOW;
  389. }
  390. function yy_r1_37($yy_subpatterns)
  391. {
  392. $this->token = OQLParser::F_TIME;
  393. }
  394. function yy_r1_38($yy_subpatterns)
  395. {
  396. $this->token = OQLParser::F_TO_DAYS;
  397. }
  398. function yy_r1_39($yy_subpatterns)
  399. {
  400. $this->token = OQLParser::F_FROM_DAYS;
  401. }
  402. function yy_r1_40($yy_subpatterns)
  403. {
  404. $this->token = OQLParser::F_YEAR;
  405. }
  406. function yy_r1_41($yy_subpatterns)
  407. {
  408. $this->token = OQLParser::F_MONTH;
  409. }
  410. function yy_r1_42($yy_subpatterns)
  411. {
  412. $this->token = OQLParser::F_DAY;
  413. }
  414. function yy_r1_43($yy_subpatterns)
  415. {
  416. $this->token = OQLParser::F_HOUR;
  417. }
  418. function yy_r1_44($yy_subpatterns)
  419. {
  420. $this->token = OQLParser::F_MINUTE;
  421. }
  422. function yy_r1_45($yy_subpatterns)
  423. {
  424. $this->token = OQLParser::F_SECOND;
  425. }
  426. function yy_r1_46($yy_subpatterns)
  427. {
  428. $this->token = OQLParser::F_DATE_ADD;
  429. }
  430. function yy_r1_47($yy_subpatterns)
  431. {
  432. $this->token = OQLParser::F_DATE_SUB;
  433. }
  434. function yy_r1_48($yy_subpatterns)
  435. {
  436. $this->token = OQLParser::F_ROUND;
  437. }
  438. function yy_r1_49($yy_subpatterns)
  439. {
  440. $this->token = OQLParser::F_FLOOR;
  441. }
  442. function yy_r1_50($yy_subpatterns)
  443. {
  444. $this->token = OQLParser::F_INET_ATON;
  445. }
  446. function yy_r1_51($yy_subpatterns)
  447. {
  448. $this->token = OQLParser::F_INET_NTOA;
  449. }
  450. function yy_r1_52($yy_subpatterns)
  451. {
  452. $this->token = OQLParser::NUMVAL;
  453. }
  454. function yy_r1_53($yy_subpatterns)
  455. {
  456. $this->token = OQLParser::STRVAL;
  457. }
  458. function yy_r1_54($yy_subpatterns)
  459. {
  460. $this->token = OQLParser::NAME;
  461. }
  462. function yy_r1_55($yy_subpatterns)
  463. {
  464. $this->token = OQLParser::VARNAME;
  465. }
  466. function yy_r1_56($yy_subpatterns)
  467. {
  468. $this->token = OQLParser::DOT;
  469. }
  470. }
// Prefix of the message of the exception thrown by OQLLexerRaw::yylex1() when
// no rule matches the input. OQLLexer::yylex() compares exception messages
// against this prefix, so it must stay identical to the literal used there.
define('UNEXPECTED_INPUT_AT_LINE', 'Unexpected input at line');
  472. class OQLLexerException extends OQLException
  473. {
  474. public function __construct($sInput, $iLine, $iCol, $sUnexpected)
  475. {
  476. parent::__construct("Syntax error", $sInput, $iLine, $iCol, $sUnexpected);
  477. }
  478. }
  479. class OQLLexer extends OQLLexerRaw
  480. {
  481. public function getTokenPos()
  482. {
  483. return max(0, $this->count - strlen($this->value));
  484. }
  485. function yylex()
  486. {
  487. try
  488. {
  489. return parent::yylex();
  490. }
  491. catch (Exception $e)
  492. {
  493. $sMessage = $e->getMessage();
  494. if (substr($sMessage, 0, strlen(UNEXPECTED_INPUT_AT_LINE)) == UNEXPECTED_INPUT_AT_LINE)
  495. {
  496. $sLineAndChar = substr($sMessage, strlen(UNEXPECTED_INPUT_AT_LINE));
  497. if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
  498. {
  499. $iLine = $aMatches[1];
  500. $sUnexpected = $aMatches[2];
  501. throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
  502. }
  503. }
  504. // Default: forward the exception
  505. throw $e;
  506. }
  507. }
  508. }
  509. ?>