oql-lexer.php 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. <?php
  2. // Notes (from the source file: oql-lexer.plex) - Romain
  3. //
  4. // The strval rule is a little bit cryptic.
  5. // This is due to both a bug in the lexer generator and the complexity of our need
  6. // The rule means: either a quoted string with ", or a quoted string with '
  7. // literal " (resp. ') must be escaped by a \
  8. // \ must be escaped by an additional \
  9. //
  10. // Here are the issues and limitations found in the lexer generator:
  11. // * Matching single quotes is an issue, because regexps are not correctly escaped (and the ESC code is escaped itself)
  12. // Workaround: insert '.chr(39).' which will be a real ' in the end
  13. // * Matching an alternate regexp is an issue because you must specify "|^...."
  14. // and the regexp parser will not accept that syntax
  15. // Workaround: insert '.chr(94).' which will be a real ^
  16. //
  17. // Let's analyze an overview of the regexp, we have
  18. // 1) The strval rule in the lexer definition
  19. // /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  20. // 2) Becomes the php expression in the lexer
  21. // (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
  22. // $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
  23. //
  24. // To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
  25. //
  26. //
  27. // Now, let's explain how the regexp has been designed.
  28. // Here is a simplified version, dealing with simple quotes, and based on the assumption that the lexer generator has been fixed!
  29. // The strval rule in the lexer definition
  30. // /'([^\\']*(\\')*(\\\\)*)*'/
  31. // This means anything containing \\ or \' or any other char but a standalone ' or \
  32. // This means ' or \ could not be found without a preceding \
  33. //
  /**
   * Raw tokenizer for OQL queries (originally generated from oql-lexer.plex).
   *
   * Each successful call to yylex() consumes one token from the input string
   * and exposes it through the public $token (an OQLParser token id) and
   * $value (the matched text) members. Whitespace is consumed silently.
   *
   * All rules are tried at the current position and the longest match wins;
   * on a tie the rule declared first wins, which is what lets keywords take
   * precedence over the generic NAME rule.
   */
  class OQLLexerRaw
  {
      protected $data;  // input string
      public $token;    // token id
      public $value;    // token string representation
      protected $line;  // current line (1-based)
      protected $count; // byte offset of the next character to read in $data

      private $_yy_state = 1;       // current lexer state, selects the yylex<N>() method
      private $_yy_stack = array(); // states saved by yypushstate()

      /**
       * @param string $data The OQL text to tokenize.
       */
      public function __construct($data)
      {
          $this->data = $data;
          $this->count = 0;
          $this->line = 1;
      }

      /**
       * Reads the next token using the scanner of the current state.
       *
       * @return bool true when a token has been stored in $token/$value,
       *              false when the end of the input has been reached.
       * @throws Exception when the input cannot be matched by any rule.
       */
      public function yylex()
      {
          return $this->{'yylex' . $this->_yy_state}();
      }

      /**
       * Saves the current state on the stack and switches to $state.
       */
      public function yypushstate($state)
      {
          array_push($this->_yy_stack, $this->_yy_state);
          $this->_yy_state = $state;
      }

      /**
       * Restores the state saved by the matching yypushstate() call.
       * NOTE(review): popping an empty stack sets the state to null - callers
       * must keep push/pop balanced.
       */
      public function yypopstate()
      {
          $this->_yy_state = array_pop($this->_yy_stack);
      }

      /**
       * Switches to $state without touching the state stack.
       */
      public function yybegin($state)
      {
          $this->_yy_state = $state;
      }

      /**
       * Returns the rule table of state 1, indexed by rule number.
       *
       * Rule N is handled by the method yy_r1_N(). The table is built once and
       * cached, instead of being rebuilt for every token.
       *
       * @return array list of anchored PCRE patterns
       */
      private static function yy_rules1()
      {
          static $rules = null;
          if ($rules === null) {
              $rules = array(
                  '/^[ \t\n]+/',
                  '/^SELECT/',
                  '/^AS/',
                  '/^WHERE/',
                  '/^JOIN/',
                  '/^ON/',
                  '/^\//',
                  '/^\\*/',
                  '/^\\+/',
                  '/^-/',
                  '/^AND/',
                  '/^OR/',
                  '/^,/',
                  '/^\\(/',
                  '/^\\)/',
                  '/^=/',
                  '/^!=/',
                  '/^>/',
                  '/^</',
                  '/^>=/',
                  '/^<=/',
                  '/^LIKE/',
                  '/^NOT LIKE/',
                  '/^IN/',
                  '/^NOT IN/',
                  '/^INTERVAL/',
                  '/^IF/',
                  '/^ELT/',
                  '/^COALESCE/',
                  '/^CONCAT/',
                  '/^SUBSTR/',
                  '/^TRIM/',
                  '/^DATE/',
                  '/^DATE_FORMAT/',
                  '/^CURRENT_DATE/',
                  '/^NOW/',
                  '/^TIME/',
                  '/^TO_DAYS/',
                  '/^FROM_DAYS/',
                  '/^YEAR/',
                  '/^MONTH/',
                  '/^DAY/',
                  '/^DATE_ADD/',
                  '/^DATE_SUB/',
                  '/^ROUND/',
                  '/^FLOOR/',
                  // Both alternatives must be anchored: "^a|b" anchors only "a", which let the
                  // hexadecimal form match further down the input and swallow what preceded it.
                  // The hexadecimal form comes first, otherwise "[0-9]+" stops after the leading "0".
                  '/^(?:0x[0-9a-fA-F]+|[0-9]+)/',
                  // Quoted string (" or '); see the notes at the top of the file for the chr() workaround
                  '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/',
                  '/^([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/',
                  '/^\\./',
              );
          }
          return $rules;
      }

      /**
       * Scanner for state 1: reads the next significant token.
       *
       * The handler yy_r1_<rule index>() of the winning rule decides what to do
       * with the match:
       *  - null (no return value): the token is accepted,
       *  - false: the match is consumed and scanning goes on (used for whitespace),
       *  - true: the handler changed the lexer state, the input is re-scanned
       *    through yylex() without being consumed,
       *  - anything else ("yymore" in the generator): not supported. The generated
       *    implementation of that branch could only end in an exception, it is now
       *    rejected explicitly.
       *
       * @return bool true when a token is available, false at the end of the input
       * @throws Exception 'Unexpected input at line<line>: <char>' when no rule matches
       */
      public function yylex1()
      {
          $rules = self::yy_rules1();

          while ($this->count < strlen($this->data)) {
              // Extract the remaining input once per token instead of once per rule
              $remaining = substr($this->data, $this->count);

              // Longest match wins; on equal length the first declared rule is kept
              $match = false;
              foreach ($rules as $index => $rule) {
                  if (preg_match($rule, $remaining, $yymatches)) {
                      if (!$match || strlen($yymatches[0]) > strlen($match[0][0])) {
                          $match = array($yymatches, $index); // matches, rule index
                      }
                  }
              }
              if (!$match) {
                  // The exact wording matters: OQLLexer::yylex() parses this message
                  throw new Exception('Unexpected input at line' . $this->line .
                      ': ' . $this->data[$this->count]);
              }

              $this->token = $match[1];
              $this->value = $match[0][0];
              $yysubmatches = $match[0];
              array_shift($yysubmatches); // keep only the captured groups

              $r = $this->{'yy_r1_' . $this->token}($yysubmatches);
              if ($r === true) {
                  // we have changed state: process this token in the new state
                  return $this->yylex();
              }
              if ($r !== null && $r !== false) {
                  throw new Exception('Unsupported return value from lexer rule handler (yymore is not implemented)');
              }

              // consume the matched text
              $this->count += strlen($this->value);
              $this->line += substr_count($this->value, "\n");
              if ($r === null) {
                  return true; // accept this token
              }
              // $r === false: skip this token and look at what follows
          }
          return false; // end of input
      }

      //
      // Rule handlers. yy_r1_N() is called when rule N of yy_rules1() wins, with the
      // captured sub-patterns as argument. At that point $this->token holds the rule
      // index; the handler replaces it by the token id expected by OQLParser.
      //

      // Whitespace: consumed but never reported
      public function yy_r1_0($yy_subpatterns)
      {
          return false;
      }

      public function yy_r1_1($yy_subpatterns)
      {
          $this->token = OQLParser::SELECT;
      }

      public function yy_r1_2($yy_subpatterns)
      {
          $this->token = OQLParser::AS_ALIAS;
      }

      public function yy_r1_3($yy_subpatterns)
      {
          $this->token = OQLParser::WHERE;
      }

      public function yy_r1_4($yy_subpatterns)
      {
          $this->token = OQLParser::JOIN;
      }

      public function yy_r1_5($yy_subpatterns)
      {
          $this->token = OQLParser::ON;
      }

      public function yy_r1_6($yy_subpatterns)
      {
          $this->token = OQLParser::MATH_DIV;
      }

      public function yy_r1_7($yy_subpatterns)
      {
          $this->token = OQLParser::MATH_MULT;
      }

      public function yy_r1_8($yy_subpatterns)
      {
          $this->token = OQLParser::MATH_PLUS;
      }

      public function yy_r1_9($yy_subpatterns)
      {
          $this->token = OQLParser::MATH_MINUS;
      }

      public function yy_r1_10($yy_subpatterns)
      {
          $this->token = OQLParser::LOG_AND;
      }

      public function yy_r1_11($yy_subpatterns)
      {
          $this->token = OQLParser::LOG_OR;
      }

      public function yy_r1_12($yy_subpatterns)
      {
          $this->token = OQLParser::COMA;
      }

      public function yy_r1_13($yy_subpatterns)
      {
          $this->token = OQLParser::PAR_OPEN;
      }

      public function yy_r1_14($yy_subpatterns)
      {
          $this->token = OQLParser::PAR_CLOSE;
      }

      public function yy_r1_15($yy_subpatterns)
      {
          $this->token = OQLParser::EQ;
      }

      public function yy_r1_16($yy_subpatterns)
      {
          $this->token = OQLParser::NOT_EQ;
      }

      public function yy_r1_17($yy_subpatterns)
      {
          $this->token = OQLParser::GT;
      }

      public function yy_r1_18($yy_subpatterns)
      {
          $this->token = OQLParser::LT;
      }

      public function yy_r1_19($yy_subpatterns)
      {
          $this->token = OQLParser::GE;
      }

      public function yy_r1_20($yy_subpatterns)
      {
          $this->token = OQLParser::LE;
      }

      public function yy_r1_21($yy_subpatterns)
      {
          $this->token = OQLParser::LIKE;
      }

      public function yy_r1_22($yy_subpatterns)
      {
          $this->token = OQLParser::NOT_LIKE;
      }

      public function yy_r1_23($yy_subpatterns)
      {
          $this->token = OQLParser::IN;
      }

      public function yy_r1_24($yy_subpatterns)
      {
          $this->token = OQLParser::NOT_IN;
      }

      public function yy_r1_25($yy_subpatterns)
      {
          $this->token = OQLParser::INTERVAL;
      }

      public function yy_r1_26($yy_subpatterns)
      {
          $this->token = OQLParser::F_IF;
      }

      public function yy_r1_27($yy_subpatterns)
      {
          $this->token = OQLParser::F_ELT;
      }

      public function yy_r1_28($yy_subpatterns)
      {
          $this->token = OQLParser::F_COALESCE;
      }

      public function yy_r1_29($yy_subpatterns)
      {
          $this->token = OQLParser::F_CONCAT;
      }

      public function yy_r1_30($yy_subpatterns)
      {
          $this->token = OQLParser::F_SUBSTR;
      }

      public function yy_r1_31($yy_subpatterns)
      {
          $this->token = OQLParser::F_TRIM;
      }

      public function yy_r1_32($yy_subpatterns)
      {
          $this->token = OQLParser::F_DATE;
      }

      public function yy_r1_33($yy_subpatterns)
      {
          $this->token = OQLParser::F_DATE_FORMAT;
      }

      public function yy_r1_34($yy_subpatterns)
      {
          $this->token = OQLParser::F_CURRENT_DATE;
      }

      public function yy_r1_35($yy_subpatterns)
      {
          $this->token = OQLParser::F_NOW;
      }

      public function yy_r1_36($yy_subpatterns)
      {
          $this->token = OQLParser::F_TIME;
      }

      public function yy_r1_37($yy_subpatterns)
      {
          $this->token = OQLParser::F_TO_DAYS;
      }

      public function yy_r1_38($yy_subpatterns)
      {
          $this->token = OQLParser::F_FROM_DAYS;
      }

      public function yy_r1_39($yy_subpatterns)
      {
          $this->token = OQLParser::F_YEAR;
      }

      public function yy_r1_40($yy_subpatterns)
      {
          $this->token = OQLParser::F_MONTH;
      }

      public function yy_r1_41($yy_subpatterns)
      {
          $this->token = OQLParser::F_DAY;
      }

      public function yy_r1_42($yy_subpatterns)
      {
          $this->token = OQLParser::F_DATE_ADD;
      }

      public function yy_r1_43($yy_subpatterns)
      {
          $this->token = OQLParser::F_DATE_SUB;
      }

      public function yy_r1_44($yy_subpatterns)
      {
          $this->token = OQLParser::F_ROUND;
      }

      public function yy_r1_45($yy_subpatterns)
      {
          $this->token = OQLParser::F_FLOOR;
      }

      // Decimal or hexadecimal (0x...) number
      public function yy_r1_46($yy_subpatterns)
      {
          $this->token = OQLParser::NUMVAL;
      }

      // Quoted string, quotes included in $this->value
      public function yy_r1_47($yy_subpatterns)
      {
          $this->token = OQLParser::STRVAL;
      }

      // Identifier, bare or enclosed in backticks
      public function yy_r1_48($yy_subpatterns)
      {
          $this->token = OQLParser::NAME;
      }

      public function yy_r1_49($yy_subpatterns)
      {
          $this->token = OQLParser::DOT;
      }
  }
  // Leading part of the message of the plain Exception raised by the raw lexer when no
  // rule matches the input. OQLLexer::yylex() relies on it to recognise that exception
  // and to extract the line number and the offending character that follow the prefix,
  // so it must stay identical to the literal used in OQLLexerRaw::yylex1().
  define('UNEXPECTED_INPUT_AT_LINE', 'Unexpected input at line');
  /**
   * Exception reported when the lexer meets input that no rule can match.
   *
   * It only fixes the issue label to "Syntax error" and hands every other
   * argument over, unchanged, to the OQLException constructor.
   */
  class OQLLexerException extends OQLException
  {
      /**
       * @param string $sInput      the text being tokenized
       * @param mixed  $iLine       line reported for the error
       * @param mixed  $iCol        position reported for the error
       * @param string $sUnexpected the input that could not be matched
       */
      public function __construct($sInput, $iLine, $iCol, $sUnexpected)
      {
          $sIssue = "Syntax error";
          parent::__construct($sIssue, $sInput, $iLine, $iCol, $sUnexpected);
      }
  }
  /**
   * Lexer used by the OQL parser: same tokenizer as OQLLexerRaw, but the
   * "unexpected input" failure of the raw lexer is reported as an
   * OQLLexerException instead of a plain Exception.
   */
  class OQLLexer extends OQLLexerRaw
  {
      /**
       * Offset at which the current token starts in the input.
       *
       * @return int the read offset minus the length of the current token value, never below 0
       */
      public function getTokenPos()
      {
          $iTokenStart = $this->count - strlen($this->value);
          return max(0, $iTokenStart);
      }

      /**
       * Reads the next token (see OQLLexerRaw::yylex()).
       *
       * @return bool true when a token is available, false at the end of the input
       * @throws OQLLexerException when the raw lexer reported unexpected input
       * @throws Exception any other exception is forwarded untouched
       */
      public function yylex()
      {
          try
          {
              return parent::yylex();
          }
          catch (Exception $e)
          {
              $sMessage = $e->getMessage();
              $iPrefixLength = strlen(UNEXPECTED_INPUT_AT_LINE);

              // The raw lexer only provides "<prefix><line>: <unexpected char>" as a message:
              // recognise the prefix, then pick the line and the character out of the remainder
              if ((strncmp($sMessage, UNEXPECTED_INPUT_AT_LINE, $iPrefixLength) === 0)
                  && preg_match('#^([0-9]+): (.+)$#', substr($sMessage, $iPrefixLength), $aMatches))
              {
                  list(, $iLine, $sUnexpected) = $aMatches;
                  throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
              }

              // Default: forward the exception
              throw $e;
          }
      }
  }
  452. ?>