oql-lexer.plex 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397
  1. <?php
  2. // Copyright (C) 2010 Combodo SARL
  3. //
  4. // This program is free software; you can redistribute it and/or modify
  5. // it under the terms of the GNU General Public License as published by
  6. // the Free Software Foundation; version 3 of the License.
  7. //
  8. // This program is distributed in the hope that it will be useful,
  9. // but WITHOUT ANY WARRANTY; without even the implied warranty of
  10. // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
  11. // GNU General Public License for more details.
  12. //
  13. // You should have received a copy of the GNU General Public License
  14. // along with this program; if not, write to the Free Software
  15. // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
  16. /**
  17. * OQL lexical analyzer (tokenizer), to be run before the syntax analyzer (OQLParser)
  18. *
  19. * @author Erwan Taloc <erwan.taloc@combodo.com>
  20. * @author Romain Quetiez <romain.quetiez@combodo.com>
  21. * @author Denis Flaven <denis.flaven@combodo.com>
  22. * @license http://www.opensource.org/licenses/gpl-3.0.html LGPL
  23. */
  24. // Notes (from the source file: oql-lexer.plex) - Romain
  25. //
  26. // The strval rule is a little bit cryptic.
  27. // This is due to both a bug in the lexer generator and the complexity of our need
  28. // The rule means: either a quoted string with ", or a quoted string with '
  29. // literal " (resp. ') must be escaped by a \
  30. // \ must be escaped by an additional \
  31. //
  32. // Here are the issues and limitations found in the lexer generator:
  33. // * Matching single quotes is an issue, because regexps are not correctly escaped (and the ESC code is itself escaped)
  34. // Workaround: insert '.chr(39).' which will be a real ' in the end
  35. // * Matching an alternate regexp is an issue because you must specify "|^...."
  36. // and the regexp parser will not accept that syntax
  37. // Workaround: insert '.chr(94).' which will be a real ^
  38. //
  39. // Let's analyze an overview of the regexp, we have
  40. // 1) The strval rule in the lexer definition
  41. // /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  42. // 2) Becomes the php expression in the lexer
  43. // (note the escaped double quotes, hopefully having no effect, but showing where the issue is!)
  44. // $myRegexp = '/^\"([^\\\\\"]|\\\\\"|\\\\\\\\)*\"|'.chr(94).chr(39).'([^\\\\'.chr(39).']|\\\\'.chr(39).'|\\\\\\\\)*'.chr(39).'/';
  45. //
  46. // To be fixed in LexerGenerator/Parser.y, in doLongestMatch (doFirstMatch is ok)
  47. //
  48. //
  49. // Now, let's explain how the regexp has been designed.
  50. // Here is a simplified version, dealing with simple quotes, and based on the assumption that the lexer generator has been fixed!
  51. // The strval rule in the lexer definition
  52. // /'([^\\']*(\\')*(\\\\)*)*'/
  53. // This matches any sequence made of \\ or \' or any other char, except a standalone ' or \
  54. // In other words, ' or \ cannot appear without a preceding \
  55. //
  56. class OQLLexerRaw
  57. {
  58. protected $data; // input string
  59. public $token; // token id
  60. public $value; // token string representation
  61. protected $line; // current line
  62. protected $count; // current column
  63. function __construct($data)
  64. {
  65. $this->data = $data;
  66. $this->count = 0;
  67. $this->line = 1;
  68. }
  69. /*!lex2php
  70. %input $this->data
  71. %counter $this->count
  72. %token $this->token
  73. %value $this->value
  74. %line $this->line
  75. %matchlongest 1
  76. whitespace = /[ \t\n\r]+/
  77. select = "SELECT"
  78. from = "FROM"
  79. as_alias = "AS"
  80. where = "WHERE"
  81. join = "JOIN"
  82. on = "ON"
  83. coma = ","
  84. par_open = "("
  85. par_close = ")"
  86. math_div = "/"
  87. math_mult = "*"
  88. math_plus = "+"
  89. math_minus = "-"
  90. log_and = "AND"
  91. log_or = "OR"
  92. regexp = "REGEXP"
  93. eq = "="
  94. not_eq = "!="
  95. gt = ">"
  96. lt = "<"
  97. ge = ">="
  98. le = "<="
  99. like = "LIKE"
  100. not_like = "NOT LIKE"
  101. in = "IN"
  102. not_in = "NOT IN"
  103. interval = "INTERVAL"
  104. f_if = "IF"
  105. f_elt = "ELT"
  106. f_coalesce = "COALESCE"
  107. f_isnull = "ISNULL"
  108. f_concat = "CONCAT"
  109. f_substr = "SUBSTR"
  110. f_trim = "TRIM"
  111. f_date = "DATE"
  112. f_date_format = "DATE_FORMAT"
  113. f_current_date = "CURRENT_DATE"
  114. f_now = "NOW"
  115. f_time = "TIME"
  116. f_to_days = "TO_DAYS"
  117. f_from_days = "FROM_DAYS"
  118. f_year = "YEAR"
  119. f_month = "MONTH"
  120. f_day = "DAY"
  121. f_hour = "HOUR"
  122. f_minute = "MINUTE"
  123. f_second = "SECOND"
  124. f_date_add = "DATE_ADD"
  125. f_date_sub = "DATE_SUB"
  126. f_round = "ROUND"
  127. f_floor = "FLOOR"
  128. f_inet_aton = "INET_ATON"
  129. f_inet_ntoa = "INET_NTOA"
  130. below = "BELOW"
  131. below_strict = "BELOW STRICT"
  132. not_below = "NOT BELOW"
  133. not_below_strict = "NOT BELOW STRICT"
  134. above = "ABOVE"
  135. above_strict = "ABOVE STRICT"
  136. not_above = "NOT ABOVE"
  137. not_above_strict = "NOT ABOVE STRICT"
  138. numval = /[0-9]+|0x[0-9a-fA-F]+/
  139. strval = /"([^\\"]|\\"|\\\\)*"|'.chr(94).chr(39).'([^\\'.chr(39).']|\\'.chr(39).'|\\\\)*'.chr(39).'/
  140. name = /([_a-zA-Z][_a-zA-Z0-9]*|`[^`]+`)/
  141. varname = /:([_a-zA-Z][_a-zA-Z0-9]*->[_a-zA-Z][_a-zA-Z0-9]*|[_a-zA-Z][_a-zA-Z0-9]*)/
  142. dot = "."
  143. */
  144. /*!lex2php
  145. whitespace {
  146. return false;
  147. }
  148. select {
  149. $this->token = OQLParser::SELECT;
  150. }
  151. from {
  152. $this->token = OQLParser::FROM;
  153. }
  154. as_alias {
  155. $this->token = OQLParser::AS_ALIAS;
  156. }
  157. where {
  158. $this->token = OQLParser::WHERE;
  159. }
  160. join {
  161. $this->token = OQLParser::JOIN;
  162. }
  163. on {
  164. $this->token = OQLParser::ON;
  165. }
  166. math_div {
  167. $this->token = OQLParser::MATH_DIV;
  168. }
  169. math_mult {
  170. $this->token = OQLParser::MATH_MULT;
  171. }
  172. math_plus {
  173. $this->token = OQLParser::MATH_PLUS;
  174. }
  175. math_minus {
  176. $this->token = OQLParser::MATH_MINUS;
  177. }
  178. log_and {
  179. $this->token = OQLParser::LOG_AND;
  180. }
  181. log_or {
  182. $this->token = OQLParser::LOG_OR;
  183. }
  184. coma {
  185. $this->token = OQLParser::COMA;
  186. }
  187. par_open {
  188. $this->token = OQLParser::PAR_OPEN;
  189. }
  190. par_close {
  191. $this->token = OQLParser::PAR_CLOSE;
  192. }
  193. regexp {
  194. $this->token = OQLParser::REGEXP;
  195. }
  196. eq {
  197. $this->token = OQLParser::EQ;
  198. }
  199. not_eq {
  200. $this->token = OQLParser::NOT_EQ;
  201. }
  202. gt {
  203. $this->token = OQLParser::GT;
  204. }
  205. lt {
  206. $this->token = OQLParser::LT;
  207. }
  208. ge {
  209. $this->token = OQLParser::GE;
  210. }
  211. le {
  212. $this->token = OQLParser::LE;
  213. }
  214. like {
  215. $this->token = OQLParser::LIKE;
  216. }
  217. not_like {
  218. $this->token = OQLParser::NOT_LIKE;
  219. }
  220. in {
  221. $this->token = OQLParser::IN;
  222. }
  223. not_in {
  224. $this->token = OQLParser::NOT_IN;
  225. }
  226. interval {
  227. $this->token = OQLParser::INTERVAL;
  228. }
  229. f_if {
  230. $this->token = OQLParser::F_IF;
  231. }
  232. f_elt {
  233. $this->token = OQLParser::F_ELT;
  234. }
  235. f_coalesce {
  236. $this->token = OQLParser::F_COALESCE;
  237. }
  238. f_isnull {
  239. $this->token = OQLParser::F_ISNULL;
  240. }
  241. f_concat {
  242. $this->token = OQLParser::F_CONCAT;
  243. }
  244. f_substr {
  245. $this->token = OQLParser::F_SUBSTR;
  246. }
  247. f_trim {
  248. $this->token = OQLParser::F_TRIM;
  249. }
  250. f_date {
  251. $this->token = OQLParser::F_DATE;
  252. }
  253. f_date_format {
  254. $this->token = OQLParser::F_DATE_FORMAT;
  255. }
  256. f_current_date {
  257. $this->token = OQLParser::F_CURRENT_DATE;
  258. }
  259. f_now {
  260. $this->token = OQLParser::F_NOW;
  261. }
  262. f_time {
  263. $this->token = OQLParser::F_TIME;
  264. }
  265. f_to_days {
  266. $this->token = OQLParser::F_TO_DAYS;
  267. }
  268. f_from_days {
  269. $this->token = OQLParser::F_FROM_DAYS;
  270. }
  271. f_year {
  272. $this->token = OQLParser::F_YEAR;
  273. }
  274. f_month {
  275. $this->token = OQLParser::F_MONTH;
  276. }
  277. f_day {
  278. $this->token = OQLParser::F_DAY;
  279. }
  280. f_hour {
  281. $this->token = OQLParser::F_HOUR;
  282. }
  283. f_minute {
  284. $this->token = OQLParser::F_MINUTE;
  285. }
  286. f_second {
  287. $this->token = OQLParser::F_SECOND;
  288. }
  289. f_date_add {
  290. $this->token = OQLParser::F_DATE_ADD;
  291. }
  292. f_date_sub {
  293. $this->token = OQLParser::F_DATE_SUB;
  294. }
  295. f_round {
  296. $this->token = OQLParser::F_ROUND;
  297. }
  298. f_floor {
  299. $this->token = OQLParser::F_FLOOR;
  300. }
  301. f_inet_aton {
  302. $this->token = OQLParser::F_INET_ATON;
  303. }
  304. f_inet_ntoa {
  305. $this->token = OQLParser::F_INET_NTOA;
  306. }
  307. below {
  308. $this->token = OQLParser::BELOW;
  309. }
  310. below_strict {
  311. $this->token = OQLParser::BELOW_STRICT;
  312. }
  313. not_below {
  314. $this->token = OQLParser::NOT_BELOW;
  315. }
  316. not_below_strict {
  317. $this->token = OQLParser::NOT_BELOW_STRICT;
  318. }
  319. above {
  320. $this->token = OQLParser::ABOVE;
  321. }
  322. above_strict {
  323. $this->token = OQLParser::ABOVE_STRICT;
  324. }
  325. not_above {
  326. $this->token = OQLParser::NOT_ABOVE;
  327. }
  328. not_above_strict {
  329. $this->token = OQLParser::NOT_ABOVE_STRICT;
  330. }
  331. numval {
  332. $this->token = OQLParser::NUMVAL;
  333. }
  334. strval {
  335. $this->token = OQLParser::STRVAL;
  336. }
  337. name {
  338. $this->token = OQLParser::NAME;
  339. }
  340. varname {
  341. $this->token = OQLParser::VARNAME;
  342. }
  343. dot {
  344. $this->token = OQLParser::DOT;
  345. }
  346. */
  347. }
  348. define('UNEXPECTED_INPUT_AT_LINE', 'Unexpected input at line');
  349. class OQLLexerException extends OQLException
  350. {
  351. public function __construct($sInput, $iLine, $iCol, $sUnexpected)
  352. {
  353. parent::__construct("Syntax error", $sInput, $iLine, $iCol, $sUnexpected);
  354. }
  355. }
  356. class OQLLexer extends OQLLexerRaw
  357. {
  358. public function getTokenPos()
  359. {
  360. return max(0, $this->count - strlen($this->value));
  361. }
  362. function yylex()
  363. {
  364. try
  365. {
  366. return parent::yylex();
  367. }
  368. catch (Exception $e)
  369. {
  370. $sMessage = $e->getMessage();
  371. if (substr($sMessage, 0, strlen(UNEXPECTED_INPUT_AT_LINE)) == UNEXPECTED_INPUT_AT_LINE)
  372. {
  373. $sLineAndChar = substr($sMessage, strlen(UNEXPECTED_INPUT_AT_LINE));
  374. if (preg_match('#^([0-9]+): (.+)$#', $sLineAndChar, $aMatches))
  375. {
  376. $iLine = $aMatches[1];
  377. $sUnexpected = $aMatches[2];
  378. throw new OQLLexerException($this->data, $iLine, $this->count, $sUnexpected);
  379. }
  380. }
  381. // Default: forward the exception
  382. throw $e;
  383. }
  384. }
  385. }
  386. ?>