Parser.y 28 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795
  1. %name PHP_LexerGenerator_Parser
  2. %declare_class {class PHP_LexerGenerator_Parser}
  3. %include {
  4. /* ?><?php {//*/
  5. /**
  6. * PHP_LexerGenerator, a php 5 lexer generator.
  7. *
  * This lexer generator reads a file in a format similar to
  * re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
  10. *
  11. * PHP version 5
  12. *
  13. * LICENSE:
  14. *
  15. * Copyright (c) 2006, Gregory Beaver <cellog@php.net>
  16. * All rights reserved.
  17. *
  18. * Redistribution and use in source and binary forms, with or without
  19. * modification, are permitted provided that the following conditions
  20. * are met:
  21. *
  22. * * Redistributions of source code must retain the above copyright
  23. * notice, this list of conditions and the following disclaimer.
  24. * * Redistributions in binary form must reproduce the above copyright
  25. * notice, this list of conditions and the following disclaimer in
  26. * the documentation and/or other materials provided with the distribution.
  27. * * Neither the name of the PHP_LexerGenerator nor the names of its
  28. * contributors may be used to endorse or promote products derived
  29. * from this software without specific prior written permission.
  30. *
  31. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  32. * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  33. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  34. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  35. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  36. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  37. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  38. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  39. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  40. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  41. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. *
  43. * @category php
  44. * @package PHP_LexerGenerator
  45. * @author Gregory Beaver <cellog@php.net>
  46. * @copyright 2006 Gregory Beaver
  47. * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
  48. * @version CVS: $Id: Parser.y 246683 2007-11-22 04:43:52Z instance $
  49. * @since File available since Release 0.1.0
  50. */
  51. /**
  52. * For regular expression validation
  53. */
  54. require_once 'PHP/LexerGenerator/Regex/Lexer.php';
  55. require_once 'PHP/LexerGenerator/Regex/Parser.php';
  56. require_once 'PHP/LexerGenerator/Exception.php';
  57. /**
  58. * Token parser for plex files.
  59. *
  60. * This parser converts tokens pulled from {@link PHP_LexerGenerator_Lexer}
  61. * into abstract patterns and rules, then creates the output file
  62. * @package PHP_LexerGenerator
  63. * @author Gregory Beaver <cellog@php.net>
  64. * @copyright 2006 Gregory Beaver
  65. * @license http://www.php.net/license/3_01.txt PHP License 3.01
  66. * @version @package_version@
  67. * @since Class available since Release 0.1.0
  68. */
  69. }
  %syntax_error {
      // Report where parsing stopped and print the names of the symbols that
      // are currently on the parser stack, then abort with the list of tokens
      // that would have been accepted at this point.
      echo "Syntax Error on line " . $this->lex->line . ": token '" .
          $this->lex->value . "' while parsing rule:";
      foreach ($this->yystack as $entry) {
          echo $this->tokenName($entry->major) . ' ';
      }
      // Start from an empty list so implode() below always receives an array,
      // even when yy_get_expected_tokens() yields nothing.
      $expect = array();
      foreach ($this->yy_get_expected_tokens($yymajor) as $token) {
          $expect[] = self::$yyTokenName[$token];
      }
      throw new Exception('Unexpected ' . $this->tokenName($yymajor) . '(' . $TOKEN
          . '), expected one of: ' . implode(',', $expect));
  }
  82. %include_class {
  // Destination stream for the generated lexer, and the token source whose
  // line/value properties are read when reporting errors.
  private $out;
  private $lex;
  // Named patterns taken from the declaration section.
  private $patterns;
  // Values of the processing instructions of the same name; they are spliced
  // verbatim into the generated lexer code.
  private $input;
  private $counter;
  private $token;
  private $value;
  private $line;
  // Option flags set by processing instructions, and the PCRE modifier string
  // ("i" and/or "u") derived from them.
  private $matchlongest;
  private $caseinsensitive;
  private $unicode;
  private $patternFlags;
  // Lexer/parser pair used to validate regular expressions.
  private $_regexLexer;
  private $_regexParser;
  // Running sub-pattern index handed to the regex parser, and the number of
  // the next yylexN() rule set to emit.
  private $_patternIndex = 0;
  private $_outRuleIndex = 1;
  // Maps the numeric codes 1-9 to this parser's token constants.
  // NOTE(review): presumably translates token numbers produced by the lexer;
  // the consumer is not visible in this section of the file - confirm.
  public $transTable = array(
      1 => self::PHPCODE,
      2 => self::COMMENTSTART,
      3 => self::COMMENTEND,
      4 => self::QUOTE,
      5 => self::SINGLEQUOTE,
      6 => self::PATTERN,
      7 => self::CODE,
      8 => self::SUBPATTERN,
      9 => self::PI,
  );
  /**
   * Open the output file and set up the regular-expression validator.
   *
   * @param string $outfile path of the lexer file to write (opened in 'wb' mode)
   * @param object $lex     token source; its line and value properties are
   *                        read when errors are reported
   * @throws Exception when the output file cannot be opened
   */
  function __construct($outfile, $lex)
  {
      $handle = fopen($outfile, 'wb');
      $this->out = $handle;
      if (!$handle) {
          throw new Exception('unable to open lexer output file "' . $outfile . '"');
      }
      $this->lex = $lex;
      // The regex lexer starts on an empty pattern; it is reset for each
      // pattern that gets validated.
      $regexLexer = new PHP_LexerGenerator_Regex_Lexer('');
      $this->_regexLexer = $regexLexer;
      $this->_regexParser = new PHP_LexerGenerator_Regex_Parser($regexLexer);
  }
  /**
   * Write the body of a generated yylexN() method that uses longest-match
   * semantics: every rule pattern is tried at the current offset and the
   * rule whose match is longest wins.
   *
   * The emitted code addresses the lexer's own variables through the
   * expressions stored in $this->counter, $this->input, $this->line,
   * $this->token and $this->value, and dispatches to the rule actions named
   * yy_r<ruleindex>_<n>(). An action returning null accepts the token, true
   * re-lexes via yylex(), false skips the token, and any other value enters
   * the "yymore" branch.
   *
   * Defects corrected in the emitted yymore branch:
   *  - the loop condition was "$r !== null || !$r", which is true for every
   *    value of $r; it now matches doFirstMatch() and ends once the action
   *    returns null or a boolean;
   *  - the rule action is called with $yysubmatches, as its generated
   *    signature requires one argument;
   *  - array_slice() uses the configured token expression instead of a
   *    hard-coded "$this->token";
   *  - a false result after the loop skips the token, as it does elsewhere.
   *
   * NOTE(review): the yymore branch still passes true as the length argument
   * of array_slice() and iterates over a single pattern string; it also
   * assigns (rather than adds to) the line counter. These look unintended
   * but the intended behaviour cannot be determined from this file - confirm
   * before relying on yymore together with %matchlongest.
   *
   * @param array $rules     list of rules, each with at least a 'pattern' key
   * @param mixed $statename not used by this method
   * @param int   $ruleindex number of the rule set, used in method names
   * @return void
   */
  function doLongestMatch($rules, $statename, $ruleindex)
  {
      fwrite($this->out, '
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
do {
$rules = array(');
      // One anchored pattern literal per rule, in rule order.
      foreach ($rules as $rule) {
          fwrite($this->out, '
\'/\G' . $rule['pattern'] . '/' . $this->patternFlags . ' \',');
      }
      fwrite($this->out, '
);
$match = false;
foreach ($rules as $index => $rule) {
if (preg_match($rule, substr(' . $this->input . ', ' .
          $this->counter . '), $yymatches)) {
if ($match) {
if (strlen($yymatches[0]) > strlen($match[0][0])) {
$match = array($yymatches, $index); // matches, token
}
} else {
$match = array($yymatches, $index);
}
}
}
if (!$match) {
throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
}
' . $this->token . ' = $match[1];
' . $this->value . ' = $match[0][0];
$yysubmatches = $match[0];
array_shift($yysubmatches);
if (!$yysubmatches) {
$yysubmatches = array();
}
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
if ($r === null) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
// accept this token
return true;
} elseif ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {');
      // yymore branch: the action returned something other than null/bool.
      fwrite($this->out, '
$yy_yymore_patterns = array_slice($rules, ' . $this->token . ', true);
// yymore is needed
do {
if (!isset($yy_yymore_patterns[' . $this->token . '])) {
throw new Exception(\'cannot do yymore for the last token\');
}
$match = false;
foreach ($yy_yymore_patterns[' . $this->token . '] as $index => $rule) {
if (preg_match(\'/\' . $rule . \'/' . $this->patternFlags . '\',
' . $this->input . ', $yymatches, null, ' . $this->counter . ')) {
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
if ($match) {
if (strlen($yymatches[0]) > strlen($match[0][0])) {
$match = array($yymatches, $index); // matches, token
}
} else {
$match = array($yymatches, $index);
}
}
}
if (!$match) {
throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
}
' . $this->token . ' = $match[1];
' . $this->value . ' = $match[0][0];
$yysubmatches = $match[0];
array_shift($yysubmatches);
if (!$yysubmatches) {
$yysubmatches = array();
}
' . $this->line . ' = substr_count(' . $this->value . ', "\n");
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
} while ($r !== null && !is_bool($r));
if ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {
// accept
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
return true;
}
}
} while (true);
');
  }
  /**
   * Write the body of a generated yylexN() method that uses first-match
   * semantics: all rule patterns are joined into one alternation, each
   * wrapped in its own capture group, and the first non-empty group
   * identifies the rule that matched.
   *
   * Because every rule occupies one group plus its own sub-patterns, the
   * token number of a rule is the index of its wrapping group; $tokenMap in
   * the emitted code maps that index to the rule's sub-pattern count.
   *
   * The emitted code addresses the lexer's own variables through the
   * expressions stored in $this->counter, $this->input, $this->line,
   * $this->token and $this->value. An action returning null accepts the
   * token, true re-lexes via yylex(), false skips the token, and any other
   * value enters the "yymore" branch, which retries with the rules that
   * follow the current one.
   *
   * The emitted "Unexpected input at line" message now ends with a space
   * before the line number, matching doLongestMatch().
   *
   * NOTE(review): rule patterns are embedded in single-quoted (global
   * pattern) and double-quoted (yymore table) literals of the generated code
   * without further escaping, and the yymore loop assigns rather than adds
   * to the line counter - confirm both are intended.
   *
   * @param array  $rules     list of rules with 'pattern' and 'subpatterns' keys
   * @param mixed  $statename state name or number, used in an error message
   * @param int    $ruleindex number of the rule set, used in method names
   * @return void
   */
  function doFirstMatch($rules, $statename, $ruleindex)
  {
      $patterns = array();
      $pattern = '/';
      $ruleMap = array();
      $tokenindex = array();
      $actualindex = 1;
      $i = 0;
      foreach ($rules as $rule) {
          // $actualindex is the capture-group number of this rule's wrapper.
          $ruleMap[$i++] = $actualindex;
          $tokenindex[$actualindex] = $rule['subpatterns'];
          $actualindex += $rule['subpatterns'] + 1;
          $patterns[] = '\G(' . $rule['pattern'] . ')';
      }
      // Re-index tokencount from zero.
      $tokencount = array_values($tokenindex);
      $tokenindex = var_export($tokenindex, true);
      $tokenindex = explode("\n", $tokenindex);
      // indent for prettiness
      $tokenindex = implode("\n ", $tokenindex);
      $pattern .= implode('|', $patterns);
      $pattern .= '/' . $this->patternFlags;
      fwrite($this->out, '
$tokenMap = ' . $tokenindex . ';
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
');
      fwrite($this->out, '$yy_global_pattern = \'' .
          $pattern . '\';' . "\n");
      fwrite($this->out, '
do {
if (preg_match($yy_global_pattern,' . $this->input . ', $yymatches, null, ' .
          $this->counter .
          ')) {
$yysubmatches = $yymatches;
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
if (!count($yymatches)) {
throw new Exception(\'Error: lexing failed because a rule matched\' .
\' an empty string. Input "\' . substr(' . $this->input . ',
' . $this->counter . ', 5) . \'... state ' . $statename . '\');
}
next($yymatches); // skip global match
' . $this->token . ' = key($yymatches); // token number
if ($tokenMap[' . $this->token . ']) {
// extract sub-patterns for passing to lex function
$yysubmatches = array_slice($yysubmatches, ' . $this->token . ' + 1,
$tokenMap[' . $this->token . ']);
} else {
$yysubmatches = array();
}
' . $this->value . ' = current($yymatches); // token value
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
if ($r === null) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
// accept this token
return true;
} elseif ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {');
      fwrite($this->out, '
$yy_yymore_patterns = array(' . "\n");
      // For each rule, emit the alternation of all rules that follow it
      // together with the number of capture groups that precede them. The
      // loop ends once every pattern has been removed.
      $extra = 0;
      for($i = 0; count($patterns); $i++) {
          unset($patterns[$i]);
          $extra += $tokencount[0];
          array_shift($tokencount);
          fwrite($this->out, ' ' . $ruleMap[$i] . ' => array(' . $extra . ', "' .
              implode('|', $patterns) . "\"),\n");
      }
      fwrite($this->out, ' );' . "\n");
      fwrite($this->out, '
// yymore is needed
do {
if (!strlen($yy_yymore_patterns[' . $this->token . '][1])) {
throw new Exception(\'cannot do yymore for the last token\');
}
$yysubmatches = array();
if (preg_match(\'/\' . $yy_yymore_patterns[' . $this->token . '][1] . \'/' . $this->patternFlags . '\',
' . $this->input . ', $yymatches, null, ' . $this->counter .')) {
$yysubmatches = $yymatches;
$yymatches = array_filter($yymatches, \'strlen\'); // remove empty sub-patterns
next($yymatches); // skip global match
' . $this->token . ' += key($yymatches) + $yy_yymore_patterns[' . $this->token . '][0]; // token number
' . $this->value . ' = current($yymatches); // token value
' . $this->line . ' = substr_count(' . $this->value . ', "\n");
if ($tokenMap[' . $this->token . ']) {
// extract sub-patterns for passing to lex function
$yysubmatches = array_slice($yysubmatches, ' . $this->token . ' + 1,
$tokenMap[' . $this->token . ']);
} else {
$yysubmatches = array();
}
}
$r = $this->{\'yy_r' . $ruleindex . '_\' . ' . $this->token . '}($yysubmatches);
} while ($r !== null && !is_bool($r));
if ($r === true) {
// we have changed state
// process this token in the new state
return $this->yylex();
} elseif ($r === false) {
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
if (' . $this->counter . ' >= strlen(' . $this->input . ')) {
return false; // end of input
}
// skip this token
continue;
} else {
// accept
' . $this->counter . ' += strlen(' . $this->value . ');
' . $this->line . ' += substr_count(' . $this->value . ', "\n");
return true;
}
}
} else {
throw new Exception(\'Unexpected input at line \' . ' . $this->line . ' .
\': \' . ' . $this->input . '[' . $this->counter . ']);
}
break;
} while (true);
');
  }
  /**
   * Rewrite a pattern so that every ASCII letter matches in either case.
   *
   * The input is lower-cased first; each letter "x" then becomes the
   * character class "[xX]" and every other byte is copied unchanged.
   *
   * This is written as a plain loop because the /e modifier of
   * preg_replace(), which an eval-based replacement would need, was removed
   * in PHP 7.
   *
   * @param string $string pattern text
   * @return string the pattern with each letter replaced by a two-case class
   */
  function makeCaseInsensitve($string)
  {
      $lowered = strtolower($string);
      $length = strlen($lowered);
      $result = '';
      for ($i = 0; $i < $length; $i++) {
          $char = $lowered[$i];
          $code = ord($char);
          // 97..122 is the byte range of "a".."z".
          if ($code >= 97 && $code <= 122) {
              $result .= '[' . $char . strtoupper($char) . ']';
          } else {
              $result .= $char;
          }
      }
      return $result;
  }
  /**
   * Emit one complete rule set: the yylexN() method, an optional class
   * constant naming the state, and one yy_rN_M() method per rule action.
   *
   * @param array        $rules     list of rules with 'pattern', 'code' and
   *                                'subpatterns' keys
   * @param string|mixed $statename state name; when empty the numeric index of
   *                                the rule set is used and no constant is
   *                                emitted
   * @return void
   */
  function outputRules($rules, $statename)
  {
      $setIndex = $this->_outRuleIndex;
      if (!$statename) {
          $statename = $setIndex;
      }
      // Action-method suffix for each rule: the rule's own key for longest
      // match, or the index of its wrapping capture group for first match.
      $ruleMap = array();
      if ($this->matchlongest) {
          foreach ($rules as $key => $unused) {
              $ruleMap[$key] = $key;
          }
      } else {
          $group = 1;
          $position = 0;
          foreach ($rules as $entry) {
              $ruleMap[$position++] = $group;
              $group += $entry['subpatterns'] + 1;
          }
      }
      fwrite($this->out, '
function yylex' . $setIndex . '()
{');
      if ($this->matchlongest) {
          $this->doLongestMatch($rules, $statename, $setIndex);
      } else {
          $this->doFirstMatch($rules, $statename, $setIndex);
      }
      fwrite($this->out, '
} // end function
');
      if (is_string($statename)) {
          fwrite($this->out, '
const ' . $statename . ' = ' . $setIndex . ';
');
      }
      foreach ($rules as $key => $entry) {
          fwrite($this->out, ' function yy_r' . $setIndex . '_' . $ruleMap[$key] . '($yy_subpatterns)
{
' . $entry['code'] .
              ' }
');
      }
      // for next set of rules
      $this->_outRuleIndex = $setIndex + 1;
  }
  /**
   * Print an error message prefixed with the current line of the lexer.
   *
   * @param string $msg text to print after the line prefix
   * @return void
   */
  function error($msg)
  {
      $prefix = 'Error on line ' . $this->lex->line . ': ';
      echo $prefix . $msg;
  }
  /**
   * Run a regular expression through the regex lexer and parser.
   *
   * @param string $pattern regular expression to check
   * @param bool   $update  forwarded unchanged to the regex parser's reset()
   * @return mixed the regex parser's result
   * @throws PHP_LexerGenerator_Exception when the regex parser rejects the
   *         pattern; the underlying message is printed through error() first
   */
  function _validatePattern($pattern, $update = false)
  {
      $lexer = $this->_regexLexer;
      $parser = $this->_regexParser;
      $lexer->reset($pattern, $this->lex->line);
      $parser->reset($this->_patternIndex, $update);
      try {
          for (;;) {
              if (!$lexer->yylex()) {
                  break;
              }
              $parser->doParse($lexer->token, $lexer->value);
          }
          // Signal end of input to the parser.
          $parser->doParse(0, 0);
      } catch (PHP_LexerGenerator_Exception $e) {
          $this->error($e->getMessage());
          throw new PHP_LexerGenerator_Exception('Invalid pattern "' . $pattern . '"');
      }
      return $parser->result;
  }
  425. }
  426. start ::= lexfile.
  lexfile ::= declare rules(B). {
      // state-handling members emitted ahead of the rule sets
      $stateSupport = '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
';
      fwrite($this->out, $stateSupport);
      $sections = B;
      foreach ($sections as $section) {
          $this->outputRules($section['rules'], $section['statename']);
          // trailing code of the section is copied through when present
          if (!$section['code']) {
              continue;
          }
          fwrite($this->out, $section['code']);
      }
  }
  lexfile ::= declare(D) PHPCODE(B) rules(C). {
      // state-handling members emitted ahead of the rule sets
      $stateSupport = '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
';
      fwrite($this->out, $stateSupport);
      // code found between the declarations and the first rule set
      $between = B;
      if (strlen($between)) {
          fwrite($this->out, $between);
      }
      $sections = C;
      foreach ($sections as $section) {
          $this->outputRules($section['rules'], $section['statename']);
          if (!$section['code']) {
              continue;
          }
          fwrite($this->out, $section['code']);
      }
  }
  lexfile ::= PHPCODE(B) declare(D) rules(C). {
      // code found ahead of the declarations is copied through first
      $leading = B;
      if (strlen($leading)) {
          fwrite($this->out, $leading);
      }
      // state-handling members emitted ahead of the rule sets
      $stateSupport = '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
';
      fwrite($this->out, $stateSupport);
      $sections = C;
      foreach ($sections as $section) {
          $this->outputRules($section['rules'], $section['statename']);
          if (!$section['code']) {
              continue;
          }
          fwrite($this->out, $section['code']);
      }
  }
  lexfile ::= PHPCODE(A) declare(D) PHPCODE(B) rules(C). {
      // code found ahead of the declarations is copied through first
      $leading = A;
      if (strlen($leading)) {
          fwrite($this->out, $leading);
      }
      // state-handling members emitted ahead of the rule sets
      $stateSupport = '
private $_yy_state = 1;
private $_yy_stack = array();
function yylex()
{
return $this->{\'yylex\' . $this->_yy_state}();
}
function yypushstate($state)
{
array_push($this->_yy_stack, $this->_yy_state);
$this->_yy_state = $state;
}
function yypopstate()
{
$this->_yy_state = array_pop($this->_yy_stack);
}
function yybegin($state)
{
$this->_yy_state = $state;
}
';
      fwrite($this->out, $stateSupport);
      // code found between the declarations and the first rule set
      $between = B;
      if (strlen($between)) {
          fwrite($this->out, $between);
      }
      $sections = C;
      foreach ($sections as $section) {
          $this->outputRules($section['rules'], $section['statename']);
          if (!$section['code']) {
              continue;
          }
          fwrite($this->out, $section['code']);
      }
  }
  declare(A) ::= COMMENTSTART declarations(B) COMMENTEND. {
      // remember the declared patterns for the rule sections and restart the
      // sub-pattern index at 1
      $this->_patternIndex = 1;
      $this->patterns = B['patterns'];
      A = B;
  }
  declarations(A) ::= processing_instructions(B) pattern_declarations(C). {
      // these five processing instructions must all be present
      $required = array(
          'counter' => true,
          'input' => true,
          'token' => true,
          'value' => true,
          'line' => true,
      );
      foreach (B as $pi) {
          unset($required[$pi['pi']]);
      }
      // the check runs once, after every instruction has been seen, so that
      // a missing instruction is always reported and the order in which the
      // instructions appear does not matter
      if (count($required)) {
          throw new Exception('Processing Instructions "' .
              implode(', ', array_keys($required)) . '" must be defined');
      }
      // full set of recognised instructions; each one is stored in the
      // property of the same name
      $expected = array(
          'caseinsensitive' => true,
          'counter' => true,
          'input' => true,
          'token' => true,
          'value' => true,
          'line' => true,
          'matchlongest' => true,
          'unicode' => true,
      );
      foreach (B as $pi) {
          if (isset($expected[$pi['pi']])) {
              $this->{$pi['pi']} = $pi['definition'];
              // matchlongest acts as an on/off switch regardless of its value
              if ($pi['pi'] == 'matchlongest') {
                  $this->matchlongest = true;
              }
              continue;
          }
          // unknown instructions are reported but do not stop the run
          $this->error('Unknown processing instruction %' . $pi['pi'] .
              ', should be one of "' . implode(', ', array_keys($expected)) . '"');
      }
      // PCRE modifiers appended to every emitted pattern
      $this->patternFlags = ($this->caseinsensitive ? 'i' : '')
          . ($this->unicode ? 'u' : '');
      A = array('patterns' => C, 'pis' => B);
      $this->_patternIndex = 1;
  }
  processing_instructions(A) ::= PI(B) SUBPATTERN(C). {
      // first instruction: start the list
      $entry = array('pi' => B, 'definition' => C);
      A = array($entry);
  }
  processing_instructions(A) ::= PI(B) CODE(C). {
      // first instruction: start the list
      $entry = array('pi' => B, 'definition' => C);
      A = array($entry);
  }
  processing_instructions(A) ::= processing_instructions(P) PI(B) SUBPATTERN(C). {
      // later instruction: append to the list built so far
      $list = P;
      $list[] = array('pi' => B, 'definition' => C);
      A = $list;
  }
  processing_instructions(A) ::= processing_instructions(P) PI(B) CODE(C). {
      // later instruction: append to the list built so far
      $list = P;
      $list[] = array('pi' => B, 'definition' => C);
      A = $list;
  }
  pattern_declarations(A) ::= PATTERN(B) subpattern(C). {
      // first declaration: start the name => pattern map
      A = array(B => C);
      // reset internal indicator of where we are in a pattern
      $this->_patternIndex = 0;
  }
  pattern_declarations(A) ::= pattern_declarations(B) PATTERN(C) subpattern(D). {
      A = B;
      if (isset(A[C])) {
          // the subpattern value is a plain string, so it is concatenated
          // directly into the message
          throw new Exception('Pattern "' . C . '" is already defined as "' .
              A[C] . '", cannot redefine as "' . D . '"');
      }
      A[C] = D;
      // reset internal indicator of where we are in a pattern declaration
      $this->_patternIndex = 0;
  }
  rules(A) ::= COMMENTSTART rule(B) COMMENTEND. {
      // first rule section, no state name, no trailing code
      $section = array('rules' => B, 'code' => '', 'statename' => '');
      A = array($section);
  }
  rules(A) ::= COMMENTSTART PI(P) SUBPATTERN(S) rule(B) COMMENTEND. {
      // first rule section with a state name
      if (!(P == 'statename')) {
          throw new Exception('Error: only %statename processing instruction ' .
              'is allowed in rule sections (found ' . P . ').');
      }
      $section = array('rules' => B, 'code' => '', 'statename' => S);
      A = array($section);
  }
  rules(A) ::= COMMENTSTART rule(B) COMMENTEND PHPCODE(C). {
      // first rule section followed by code
      $section = array('rules' => B, 'code' => C, 'statename' => '');
      A = array($section);
  }
  rules(A) ::= COMMENTSTART PI(P) SUBPATTERN(S) rule(B) COMMENTEND PHPCODE(C). {
      // first rule section with a state name, followed by code
      if (!(P == 'statename')) {
          throw new Exception('Error: only %statename processing instruction ' .
              'is allowed in rule sections (found ' . P . ').');
      }
      $section = array('rules' => B, 'code' => C, 'statename' => S);
      A = array($section);
      $this->_patternIndex = 1;
  }
  rules(A) ::= reset_rules(R) rule(B) COMMENTEND. {
      // further rule section, no state name, no trailing code
      $sections = R;
      $sections[] = array('rules' => B, 'code' => '', 'statename' => '');
      A = $sections;
      $this->_patternIndex = 1;
  }
  rules(A) ::= reset_rules(R) PI(P) SUBPATTERN(S) rule(B) COMMENTEND. {
      // further rule section with a state name
      if (!(P == 'statename')) {
          throw new Exception('Error: only %statename processing instruction ' .
              'is allowed in rule sections (found ' . P . ').');
      }
      $sections = R;
      $sections[] = array('rules' => B, 'code' => '', 'statename' => S);
      A = $sections;
  }
  rules(A) ::= reset_rules(R) rule(B) COMMENTEND PHPCODE(C). {
      // further rule section followed by code
      $sections = R;
      $sections[] = array('rules' => B, 'code' => C, 'statename' => '');
      A = $sections;
  }
  rules(A) ::= reset_rules(R) PI(P) SUBPATTERN(S) rule(B) COMMENTEND PHPCODE(C). {
      // further rule section with a state name, followed by code
      if (!(P == 'statename')) {
          throw new Exception('Error: only %statename processing instruction ' .
              'is allowed in rule sections (found ' . P . ').');
      }
      $sections = R;
      $sections[] = array('rules' => B, 'code' => C, 'statename' => S);
      A = $sections;
  }
  reset_rules(A) ::= rules(R) COMMENTSTART. {
      // a new rule section begins: restart the sub-pattern index at 1 and
      // pass the sections collected so far through unchanged
      $this->_patternIndex = 1;
      A = R;
  }
  rule(A) ::= rule_subpattern(B) CODE(C). {
      // the rule_subpattern value is a pair: pattern text, display name
      $pair = B;
      $label = $pair[1];
      $checked = $this->_validatePattern($pair[0]);
      $this->_patternIndex += $checked['subpatterns'] + 1;
      // warn (without aborting) when the pattern matches the empty string
      $probe = '/' . str_replace('/', '\\/', $checked['pattern']) . '/';
      if (@preg_match($probe, '')) {
          $this->error('Rule "' . $label . '" can match the empty string, this will break lexing');
      }
      $entry = array(
          'pattern' => str_replace('/', '\\/', $checked->string),
          'code' => C,
          'subpatterns' => $checked['subpatterns'],
      );
      A = array($entry);
  }
  rule(A) ::= rule(R) rule_subpattern(B) CODE(C).{
      // the rule_subpattern value is a pair: pattern text, display name
      $pair = B;
      $label = $pair[1];
      $checked = $this->_validatePattern($pair[0]);
      $this->_patternIndex += $checked['subpatterns'] + 1;
      // warn (without aborting) when the pattern matches the empty string
      $probe = '/' . str_replace('/', '\\/', $checked['pattern']) . '/';
      if (@preg_match($probe, '')) {
          $this->error('Rule "' . $label . '" can match the empty string, this will break lexing');
      }
      $collected = R;
      $collected[] = array(
          'pattern' => str_replace('/', '\\/', $checked->string),
          'code' => C,
          'subpatterns' => $checked['subpatterns'],
      );
      A = $collected;
  }
  rule_subpattern(A) ::= QUOTE(B). {
      // literal text: quoted for use inside a "/"-delimited regex
      $regex = preg_quote(B, '/');
      A = array($regex, B);
  }
  rule_subpattern(A) ::= SINGLEQUOTE(B). {
      // literal text matched without regard to case
      $regex = $this->makeCaseInsensitve(preg_quote(B, '/'));
      A = array($regex, B);
  }
  rule_subpattern(A) ::= SUBPATTERN(B). {
      // reference to a declared pattern; unknown names are fatal
      if (isset($this->patterns[B]) === false) {
          $this->error('Undefined pattern "' . B . '" used in rules');
          throw new Exception('Undefined pattern "' . B . '" used in rules');
      }
      $regex = $this->patterns[B];
      A = array($regex, B);
  }
  rule_subpattern(A) ::= rule_subpattern(B) QUOTE(C). {
      // extend both the regex and the space-separated display name
      $regex = B[0] . preg_quote(C, '/');
      $label = B[1] . ' ' . C;
      A = array($regex, $label);
  }
  rule_subpattern(A) ::= rule_subpattern(B) SINGLEQUOTE(C). {
      // extend both the regex and the space-separated display name
      $regex = B[0] . $this->makeCaseInsensitve(preg_quote(C, '/'));
      $label = B[1] . ' ' . C;
      A = array($regex, $label);
  }
  rule_subpattern(A) ::= rule_subpattern(B) SUBPATTERN(C). {
      // reference to a declared pattern; unknown names are fatal
      if (isset($this->patterns[C]) === false) {
          $this->error('Undefined pattern "' . C . '" used in rules');
          throw new Exception('Undefined pattern "' . C . '" used in rules');
      }
      $regex = B[0] . $this->patterns[C];
      $label = B[1] . ' ' . C;
      A = array($regex, $label);
  }
  subpattern(A) ::= QUOTE(B). {
      // literal text: quoted for use inside a "/"-delimited regex
      $regex = preg_quote(B, '/');
      A = $regex;
  }
  subpattern(A) ::= SINGLEQUOTE(B). {
      // literal text matched without regard to case
      $regex = preg_quote(B, '/');
      A = $this->makeCaseInsensitve($regex);
  }
  subpattern(A) ::= SUBPATTERN(B). {
      // increment internal sub-pattern counter
      // adjust back-references in pattern based on previous pattern
      $checked = $this->_validatePattern(B, true);
      $this->_patternIndex += $checked['subpatterns'];
      A = $checked['pattern'];
  }
  subpattern(A) ::= subpattern(B) QUOTE(C). {
      // append quoted literal text to the pattern built so far
      $suffix = preg_quote(C, '/');
      A = B . $suffix;
  }
  subpattern(A) ::= subpattern(B) SINGLEQUOTE(C). {
      // append case-insensitive literal text to the pattern built so far
      $suffix = $this->makeCaseInsensitve(preg_quote(C, '/'));
      A = B . $suffix;
  }
  subpattern(A) ::= subpattern(B) SUBPATTERN(C). {
      // increment internal sub-pattern counter
      // adjust back-references in pattern based on previous pattern
      $checked = $this->_validatePattern(C, true);
      $this->_patternIndex += $checked['subpatterns'];
      A = B . $checked['pattern'];
  }
  755. }