LexerGenerator.php 9.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332
  1. <?php
  2. /**
  3. * PHP_LexerGenerator, a php 5 lexer generator.
  4. *
  5. * This lexer generator reads a file in a format similar to
  6. * re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
  7. *
  8. * PHP version 5
  9. *
  10. * LICENSE:
  11. *
  12. * Copyright (c) 2006, Gregory Beaver <cellog@php.net>
  13. * All rights reserved.
  14. *
  15. * Redistribution and use in source and binary forms, with or without
  16. * modification, are permitted provided that the following conditions
  17. * are met:
  18. *
  19. * * Redistributions of source code must retain the above copyright
  20. * notice, this list of conditions and the following disclaimer.
  21. * * Redistributions in binary form must reproduce the above copyright
  22. * notice, this list of conditions and the following disclaimer in
  23. * the documentation and/or other materials provided with the distribution.
  24. * * Neither the name of the PHP_LexerGenerator nor the names of its
  25. * contributors may be used to endorse or promote products derived
  26. * from this software without specific prior written permission.
  27. *
  28. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
  29. * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
  30. * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
  31. * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  32. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
  33. * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
  34. * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
  35. * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
  36. * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
  37. * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
  38. * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  39. *
  40. * @category php
  41. * @package PHP_LexerGenerator
  42. * @author Gregory Beaver <cellog@php.net>
  43. * @copyright 2006 Gregory Beaver
  44. * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
  45. * @version CVS: $Id: LexerGenerator.php 294970 2010-02-12 03:46:38Z clockwerx $
  46. * @since File available since Release 0.1.0
  47. */
  48. /**
  49. * The Lexer generation parser
  50. */
  51. require_once 'PHP/LexerGenerator/Parser.php';
  52. /**
  53. * Hand-written lexer for lex2php format files
  54. */
  55. require_once 'PHP/LexerGenerator/Lexer.php';
  56. /**
  57. * The basic home class for the lexer generator. A lexer scans text and
  58. * organizes it into tokens for usage by a parser.
  59. *
  60. * Sample Usage:
  61. * <code>
  62. * require_once 'PHP/LexerGenerator.php';
  63. * $lex = new PHP_LexerGenerator('/path/to/lexerfile.plex');
  64. * </code>
  65. *
  66. * A file named "/path/to/lexerfile.php" will be created.
  67. *
  68. * File format consists of a PHP file containing specially
  69. * formatted comments like so:
  70. *
  71. * <code>
  72. * /*!lex2php
  73. * {@*}
  74. * </code>
  75. *
  76. * All lexer definition files must contain at least two lex2php comment blocks:
  77. * - 1 regex declaration block
  78. * - 1 or more rule declaration blocks
  79. *
  80. * The first lex2php comment is the regex declaration block and must contain
  81. * several processor instructions as well as defining a name for all
  82. * regular expressions. Processor instructions start with
  83. * a "%" symbol and must be:
  84. *
  85. * - %counter
  86. * - %input
  87. * - %token
  88. * - %value
  89. * - %line
  90. *
  91. * input and counter should define the class variables used to define lexer input
  92. * and the index into the input. token and value should be used to define the class
  93. * variables used to store the token number and its textual value. Finally, line
  94. * should be used to define the class variable used to define the current line number
  95. * of scanning.
  96. *
  97. * For example:
  98. * <code>
  99. * /*!lex2php
  100. * %counter {$this->N}
  101. * %input {$this->data}
  102. * %token {$this->token}
  103. * %value {$this->value}
  104. * %line {$this->linenumber}
  105. * {@*}
  106. * </code>
  107. *
  108. * Patterns consist of an identifier containing letters or underscores, and
  109. * a descriptive match pattern.
  110. *
  111. * Descriptive match patterns may either be regular expressions (regexes) or
  112. * quoted literal strings. Here are some examples:
  113. *
  114. * <pre>
  115. * pattern = "quoted literal"
  116. * ANOTHER = /[a-zA-Z_]+/
  117. * COMPLEX = @<([a-zA-Z_]+)( +(([a-zA-Z_]+)=((["\'])([^\6]*)\6))+){0,1}>[^<]*</\1>@
  118. * </pre>
  119. *
  120. * Quoted strings must escape the \ and " characters as \\ and \".
  121. *
  122. * Regex patterns must be in Perl-compatible regular expression format (preg).
  123. * special characters (like \t \n or \x3H) can only be used in regexes, all
  124. * \ will be escaped in literal strings.
  125. *
  126. * Sub-patterns may be defined and back-references (like \1) may be used. Any sub-
  127. * patterns detected will be passed to the token handler in the variable
  128. * $yysubmatches.
  129. *
  130. * In addition, lookahead expressions, and once-only expressions are allowed.
  131. * Lookbehind expressions are impossible (scanning always occurs from the
  132. * current position forward), and recursion (?R) can't work and is not allowed.
  133. *
  134. * <code>
  135. * /*!lex2php
  136. * %counter {$this->N}
  137. * %input {$this->data}
  138. * %token {$this->token}
  139. * %value {$this->value}
  140. * %line {$this->linenumber}
  141. * alpha = /[a-zA-Z]/
  142. * alphaplus = /[a-zA-Z]+/
  143. * number = /[0-9]/
  144. * numerals = /[0-9]+/
  145. * whitespace = /[ \t\n]+/
  146. * blah = "$\""
  147. * blahblah = /a\$/
  148. * GAMEEND = @(?:1\-0|0\-1|1/2\-1/2)@
  149. * PAWNMOVE = /P?[a-h]([2-7]|[18]\=(Q|R|B|N))|P?[a-h]x[a-h]([2-7]|[18]\=(Q|R|B|N))/
  150. * {@*}
  151. * </code>
  152. *
  153. * All regexes must be delimited. Any legal preg delimiter can be used (as in @ or / in
  154. * the example above)
  155. *
  156. * Rule lex2php blocks each define a lexer state. You can optionally name the state
  157. * with the %statename processor instruction. State names can be used to transfer to
  158. * a new lexer state with the yybegin() method
  159. *
  160. * <code>
  161. * /*!lex2php
  162. * %statename INITIAL
  163. * blah {
  164. * $this->yybegin(self::INBLAH);
  165. * // note - $this->yybegin(2) would also work
  166. * }
  167. * {@*}
  168. * /*!lex2php
  169. * %statename INBLAH
  170. * ANYTHING {
  171. * $this->yybegin(self::INITIAL);
  172. * // note - $this->yybegin(1) would also work
  173. * }
  174. * {@*}
  175. * </code>
  176. *
  177. * You can maintain a parser state stack simply by using yypushstate() and
  178. * yypopstate() instead of yybegin():
  179. *
  180. * <code>
  181. * /*!lex2php
  182. * %statename INITIAL
  183. * blah {
  184. * $this->yypushstate(self::INBLAH);
  185. * }
  186. * {@*}
  187. * /*!lex2php
  188. * %statename INBLAH
  189. * ANYTHING {
  190. * $this->yypopstate();
  191. * // now INBLAH doesn't care where it was called from
  192. * }
  193. * {@*}
  194. * </code>
  195. *
  196. * Code blocks can choose to skip the current token and cycle to the next token by
  197. * returning "false"
  198. *
  199. * <code>
  200. * /*!lex2php
  201. * WHITESPACE {
  202. * return false;
  203. * }
  204. * {@*}
  205. * </code>
  206. *
  207. * If you wish to re-process the current token in a new state, simply return true.
  208. * If you forget to change lexer state, this will cause an infinite loop,
  209. * so be careful!
  210. *
  211. * <code>
  212. * /*!lex2php
  213. * "(" {
  214. * $this->yypushstate(self::INPARAMS);
  215. * return true;
  216. * }
  217. * {@*}
  218. * </code>
  219. *
  220. * Lastly, if you wish to cycle to the next matching rule, return any value other than
  221. * true, false or null:
  222. *
  223. * <code>
  224. * /*!lex2php
  225. * "{@" ALPHA {
  226. * if ($this->value == '{@internal') {
  227. * return 'more';
  228. * }
  229. * ...
  230. * }
  231. * "{@internal" {
  232. * ...
  233. * }
  234. * {@*}
  235. * </code>
  236. *
  237. * Note that this procedure is exceptionally inefficient, and it would be far better
  238. * to take advantage of PHP_LexerGenerator's top-down precedence and instead code:
  239. *
  240. * <code>
  241. * /*!lex2php
  242. * "{@internal" {
  243. * ...
  244. * }
  245. * "{@" ALPHA {
  246. * ...
  247. * }
  248. * {@*}
  249. * </code>
  250. * @package PHP_LexerGenerator
  251. * @author Gregory Beaver <cellog@php.net>
  252. * @copyright 2006 Gregory Beaver
  253. * @license http://www.php.net/license/3_01.txt PHP License 3.01
  254. * @version @package_version@
  255. * @since Class available since Release 0.1.0
  256. * @example TestLexer.plex Example lexer source
  257. * @example TestLexer.php Example lexer generated php code
  258. * @example usage.php Example usage of PHP_LexerGenerator
  259. * @example Lexer.plex File_ChessPGN lexer source (complex)
  260. * @example Lexer.php File_ChessPGN lexer generated php code
  261. */
  262. class PHP_LexerGenerator
  263. {
  264. /**
  265. * Plex file lexer.
  266. * @var PHP_LexerGenerator_Lexer
  267. */
  268. private $_lex;
  269. /**
  270. * Plex file parser.
  271. * @var PHP_LexerGenerator_Parser
  272. */
  273. private $_parser;
  274. /**
  275. * Path to the output PHP file.
  276. * @var string
  277. */
  278. private $_outfile;
  279. /**
  280. * Debug flag. When set, Parser trace information is generated.
  281. * @var boolean
  282. */
  283. public $debug = false;
  284. /**
  285. * Create a lexer generator and optionally generate a lexer file.
  286. *
  287. * @param string Optional plex file {@see PHP_LexerGenerator::create}.
  288. * @param string Optional output file {@see PHP_LexerGenerator::create}.
  289. */
  290. function __construct($lexerfile = '', $outfile = '')
  291. {
  292. if ($lexerfile) {
  293. $this -> create($lexerfile, $outfile);
  294. }
  295. }
  296. /**
  297. * Create a lexer file from its skeleton plex file.
  298. *
  299. * @param string Path to the plex file.
  300. * @param string Optional path to output file. Default is lexerfile with
  301. * extension of ".php".
  302. */
  303. function create($lexerfile, $outfile = '')
  304. {
  305. $this->_lex = new PHP_LexerGenerator_Lexer(file_get_contents($lexerfile));
  306. $info = pathinfo($lexerfile);
  307. if ($outfile) {
  308. $this->outfile = $outfile;
  309. } else {
  310. $this->outfile = $info['dirname'] . DIRECTORY_SEPARATOR .
  311. substr($info['basename'], 0,
  312. strlen($info['basename']) - strlen($info['extension'])) . 'php';
  313. }
  314. $this->_parser = new PHP_LexerGenerator_Parser($this->outfile, $this->_lex);
  315. if ($this -> debug) {
  316. $this->_parser->PrintTrace();
  317. }
  318. while ($this->_lex->advance($this->_parser)) {
  319. $this->_parser->doParse($this->_lex->token, $this->_lex->value);
  320. }
  321. $this->_parser->doParse(0, 0);
  322. }
  323. }
  324. //$a = new PHP_LexerGenerator('/development/File_ChessPGN/ChessPGN/Lexer.plex');
  325. ?>