123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332 |
- <?php
- /**
- * PHP_LexerGenerator, a php 5 lexer generator.
- *
- * This lexer generator reads a file in a format similar to
- * re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer.
- *
- * PHP version 5
- *
- * LICENSE:
- *
- * Copyright (c) 2006, Gregory Beaver <cellog@php.net>
- * All rights reserved.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- *
- * * Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * * Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in
- * the documentation and/or other materials provided with the distribution.
- * * Neither the name of the PHP_LexerGenerator nor the names of its
- * contributors may be used to endorse or promote products derived
- * from this software without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
- * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO,
- * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
- * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
- * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
- * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
- * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
- * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
- * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
- * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
- * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
- *
- * @category php
- * @package PHP_LexerGenerator
- * @author Gregory Beaver <cellog@php.net>
- * @copyright 2006 Gregory Beaver
- * @license http://www.opensource.org/licenses/bsd-license.php New BSD License
- * @version CVS: $Id: LexerGenerator.php 294970 2010-02-12 03:46:38Z clockwerx $
- * @since File available since Release 0.1.0
- */
- /**
- * The Lexer generation parser
- */
- require_once 'PHP/LexerGenerator/Parser.php';
- /**
- * Hand-written lexer for lex2php format files
- */
- require_once 'PHP/LexerGenerator/Lexer.php';
- /**
- * The basic home class for the lexer generator. A lexer scans text and
- * organizes it into tokens for usage by a parser.
- *
- * Sample Usage:
- * <code>
- * require_once 'PHP/LexerGenerator.php';
- * $lex = new PHP_LexerGenerator('/path/to/lexerfile.plex');
- * </code>
- *
- * A file named "/path/to/lexerfile.php" will be created.
- *
- * File format consists of a PHP file containing specially
- * formatted comments like so:
- *
- * <code>
- * /*!lex2php
- * {@*}
- * </code>
- *
- * All lexer definition files must contain at least two lex2php comment blocks:
- * - 1 regex declaration block
- * - 1 or more rule declaration blocks
- *
- * The first lex2php comment is the regex declaration block. It must contain
- * several processor instructions and must define a name for every
- * regular expression. Processor instructions start with
- * a "%" symbol and must be:
- *
- * - %counter
- * - %input
- * - %token
- * - %value
- * - %line
- *
- * input and counter should define the class variables used to hold the lexer input
- * and the index into the input. token and value should be used to define the class
- * variables used to store the token number and its textual value. Finally, line
- * should be used to define the class variable used to hold the current line number
- * of scanning.
- *
- * For example:
- * <code>
- * /*!lex2php
- * %counter {$this->N}
- * %input {$this->data}
- * %token {$this->token}
- * %value {$this->value}
- * %line {$this->linenumber}
- * {@*}
- * </code>
- *
- * Patterns consist of an identifier containing letters or underscores, and
- * a descriptive match pattern.
- *
- * Descriptive match patterns may either be regular expressions (regexes) or
- * quoted literal strings. Here are some examples:
- *
- * <pre>
- * pattern = "quoted literal"
- * ANOTHER = /[a-zA-Z_]+/
- * COMPLEX = @<([a-zA-Z_]+)( +(([a-zA-Z_]+)=((["\'])([^\6]*)\6))+){0,1}>[^<]*</\1>@
- * </pre>
- *
- * Quoted strings must escape the " and \ characters as \" and \\.
- *
- * Regex patterns must be in Perl-compatible regular expression format (preg).
- * Special characters (like \t \n or \x3H) can only be used in regexes; all
- * \ will be escaped in literal strings.
- *
- * Sub-patterns may be defined and back-references (like \1) may be used. Any sub-
- * patterns detected will be passed to the token handler in the variable
- * $yysubmatches.
- *
- * In addition, lookahead expressions, and once-only expressions are allowed.
- * Lookbehind expressions are impossible (scanning always occurs from the
- * current position forward), and recursion (?R) can't work and is not allowed.
- *
- * <code>
- * /*!lex2php
- * %counter {$this->N}
- * %input {$this->data}
- * %token {$this->token}
- * %value {$this->value}
- * %line {$this->linenumber}
- * alpha = /[a-zA-Z]/
- * alphaplus = /[a-zA-Z]+/
- * number = /[0-9]/
- * numerals = /[0-9]+/
- * whitespace = /[ \t\n]+/
- * blah = "$\""
- * blahblah = /a\$/
- * GAMEEND = @(?:1\-0|0\-1|1/2\-1/2)@
- * PAWNMOVE = /P?[a-h]([2-7]|[18]\=(Q|R|B|N))|P?[a-h]x[a-h]([2-7]|[18]\=(Q|R|B|N))/
- * {@*}
- * </code>
- *
- * All regexes must be delimited. Any legal preg delimiter can be used (as in @ or / in
- * the example above)
- *
- * Rule lex2php blocks each define a lexer state. You can optionally name the state
- * with the %statename processor instruction. State names can be used to transfer to
- * a new lexer state with the yybegin() method
- *
- * <code>
- * /*!lex2php
- * %statename INITIAL
- * blah {
- * $this->yybegin(self::INBLAH);
- * // note - $this->yybegin(2) would also work
- * }
- * {@*}
- * /*!lex2php
- * %statename INBLAH
- * ANYTHING {
- * $this->yybegin(self::INITIAL);
- * // note - $this->yybegin(1) would also work
- * }
- * {@*}
- * </code>
- *
- * You can maintain a parser state stack simply by using yypushstate() and
- * yypopstate() instead of yybegin():
- *
- * <code>
- * /*!lex2php
- * %statename INITIAL
- * blah {
- * $this->yypushstate(self::INBLAH);
- * }
- * {@*}
- * /*!lex2php
- * %statename INBLAH
- * ANYTHING {
- * $this->yypopstate();
- * // now INBLAH doesn't care where it was called from
- * }
- * {@*}
- * </code>
- *
- * Code blocks can choose to skip the current token and cycle to the next token by
- * returning "false"
- *
- * <code>
- * /*!lex2php
- * WHITESPACE {
- * return false;
- * }
- * {@*}
- * </code>
- *
- * If you wish to re-process the current token in a new state, simply return true.
- * If you forget to change the lexer state, this will cause an infinite loop,
- * so be careful!
- *
- * <code>
- * /*!lex2php
- * "(" {
- * $this->yypushstate(self::INPARAMS);
- * return true;
- * }
- * {@*}
- * </code>
- *
- * Lastly, if you wish to cycle to the next matching rule, return any value other than
- * true, false or null:
- *
- * <code>
- * /*!lex2php
- * "{@" ALPHA {
- * if ($this->value == '{@internal') {
- * return 'more';
- * }
- * ...
- * }
- * "{@internal" {
- * ...
- * }
- * {@*}
- * </code>
- *
- * Note that this procedure is exceptionally inefficient, and it would be far better
- * to take advantage of PHP_LexerGenerator's top-down precedence and instead code:
- *
- * <code>
- * /*!lex2php
- * "{@internal" {
- * ...
- * }
- * "{@" ALPHA {
- * ...
- * }
- * {@*}
- * </code>
- * @package PHP_LexerGenerator
- * @author Gregory Beaver <cellog@php.net>
- * @copyright 2006 Gregory Beaver
- * @license http://www.php.net/license/3_01.txt PHP License 3.01
- * @version @package_version@
- * @since Class available since Release 0.1.0
- * @example TestLexer.plex Example lexer source
- * @example TestLexer.php Example lexer generated php code
- * @example usage.php Example usage of PHP_LexerGenerator
- * @example Lexer.plex File_ChessPGN lexer source (complex)
- * @example Lexer.php File_ChessPGN lexer generated php code
- */
class PHP_LexerGenerator
{
    /**
     * Plex file lexer.
     * @var PHP_LexerGenerator_Lexer
     */
    private $_lex;

    /**
     * Plex file parser.
     * @var PHP_LexerGenerator_Parser
     */
    private $_parser;

    /**
     * Path to the output PHP file.
     * @var string
     */
    private $_outfile;

    /**
     * Debug flag. When set, Parser trace information is generated.
     *
     * The flag is read inside {@link create()}, so it only has an effect when
     * it is set before create() runs: construct the generator without
     * arguments, set the flag, then call create() explicitly.
     *
     * @var boolean
     */
    public $debug = false;

    /**
     * Create a lexer generator and optionally generate a lexer file.
     *
     * When $lexerfile is empty (the default) nothing is generated and
     * {@link create()} can be called later.
     *
     * @param string $lexerfile Optional plex file {@see PHP_LexerGenerator::create}.
     * @param string $outfile   Optional output file {@see PHP_LexerGenerator::create}.
     */
    public function __construct($lexerfile = '', $outfile = '')
    {
        if ($lexerfile) {
            $this->create($lexerfile, $outfile);
        }
    }

    /**
     * Create a lexer file from its skeleton plex file.
     *
     * The plex source is read, tokenized by PHP_LexerGenerator_Lexer and every
     * token is fed to PHP_LexerGenerator_Parser, which receives the output path.
     *
     * @param string $lexerfile Path to the plex file.
     * @param string $outfile   Optional path to output file. Default is lexerfile
     *                          with its extension replaced by ".php" (or with
     *                          ".php" appended when it has no extension).
     *
     * @return void
     * @throws RuntimeException If the plex file cannot be read.
     */
    public function create($lexerfile, $outfile = '')
    {
        $source = file_get_contents($lexerfile);
        if ($source === false) {
            // Do not hand "false" to the lexer as if it were an empty document.
            throw new RuntimeException(
                'Unable to read lexer file "' . $lexerfile . '"'
            );
        }
        $this->_lex = new PHP_LexerGenerator_Lexer($source);

        // Store the path in the declared private property; the previous code
        // wrote to an undeclared "outfile" property instead.
        $this->_outfile = $outfile ? $outfile : $this->_defaultOutfile($lexerfile);

        $this->_parser = new PHP_LexerGenerator_Parser($this->_outfile, $this->_lex);
        if ($this->debug) {
            $this->_parser->PrintTrace();
        }
        while ($this->_lex->advance($this->_parser)) {
            $this->_parser->doParse($this->_lex->token, $this->_lex->value);
        }
        // NOTE(review): the trailing (0, 0) call presumably tells the parser
        // that the input is exhausted - confirm against PHP_LexerGenerator_Parser.
        $this->_parser->doParse(0, 0);
    }

    /**
     * Derive the default output path for a plex file.
     *
     * Uses the "filename" component of pathinfo() so that a source file
     * without an extension yields "name.php" rather than "namephp".
     *
     * @param string $lexerfile Path to the plex file.
     *
     * @return string Path in the same directory, with a ".php" extension.
     */
    private function _defaultOutfile($lexerfile)
    {
        $info = pathinfo($lexerfile);

        return $info['dirname'] . DIRECTORY_SEPARATOR . $info['filename'] . '.php';
    }
}
- //$a = new PHP_LexerGenerator('/development/File_ChessPGN/ChessPGN/Lexer.plex');
- ?>
|