* All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * * * Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * * Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in * the documentation and/or other materials provided with the distribution. * * Neither the name of the PHP_LexerGenerator nor the names of its * contributors may be used to endorse or promote products derived * from this software without specific prior written permission. * * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS * IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
* * @category php * @package PHP_LexerGenerator * @author Gregory Beaver * @copyright 2006 Gregory Beaver * @license http://www.opensource.org/licenses/bsd-license.php New BSD License * @version CVS: $Id: LexerGenerator.php 294970 2010-02-12 03:46:38Z clockwerx $ * @since File available since Release 0.1.0 */ /** * The Lexer generation parser */ require_once 'PHP/LexerGenerator/Parser.php'; /** * Hand-written lexer for lex2php format files */ require_once 'PHP/LexerGenerator/Lexer.php'; /** * The basic home class for the lexer generator. A lexer scans text and * organizes it into tokens for usage by a parser. * * Sample Usage: * * require_once 'PHP/LexerGenerator.php'; * $lex = new PHP_LexerGenerator('/path/to/lexerfile.plex'); * * * A file named "/path/to/lexerfile.php" will be created. * * File format consists of a PHP file containing specially * formatted comments like so: * * * /*!lex2php * {@*} * * * All lexer definition files must contain at least two lex2php comment blocks: * - 1 regex declaration block * - 1 or more rule declaration blocks * * The first lex2php comment is the regex declaration block and must contain * several processor instructions as well as defining a name for all * regular expressions. Processor instructions start with * a "%" symbol and must be: * * - %counter * - %input * - %token * - %value * - %line * * input and counter should define the class variables used to define lexer input * and the index into the input. token and value should be used to define the class * variables used to store the token number and its textual value. Finally, line * should be used to define the class variable used to define the current line number * of scanning. * * For example: * * /*!lex2php * %counter {$this->N} * %input {$this->data} * %token {$this->token} * %value {$this->value} * %line {$this->linenumber} * {@*} * * * Patterns consist of an identifier containing letters or underscores, and * a descriptive match pattern. 
* * Descriptive match patterns may either be regular expressions (regexes) or * quoted literal strings. Here are some examples: * *
 * pattern = "quoted literal"
 * ANOTHER = /[a-zA-Z_]+/
 * COMPLEX = @<([a-zA-Z_]+)( +(([a-zA-Z_]+)=((["\'])([^\6]*)\6))+){0,1}>[^<]*@
 * 
* * Quoted strings must escape the \ and " characters as \\ and \". * * Regex patterns must be in Perl-compatible regular expression format (preg). * Special characters (like \t \n or \x3H) can only be used in regexes; all * \ will be escaped in literal strings. * * Sub-patterns may be defined and back-references (like \1) may be used. Any sub- * patterns detected will be passed to the token handler in the variable * $yysubmatches. * * In addition, lookahead expressions and once-only expressions are allowed. * Lookbehind expressions are impossible (scanning always occurs from the * current position forward), and recursion (?R) can't work and is not allowed. * * * /*!lex2php * %counter {$this->N} * %input {$this->data} * %token {$this->token} * %value {$this->value} * %line {$this->linenumber} * alpha = /[a-zA-Z]/ * alphaplus = /[a-zA-Z]+/ * number = /[0-9]/ * numerals = /[0-9]+/ * whitespace = /[ \t\n]+/ * blah = "$\"" * blahblah = /a\$/ * GAMEEND = @(?:1\-0|0\-1|1/2\-1/2)@ * PAWNMOVE = /P?[a-h]([2-7]|[18]\=(Q|R|B|N))|P?[a-h]x[a-h]([2-7]|[18]\=(Q|R|B|N))/ * {@*} * * * All regexes must be delimited. Any legal preg delimiter can be used (as in @ or / in * the example above). * * Rule lex2php blocks each define a lexer state. You can optionally name the state * with the %statename processor instruction. 
State names can be used to transfer to * a new lexer state with the yybegin() method: * * * /*!lex2php * %statename INITIAL * blah { * $this->yybegin(self::INBLAH); * // note - $this->yybegin(2) would also work * } * {@*} * /*!lex2php * %statename INBLAH * ANYTHING { * $this->yybegin(self::INITIAL); * // note - $this->yybegin(1) would also work * } * {@*} * * * You can maintain a parser state stack simply by using yypushstate() and * yypopstate() instead of yybegin(): * * * /*!lex2php * %statename INITIAL * blah { * $this->yypushstate(self::INBLAH); * } * {@*} * /*!lex2php * %statename INBLAH * ANYTHING { * $this->yypopstate(); * // now INBLAH doesn't care where it was called from * } * {@*} * * * Code blocks can choose to skip the current token and cycle to the next token by * returning "false": * * * /*!lex2php * WHITESPACE { * return false; * } * {@*} * * * If you wish to re-process the current token in a new state, simply return true. * If you forget to change lexer state, this will cause an infinite loop, * so be careful! * * * /*!lex2php * "(" { * $this->yypushstate(self::INPARAMS); * return true; * } * {@*} * * * Lastly, if you wish to cycle to the next matching rule, return any value other than * true, false or null: * * * /*!lex2php * "{@" ALPHA { * if ($this->value == '{@internal') { * return 'more'; * } * ... * } * "{@internal" { * ... * } * {@*} * * * Note that this procedure is exceptionally inefficient, and it would be far better * to take advantage of PHP_LexerGenerator's top-down precedence and instead code: * * * /*!lex2php * "{@internal" { * ... * } * "{@" ALPHA { * ... 
* }
 * {@*}
 *
 * @package PHP_LexerGenerator
 * @author Gregory Beaver
 * @copyright 2006 Gregory Beaver
 * @license http://www.php.net/license/3_01.txt PHP License 3.01
 * @version @package_version@
 * @since Class available since Release 0.1.0
 * @example TestLexer.plex Example lexer source
 * @example TestLexer.php Example lexer generated php code
 * @example usage.php Example usage of PHP_LexerGenerator
 * @example Lexer.plex File_ChessPGN lexer source (complex)
 * @example Lexer.php File_ChessPGN lexer generated php code
 */
class PHP_LexerGenerator
{
    /**
     * Plex file lexer.
     * @var PHP_LexerGenerator_Lexer
     */
    private $_lex;

    /**
     * Plex file parser.
     * @var PHP_LexerGenerator_Parser
     */
    private $_parser;

    /**
     * Path to the output PHP file.
     * @var string
     */
    private $_outfile;

    /**
     * Debug flag. When set, Parser trace information is generated.
     *
     * The flag is read inside {@link create()}, so it has to be set before
     * create() runs. Passing a plex file to the constructor calls create()
     * immediately, while the flag still has its default value of false; to
     * get trace output, construct without arguments, set the flag, and then
     * call create() explicitly.
     *
     * @var boolean
     */
    public $debug = false;

    /**
     * Create a lexer generator and optionally generate a lexer file.
     *
     * When $lexerfile is non-empty the generator runs straight away by
     * delegating to {@link create()}; otherwise nothing is done until
     * create() is called.
     *
     * @param string $lexerfile Optional plex file {@see PHP_LexerGenerator::create}.
     * @param string $outfile   Optional output file {@see PHP_LexerGenerator::create}.
     */
    public function __construct($lexerfile = '', $outfile = '')
    {
        if ($lexerfile) {
            $this->create($lexerfile, $outfile);
        }
    }

    /**
     * Create a lexer file from its skeleton plex file.
     *
     * Reads the plex file, hands its contents to the plex lexer, and feeds
     * every token the lexer produces to the plex parser, which is constructed
     * with the output path.
     *
     * @param string $lexerfile Path to the plex file.
     * @param string $outfile   Optional path to output file. Default is the
     *                          lexer file's path with its extension replaced
     *                          by ".php".
     *
     * @return void
     */
    public function create($lexerfile, $outfile = '')
    {
        $this->_lex = new PHP_LexerGenerator_Lexer(file_get_contents($lexerfile));

        if ($outfile) {
            $this->_outfile = $outfile;
        } else {
            // pathinfo()'s 'filename' entry is the basename without its last
            // extension, so this also works for a plex file that has no
            // extension at all ('extension' is not set in that case).
            $info = pathinfo($lexerfile);
            $this->_outfile = $info['dirname'] . DIRECTORY_SEPARATOR
                . $info['filename'] . '.php';
        }

        $this->_parser = new PHP_LexerGenerator_Parser($this->_outfile, $this->_lex);
        if ($this->debug) {
            $this->_parser->PrintTrace();
        }

        // Pump tokens from the lexer into the parser until advance() reports
        // that there is nothing left to scan.
        while ($this->_lex->advance($this->_parser)) {
            $this->_parser->doParse($this->_lex->token, $this->_lex->value);
        }

        // NOTE(review): the final (0, 0) call presumably signals end of input
        // to the parser - confirm against PHP_LexerGenerator_Parser::doParse().
        $this->_parser->doParse(0, 0);
    }
}
//$a = new PHP_LexerGenerator('/development/File_ChessPGN/ChessPGN/Lexer.plex');
?>