123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533 |
- <?php
- /**
- * PHP_LexerGenerator, a php 5 lexer generator.
- *
- * This lexer generator takes a file in a format similar to
- * re2c ({@link http://re2c.org}) and translates it into a PHP 5-based lexer
- *
- * PHP version 5
- *
- * LICENSE: This source file is subject to version 3.01 of the PHP license
- * that is available through the world-wide-web at the following URI:
- * http://www.php.net/license/3_01.txt. If you did not receive a copy of
- * the PHP License and are unable to obtain it through the web, please
- * send a note to license@php.net so we can mail you a copy immediately.
- *
- * @category php
- * @package PHP_LexerGenerator
- * @author Gregory Beaver <cellog@php.net>
- * @copyright 2006 Gregory Beaver
- * @license http://www.php.net/license/3_01.txt PHP License 3.01
- * @version CVS: $Id: Lexer.php 246683 2007-11-22 04:43:52Z instance $
- * @since File available since Release 0.1.0
- */
- require_once 'PHP/LexerGenerator/Parser.php';
- /**
- * Token scanner for plex files.
- *
- * This scanner detects comments beginning with "/*!lex2php" and
- * then returns their components (processing instructions, patterns, strings,
- * action code, and regexes)
- * @package PHP_LexerGenerator
- * @author Gregory Beaver <cellog@php.net>
- * @copyright 2006 Gregory Beaver
- * @license http://www.php.net/license/3_01.txt PHP License 3.01
- * @version @package_version@
- * @since Class available since Release 0.1.0
- */
class PHP_LexerGenerator_Lexer
{
    /**
     * Input being scanned, with CRLF line endings normalised to LF
     * @var string
     */
    private $data;
    /**
     * Byte offset of the scanner within the input
     * @var int
     */
    private $N;
    /**
     * Name of the current scanning state.
     *
     * advance() dispatches to the method named "lex" followed by this value,
     * so every value stored here must be a method name without the prefix.
     * @var string
     */
    private $state;
    /**
     * Current line number in input
     * @var int
     */
    public $line;
    /**
     * Number of scanning errors detected
     * @var int
     */
    public $errors = 0;
    /**
     * integer identifier of the current token
     * @var int
     */
    public $token;
    /**
     * string content of current token
     * @var string
     */
    public $value;
    const CODE = PHP_LexerGenerator_Parser::CODE;
    const COMMENTEND = PHP_LexerGenerator_Parser::COMMENTEND;
    const COMMENTSTART = PHP_LexerGenerator_Parser::COMMENTSTART;
    const PATTERN = PHP_LexerGenerator_Parser::PATTERN;
    const PHPCODE = PHP_LexerGenerator_Parser::PHPCODE;
    const PI = PHP_LexerGenerator_Parser::PI;
    const QUOTE = PHP_LexerGenerator_Parser::QUOTE;
    const SINGLEQUOTE = PHP_LexerGenerator_Parser::SINGLEQUOTE;
    const SUBPATTERN = PHP_LexerGenerator_Parser::SUBPATTERN;

    /**
     * prepare scanning
     * @param string the input
     */
    public function __construct($data)
    {
        $this->data = str_replace("\r\n", "\n", $data);
        $this->N = 0;
        $this->line = 1;
        $this->state = 'Start';
        $this->errors = 0;
    }

    /**
     * Output an error message and count it in $errors
     * @param string
     */
    private function error($msg)
    {
        echo 'Error on line ' . $this->line . ': ' . $msg;
        $this->errors++;
    }

    /**
     * Return the character at the scanner position plus $offset.
     *
     * Reading through this helper instead of indexing the input directly
     * avoids "uninitialized string offset" warnings at the end of the input.
     * @param int distance from the current position (non-negative)
     * @return string a single character, or '' when past the end of input
     */
    private function peek($offset = 0)
    {
        $index = $this->N + $offset;
        return isset($this->data[$index]) ? $this->data[$index] : '';
    }

    /**
     * Initial scanning state lexer
     *
     * The first lex2php comment found switches the scanner to the
     * declaration state.
     * @return boolean
     */
    private function lexStart()
    {
        return $this->lexTopLevel('Declare');
    }

    /**
     * lexer for top-level scanning state after the initial declaration comment
     *
     * Every further lex2php comment switches the scanner to the rule state.
     * @return boolean
     */
    private function lexStartNonDeclare()
    {
        return $this->lexTopLevel('Rule');
    }

    /**
     * Shared implementation of the two top-level states.
     *
     * Emits everything up to the next "/*!lex2php" line as a PHPCODE token,
     * or, when the scanner sits exactly on that marker, emits COMMENTSTART
     * and enters $commentState.
     * @param string state to enter once the comment marker is consumed
     * @return boolean false at end of input, true when a token was produced
     */
    private function lexTopLevel($commentState)
    {
        $length = strlen($this->data);
        if ($this->N >= $length) {
            return false;
        }
        $marker = '/*!lex2php' . "\n";
        $start = strpos($this->data, $marker, $this->N);
        if ($start === false) {
            // no further lexer comment: the rest of the input is plain PHP
            $start = $length;
        }
        if ($start > $this->N) {
            $this->value = substr($this->data, $this->N, $start - $this->N);
            $this->N = $start;
            $this->token = self::PHPCODE;
            return true;
        }
        $this->value = $marker;
        $this->N += strlen($marker);
        $this->token = self::COMMENTSTART;
        $this->state = $commentState;
        return true;
    }

    /**
     * Consume the closing marker of a lex2php comment if the scanner is on it.
     *
     * On success a COMMENTEND token is produced and the scanner returns to
     * the top-level (non-declaration) state.
     * @return boolean true if the marker was consumed
     */
    private function lexCommentEnd()
    {
        // substr() is safe at the end of input, unlike direct offset access
        if (substr($this->data, $this->N, 2) !== '*/') {
            return false;
        }
        $this->state = 'StartNonDeclare';
        $this->value = '*/';
        $this->N += 2;
        $this->token = self::COMMENTEND;
        return true;
    }

    /**
     * lexer for declaration comment state
     *
     * Skips blank space and "//" comments, then recognises the end of the
     * comment, a "%name" processing instruction or a sub-pattern name.
     * @return boolean
     */
    private function lexDeclare()
    {
        $length = strlen($this->data);
        while (true) {
            $this->skipWhitespaceEol();
            if (substr($this->data, $this->N, 2) !== '//') {
                break;
            }
            // Skip single-line comment; its newline is left for the next
            // skipWhitespaceEol() pass so that the line counter stays right
            while ($this->N < $length && $this->data[$this->N] != "\n") {
                ++$this->N;
            }
        }
        if ($this->lexCommentEnd()) {
            return true;
        }
        if (preg_match('/\G%([a-z]+)/', $this->data, $token, 0, $this->N)) {
            $this->value = $token[1];
            $this->N += strlen($token[1]) + 1; // + 1 for the leading "%"
            $this->state = 'DeclarePI';
            $this->token = self::PI;
            return true;
        }
        if (preg_match('/\G[a-zA-Z_][a-zA-Z0-9_]*/', $this->data, $token, 0, $this->N)) {
            $this->value = $token[0];
            $this->token = self::PATTERN;
            $this->N += strlen($token[0]);
            $this->state = 'DeclareEquals';
            return true;
        }
        $this->error('expecting declaration of sub-patterns');
        return false;
    }

    /**
     * lexer for processor instructions within declaration comment
     * @return boolean
     */
    private function lexDeclarePI()
    {
        return $this->lexInstructionValue('Declare');
    }

    /**
     * lexer for processor instructions inside rule comments
     * @return boolean
     */
    private function lexDeclarePIRule()
    {
        return $this->lexInstructionValue('Rule');
    }

    /**
     * Shared implementation of the two processing-instruction states.
     *
     * A newline ends the instruction and hands over to $resumeState, a "{"
     * starts a code block, anything else up to the end of the line is
     * returned as a SUBPATTERN token.
     * @param string state to resume once the instruction line has ended
     * @return boolean
     */
    private function lexInstructionValue($resumeState)
    {
        $this->skipWhitespace();
        $current = $this->peek();
        if ($current === "\n") {
            $this->N++;
            $this->state = $resumeState;
            $this->line++;
            return $this->{'lex' . $resumeState}();
        }
        if ($current === '{') {
            return $this->lexCode();
        }
        if (!preg_match("/\G[^\n]+/", $this->data, $token, 0, $this->N)) {
            $this->error('Unexpected end of file');
            return false;
        }
        $this->value = $token[0];
        $this->N += strlen($this->value);
        $this->token = self::SUBPATTERN;
        return true;
    }

    /**
     * lexer for the state representing scanning between a pattern and the "=" sign
     * @return boolean
     */
    private function lexDeclareEquals()
    {
        $this->skipWhitespace();
        if ($this->N >= strlen($this->data)) {
            $this->error('unexpected end of input, expecting "=" for sub-pattern declaration');
            // stop here: there is no character left to compare against "="
            return false;
        }
        if ($this->data[$this->N] != '=') {
            $this->error('expecting "=" for sub-pattern declaration');
            return false;
        }
        $this->N++;
        $this->state = 'DeclareRightside';
        // lexDeclareRightside() skips blanks and reports a premature end of file
        return $this->lexDeclareRightside();
    }

    /**
     * lexer for the right side of a pattern, detects quotes or regexes
     *
     * Blanks are skipped before anything is examined, so trailing blanks
     * before the end of the line and blanks between consecutive right-side
     * components are both accepted.
     * @return boolean
     */
    private function lexDeclareRightside()
    {
        $this->skipWhitespace();
        $first = $this->peek();
        if ($first === '') {
            $this->error('unexpected end of file, expecting right side of sub-pattern declaration');
            return false;
        }
        if ($first === "\n") {
            // must be a state name usable by advance(), i.e. without "lex"
            $this->state = 'Declare';
            $this->N++;
            $this->line++;
            return $this->lexDeclare();
        }
        if ($first === '"') {
            return $this->lexQuote();
        }
        if ($first === '\'') {
            return $this->lexQuote('\'');
        }
        // match a pattern: any other character acts as the regex delimiter
        $end = $this->findClosingDelimiter($first);
        if ($end === false) {
            $this->error('Unterminated regex pattern (started with "' . $first . '"');
            return false;
        }
        if (substr_count($this->data, "\n", $this->N, $end - $this->N)) {
            $this->error('Regex pattern extends over multiple lines');
            return false;
        }
        $this->value = substr($this->data, $this->N + 1, $end - $this->N - 1);
        // unescape the regex marker
        // we will re-escape when creating the final regex
        $this->value = str_replace('\\' . $first, $first, $this->value);
        $this->N = $end + 1;
        $this->token = self::SUBPATTERN;
        return true;
    }

    /**
     * Find the delimiter that closes the literal opened at the scanner position.
     *
     * A delimiter preceded by a single backslash is treated as escaped and
     * skipped; one preceded by two backslashes is accepted.
     * @param string the delimiter character
     * @return int|false offset of the closing delimiter, false if there is none
     */
    private function findClosingDelimiter($delimiter)
    {
        $pos = $this->N;
        do {
            $pos = strpos($this->data, $delimiter, $pos + 1);
        } while ($pos !== false
            && $this->data[$pos - 1] == '\\'
            && $this->data[$pos - 2] != '\\');
        return $pos;
    }

    /**
     * lexer for quoted literals
     *
     * Produces a QUOTE token for double quotes and a SINGLEQUOTE token for
     * single quotes; the value has escaped quotes and doubled backslashes
     * unescaped.
     * @param string the quote character the literal starts with
     * @return boolean
     */
    private function lexQuote($quote = '"')
    {
        $end = $this->findClosingDelimiter($quote);
        if ($end === false) {
            $this->error('unterminated quote');
            return false;
        }
        if (substr_count($this->data, "\n", $this->N, $end - $this->N)) {
            $this->error('quote extends over multiple lines');
            return false;
        }
        $this->value = substr($this->data, $this->N + 1, $end - $this->N - 1);
        $this->value = str_replace('\\' . $quote, $quote, $this->value);
        $this->value = str_replace('\\\\', '\\', $this->value);
        $this->N = $end + 1;
        $this->token = ($quote == '\'') ? self::SINGLEQUOTE : self::QUOTE;
        return true;
    }

    /**
     * lexer for rules
     *
     * Skips blank space and "//" comments, then recognises the end of the
     * comment, quoted literals, "%name" processing instructions, code blocks
     * and sub-pattern names.
     * @return boolean
     */
    private function lexRule()
    {
        $length = strlen($this->data);
        while ($this->N < $length) {
            $char = $this->data[$this->N];
            if ($char == '/' && $this->peek(1) == '/') {
                // Skip single line comments
                $newline = strpos($this->data, "\n", $this->N);
                if ($newline === false) {
                    // comment runs to the end of the input
                    $this->N = $length;
                    break;
                }
                $this->N = $newline + 1;
                $this->line++;
            } elseif ($char == ' ' || $char == "\t" || $char == "\n") {
                if ($char == "\n") {
                    $this->line++;
                }
                $this->N++; // skip all whitespace
            } else {
                break;
            }
        }
        if ($this->N >= $length) {
            $this->error('unexpected end of input, expecting rule declaration');
            return false;
        }
        if ($this->lexCommentEnd()) {
            return true;
        }
        $char = $this->data[$this->N];
        if ($char == '\'') {
            return $this->lexQuote('\'');
        }
        if (preg_match('/\G%([a-zA-Z_]+)/', $this->data, $token, 0, $this->N)) {
            $this->value = $token[1];
            $this->N += strlen($token[1]) + 1; // + 1 for the leading "%"
            $this->state = 'DeclarePIRule';
            $this->token = self::PI;
            return true;
        }
        if ($char == '{') {
            return $this->lexCode();
        }
        if ($char == '"') {
            return $this->lexQuote();
        }
        if (preg_match('/\G[a-zA-Z_][a-zA-Z0-9_]*/', $this->data, $token, 0, $this->N)) {
            $this->value = $token[0];
            $this->N += strlen($token[0]);
            $this->token = self::SUBPATTERN;
            return true;
        }
        $this->error('expecting token rule (quotes or sub-patterns)');
        return false;
    }

    /**
     * lexer for php code blocks
     *
     * The scanner must be positioned on the opening "{". Nested braces are
     * balanced; braces inside "//" comments and inside quoted strings are
     * ignored. The token value is the code between the outermost braces.
     * @return boolean
     */
    private function lexCode()
    {
        $length = strlen($this->data);
        $cp = $this->N + 1;
        for ($level = 1; $cp < $length && ($level > 1 || $this->data[$cp] != '}'); $cp++) {
            $char = $this->data[$cp];
            if ($char == '{') {
                $level++;
            } elseif ($char == '}') {
                $level--;
            } elseif ($char == '/' && isset($this->data[$cp + 1]) && $this->data[$cp + 1] == '/') {
                /* Skip C++ style comments */
                $eol = strpos($this->data, "\n", $cp + 2);
                if ($eol === false) {
                    $cp = $length;
                    break;
                }
                $cp = $eol;
            } elseif ($char == "'" || $char == '"') {
                /* String and character literals */
                $prevc = 0;
                for ($cp++; $cp < $length && ($this->data[$cp] != $char || $prevc === '\\'); $cp++) {
                    // a backslash escapes exactly one following character
                    $prevc = ($prevc === '\\') ? 0 : $this->data[$cp];
                }
            }
        }
        if ($cp >= $length) {
            // error() already increments the error counter
            $this->error("PHP code starting on this line is not terminated before the end of the file.");
            return false;
        }
        $this->value = substr($this->data, $this->N + 1, $cp - $this->N - 1);
        $this->token = self::CODE;
        $this->N = $cp + 1;
        return true;
    }

    /**
     * Skip whitespace characters (spaces and tabs)
     */
    private function skipWhitespace()
    {
        if ($this->N < strlen($this->data)) {
            $this->N += strspn($this->data, " \t", $this->N);
        }
    }

    /**
     * Skip whitespace and EOL characters, counting the lines passed
     */
    private function skipWhitespaceEol()
    {
        if ($this->N >= strlen($this->data)) {
            return;
        }
        $span = strspn($this->data, " \t\n", $this->N);
        if ($span > 0) {
            $this->line += substr_count($this->data, "\n", $this->N, $span);
            $this->N += $span;
        }
    }

    /**
     * Primary scanner
     *
     * In addition to lexing, this properly increments the line number of lexing.
     * This calls the proper sub-lexer based on the current scanning state and,
     * on success, leaves the result in $token and $value.
     * @param mixed $parser not used by the scanner
     * @return boolean true if a token was produced, false at end of input
     *                 or after a scanning error
     */
    public function advance($parser)
    {
        if ($this->N >= strlen($this->data)) {
            return false;
        }
        if ($this->{'lex' . $this->state}()) {
            $this->line += substr_count($this->value, "\n");
            return true;
        }
        return false;
    }
}
- ?>
|