1: <?php
2:
3: 4: 5: 6:
7:
8: namespace Latte;
9:
10:
11: 12: 13:
14: class Parser extends Object
15: {
16:
/** regular expression fragment matching a single- or double-quoted string, honouring backslash escapes */
const RE_STRING = '\'(?:\\\\.|[^\'\\\\])*+\'|"(?:\\\\.|[^"\\\\])*+"';

/** prefix of attribute names whose quoted value contextHtmlTag() captures whole into the attribute token */
const N_PREFIX = 'n:';

/** @var string  key of $syntaxes activated at the start of parse() and restored when a syntax section ends */
public $defaultSyntax = 'latte';

/** @var bool  when FALSE, parseMacroTag() raises E_USER_DEPRECATED for the {!...} noescape shortcut */
public $shortNoEscape = FALSE;

/** @var array  syntax name => array(left delimiter regexp, right delimiter regexp) */
public $syntaxes = array(
'latte' => array('\\{(?![\\s\'"{}])', '\\}'), // {...}
'double' => array('\\{\\{(?![\\s\'"{}])', '\\}\\}'), // {{...}}
'asp' => array('<%\s*', '\s*%>'), // <%...%>
'python' => array('\\{[{%]\s*', '\s*[%}]\\}'), // {% ... %} or {{ ... }}
'off' => array('[^\x00-\xFF]', ''), // patterns run without the u modifier, so this left delimiter matches no byte
);

/** @var array  array(left delimiter regexp, right delimiter regexp) currently in effect, see setDelimiters() */
private $delimiters;

/** @var string  template source being parsed (BOM removed, CRLF converted to LF by parse()) */
private $input;

/** @var array  tokens produced so far by addToken() */
private $output;

/** @var int  position in $input (as used by substr() and preg_match()) where the next match starts */
private $offset;

/** @var array  array(context name, context data); data is the quote for an attribute or array(previous context, opening delimiter) for a macro */
private $context;

/** @var string  lower-cased name of the last HTML tag seen, prefixed with '/' when it was a closing tag */
private $lastHtmlTag;

/** @var string  value of $lastHtmlTag at the moment an n:syntax attribute was filtered */
private $syntaxEndTag;

/** @var int  counter maintained by filter() for tags named $syntaxEndTag; default syntax returns when it drops to 0 */
private $syntaxEndLevel = 0;

/** @var bool  set by setContentType(); when TRUE, script and style contents are not switched to the CDATA context */
private $xmlMode;
63:
64:
/**
 * Parser contexts. Each value is also the suffix of the handler method that parse()
 * calls as 'context' . <value>, e.g. 'htmlText' is handled by contextHtmlText().
 */
const CONTEXT_HTML_TEXT = 'htmlText',
	CONTEXT_CDATA = 'cdata',
	CONTEXT_HTML_TAG = 'htmlTag',
	CONTEXT_HTML_ATTRIBUTE = 'htmlAttribute',
	CONTEXT_RAW = 'raw',
	// the name of this constant was missing, which made the declaration a syntax error;
	// the rest of the class refers to it as self::CONTEXT_HTML_COMMENT
	CONTEXT_HTML_COMMENT = 'htmlComment',
	CONTEXT_MACRO = 'macro';
72:
73:
74: 75: 76: 77: 78:
/**
 * Tokenizes a template.
 *
 * A leading UTF-8 BOM is dropped and CRLF is converted to LF. The handler of the current
 * context is then run repeatedly; after each run every newly created token goes through
 * filter(), so a syntax or content-type change takes effect before the next handler call.
 * Text left over when no handler matches any more is appended as a final TEXT token
 * (that token is not filtered).
 *
 * @param  string  template source
 * @return array   tokens created by addToken()
 * @throws \InvalidArgumentException  when the input is not valid UTF-8
 * @throws CompileException  when the input ends inside a macro
 */
public function parse($input)
{
	$this->offset = 0;

	$bom = "\xEF\xBB\xBF";
	if (substr($input, 0, 3) === $bom) {
		$input = substr($input, 3);
	}
	// an empty pattern with the u modifier fails only when the subject is malformed UTF-8
	if (!preg_match('##u', $input)) {
		throw new \InvalidArgumentException('Template is not valid UTF-8 stream.');
	}
	$this->input = $input = str_replace("\r\n", "\n", $input);
	$this->output = array();

	$this->setSyntax($this->defaultSyntax);
	$this->setContext(self::CONTEXT_HTML_TEXT);
	$this->syntaxEndTag = NULL;
	$this->lastHtmlTag = NULL;

	$length = strlen($input);
	$filtered = 0; // number of tokens already passed to filter()

	while ($this->offset < $length) {
		$handler = 'context' . $this->context[0];
		if ($this->$handler() === FALSE) {
			break; // nothing more to match in this context
		}
		for (; $filtered < count($this->output); $filtered++) {
			$this->filter($this->output[$filtered]);
		}
	}

	if ($this->context[0] === self::CONTEXT_MACRO) {
		throw new CompileException('Malformed macro');
	}

	if ($this->offset < $length) {
		$this->addToken(Token::TEXT, substr($this->input, $this->offset));
	}
	return $this->output;
}
115:
116:
117: 118: 119:
/**
 * Handles CONTEXT_HTML_TEXT: finds the next HTML tag, HTML comment or macro opening.
 *
 * The pattern uses the x modifier, so whitespace and ## comments inside it are ignored.
 *
 * @return NULL|FALSE  FALSE when neither a tag, a comment nor a macro was found
 */
private function contextHtmlText()
{
	$found = $this->match('~
		(?:(?<=\n|^)[ \t]*)?<(?P<closing>/?)(?P<tag>[a-z0-9:]+)| ## begin of HTML tag <tag </tag - ignores <!DOCTYPE
		<(?P<htmlcomment>!--(?!>))| ## begin of HTML comment <!--, but not <!-->
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (!empty($found['htmlcomment'])) {
		$this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
		$this->setContext(self::CONTEXT_HTML_COMMENT);
		return NULL;
	}

	if (empty($found['tag'])) {
		return $this->processMacro($found);
	}

	$slash = $found['closing']; // '/' for a closing tag, '' otherwise
	$tagToken = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
	$tagToken->name = $found['tag'];
	$tagToken->closing = (bool) $slash;
	$this->lastHtmlTag = $slash . strtolower($found['tag']);
	$this->setContext(self::CONTEXT_HTML_TAG);
	return NULL;
}
143:
144:
145: 146: 147:
/**
 * Handles CONTEXT_CDATA: inside the element named by $lastHtmlTag only its own
 * closing tag or a macro opening is recognised.
 *
 * The pattern uses the x modifier, so whitespace and ## comments inside it are ignored.
 *
 * @return NULL|FALSE  FALSE when neither the closing tag nor a macro was found
 */
private function contextCData()
{
	$tag = $this->lastHtmlTag;
	$found = $this->match('~
		</(?P<tag>' . $tag . ')(?![a-z0-9:])| ## end HTML tag </tag
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (empty($found['tag'])) {
		return $this->processMacro($found);
	}

	$endToken = $this->addToken(Token::HTML_TAG_BEGIN, $found[0]);
	$endToken->name = $tag;
	$endToken->closing = TRUE;
	$this->lastHtmlTag = '/' . $tag;
	$this->setContext(self::CONTEXT_HTML_TAG);
	return NULL;
}
165:
166:
167: 168: 169:
/**
 * Handles CONTEXT_HTML_TAG: finds the end of the tag, a macro opening or the next attribute.
 *
 * A quoted value of an attribute whose name starts with N_PREFIX is consumed here up to the
 * closing quote; any other quoted value switches to CONTEXT_HTML_ATTRIBUTE.
 * The pattern uses the x modifier, so whitespace and ## comments inside it are ignored.
 *
 * @return NULL|FALSE  FALSE when nothing was found
 */
private function contextHtmlTag()
{
	$found = $this->match('~
		(?P<end>\ ?/?>)([ \t]*\n)?| ## end of HTML tag
		(?P<macro>' . $this->delimiters[0] . ')|
		\s*(?P<attr>[^\s/>={]+)(?:\s*=\s*(?P<value>["\']|[^\s/>{]+))? ## beginning of HTML attribute
	~xsi');

	if (!empty($found['end'])) {
		$this->addToken(Token::HTML_TAG_END, $found[0]);
		// outside XML mode the content of <script> and <style> is handled as CDATA
		$isCData = !$this->xmlMode && in_array($this->lastHtmlTag, array('script', 'style'), TRUE);
		$this->setContext($isCData ? self::CONTEXT_CDATA : self::CONTEXT_HTML_TEXT);
		return NULL;
	}

	if (!isset($found['attr']) || $found['attr'] === '') {
		return $this->processMacro($found);
	}

	$attr = $this->addToken(Token::HTML_ATTRIBUTE, $found[0]);
	$attr->name = $found['attr'];
	$attr->value = isset($found['value']) ? $found['value'] : '';

	if ($attr->value !== '"' && $attr->value !== "'") {
		return NULL; // no value or an unquoted value: the attribute is complete
	}

	if (strncmp($attr->name, self::N_PREFIX, strlen(self::N_PREFIX)) !== 0) {
		$this->setContext(self::CONTEXT_HTML_ATTRIBUTE, $found['value']);
		return NULL;
	}

	// n: attribute: take everything up to the matching quote as the value
	$attr->value = '';
	$rest = $this->match('~(.*?)' . $found['value'] . '~xsi');
	if ($rest) {
		$attr->value = $rest[1];
		$attr->text .= $rest[0];
	}
	return NULL;
}
202:
203:
204: 205: 206:
/**
 * Handles CONTEXT_HTML_ATTRIBUTE: finds the quote that ends the attribute value
 * (the quote is stored as the context data) or a macro opening.
 *
 * The pattern uses the x modifier, so whitespace and ## comments inside it are ignored.
 *
 * @return NULL|FALSE  FALSE when neither the quote nor a macro was found
 */
private function contextHtmlAttribute()
{
	$quote = $this->context[1];
	$found = $this->match('~
		(?P<quote>' . $quote . ')| ## end of HTML attribute
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (empty($found['quote'])) {
		return $this->processMacro($found);
	}

	$this->addToken(Token::TEXT, $found[0]);
	$this->setContext(self::CONTEXT_HTML_TAG);
	return NULL;
}
221:
222:
223: 224: 225:
/**
 * Handles CONTEXT_HTML_COMMENT: finds the end of the HTML comment or a macro opening.
 *
 * The method name had been lost, leaving a syntax error. It is restored to the name parse()
 * builds for this context: 'context' . 'htmlComment' (method names are case-insensitive).
 * The pattern uses the x modifier, so whitespace and ## comments inside it are ignored.
 *
 * @return NULL|FALSE  FALSE when neither the comment end nor a macro was found
 */
private function contextHtmlComment()
{
	$matches = $this->match('~
		(?P<htmlcomment>-->)| ## end of HTML comment
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');

	if (!empty($matches['htmlcomment'])) {
		$this->addToken(Token::HTML_TAG_END, $matches[0]);
		$this->setContext(self::CONTEXT_HTML_TEXT);
	} else {
		return $this->processMacro($matches);
	}
}
240:
241:
242: 243: 244:
/**
 * Handles CONTEXT_RAW: only a macro opening is recognised; text in front of it is
 * emitted as a TEXT token by match().
 *
 * The method name had been lost, leaving a syntax error. It is restored to the name parse()
 * builds for the 'raw' context: 'context' . 'raw' (method names are case-insensitive).
 * The pattern uses the x modifier, so whitespace inside it is ignored.
 *
 * @return NULL|FALSE  FALSE when no macro was found
 */
private function contextRaw()
{
	$matches = $this->match('~
		(?P<macro>' . $this->delimiters[0] . ')
	~xsi');
	return $this->processMacro($matches);
}
252:
253:
254: 255: 256:
/**
 * Handles CONTEXT_MACRO: reads the body of a macro or of a {* comment *} up to the right
 * delimiter and returns to the context that was active before the macro opened.
 *
 * The context data is array(previous context, opening delimiter text), as stored by
 * processMacro(). The pattern is anchored at the current offset (A modifier) and uses the
 * x modifier, so whitespace inside it is ignored.
 *
 * @return void
 * @throws CompileException  when no complete macro or comment starts at the current offset
 */
private function contextMacro()
{
	$matches = $this->match('~
		(?P<comment>\\*.*?\\*' . $this->delimiters[1] . '\n{0,2})|
		(?P<macro>(?>
			' . self::RE_STRING . '|
			\{(?>' . self::RE_STRING . '|[^\'"{}])*+\}|
			[^\'"{}]
		)+?)
		' . $this->delimiters[1] . '
		(?P<rmargin>[ \t]*(?=\n))?
	~xsiA');

	// empty() must not be used here: it treats the string "0" as empty, so the macro {0}
	// used to be rejected as malformed. Same check as contextHtmlTag() uses for attributes.
	if (isset($matches['macro']) && $matches['macro'] !== '') {
		$token = $this->addToken(Token::MACRO_TAG, $this->context[1][1] . $matches[0]);
		list($token->name, $token->value, $token->modifiers, $token->empty) = $this->parseMacroTag($matches['macro']);
		$this->context = $this->context[1][0]; // restore the context saved by processMacro()

	} elseif (!empty($matches['comment'])) {
		$this->addToken(Token::COMMENT, $this->context[1][1] . $matches[0]);
		$this->context = $this->context[1][0];

	} else {
		throw new CompileException('Malformed macro');
	}
}
283:
284:
/**
 * Switches to CONTEXT_MACRO when the given match contains a macro opening.
 *
 * The current context and the matched opening delimiter are stored as the context data
 * so that contextMacro() can restore the context afterwards.
 *
 * @param  array  result of match()
 * @return NULL|FALSE  FALSE when no macro opening was matched
 */
private function processMacro($matches)
{
	if (empty($matches['macro'])) {
		return FALSE;
	}
	$saved = array($this->context, $matches['macro']);
	$this->setContext(self::CONTEXT_MACRO, $saved);
	return NULL;
}
293:
294:
295: 296: 297: 298: 299:
/**
 * Matches the pattern against the input starting at the current offset.
 *
 * On success, text between the current offset and the start of the match is emitted as a
 * TEXT token, the offset is moved behind the match and the captured strings are returned.
 *
 * @param  string  regular expression
 * @return array   captured strings keyed as preg_match() keys them; empty array when nothing matched
 * @throws RegexpException  when preg_match() failed with a PCRE error
 */
private function match($re)
{
	$found = preg_match($re, $this->input, $captures, PREG_OFFSET_CAPTURE, $this->offset);
	if (!$found) {
		$error = preg_last_error();
		if ($error) {
			throw new RegexpException(NULL, $error);
		}
		return array();
	}

	list($whole, $start) = $captures[0];

	// the token is added before the offset moves, so its line refers to where the text begins
	$skipped = substr($this->input, $this->offset, $start - $this->offset);
	if ($skipped !== '') {
		$this->addToken(Token::TEXT, $skipped);
	}
	$this->offset = $start + strlen($whole);

	// drop the offsets, keep only the matched strings
	$strings = array();
	foreach ($captures as $key => $capture) {
		$strings[$key] = $capture[0];
	}
	return $strings;
}
319:
320:
321: 322: 323:
/**
 * Selects the parsing context according to a content type.
 *
 * A type containing 'html' turns XML mode off, otherwise a type containing 'xml' turns it
 * on; both select CONTEXT_HTML_TEXT. Any other type selects CONTEXT_RAW and leaves the
 * XML mode as it was.
 *
 * @param  string  content type
 * @return self
 */
public function setContentType($type)
{
	$isHtml = strpos($type, 'html') !== FALSE;

	if ($isHtml || strpos($type, 'xml') !== FALSE) {
		$this->xmlMode = !$isHtml;
		$this->setContext(self::CONTEXT_HTML_TEXT);
	} else {
		$this->setContext(self::CONTEXT_RAW);
	}
	return $this;
}
337:
338:
339: 340: 341:
/**
 * Sets the current context together with its data.
 *
 * @param  string  one of the CONTEXT_* values
 * @param  mixed   context data, e.g. the quote character of an attribute value
 * @return self
 */
public function setContext($context, $quote = NULL)
{
	$state = array();
	$state[0] = $context;
	$state[1] = $quote;
	$this->context = $state;
	return $this;
}
347:
348:
349: 350: 351: 352: 353:
/**
 * Activates the delimiters of a named syntax.
 *
 * @param  string  key of $syntaxes; a falsy value selects $defaultSyntax
 * @return self
 * @throws \InvalidArgumentException  when the syntax is not defined in $syntaxes
 */
public function setSyntax($type)
{
	if (!$type) {
		$type = $this->defaultSyntax;
	}
	if (!isset($this->syntaxes[$type])) {
		throw new \InvalidArgumentException("Unknown syntax '$type'");
	}
	$this->setDelimiters($this->syntaxes[$type][0], $this->syntaxes[$type][1]);
	return $this;
}
364:
365:
366: 367: 368: 369: 370: 371:
/**
 * Sets the macro delimiters. Both values are inserted into regular expressions as they are.
 *
 * @param  string  left delimiter regexp
 * @param  string  right delimiter regexp
 * @return self
 */
public function setDelimiters($left, $right)
{
	$pair = array($left, $right);
	$this->delimiters = $pair;
	return $this;
}
377:
378:
379: 380: 381: 382: 383: 384:
/**
 * Splits the inside of a macro tag into its name, arguments and modifiers.
 *
 * When the tag does not start with a macro name, the name becomes the short name
 * (an optional / followed by one of = ~ # % ^ & _) or '=' when there is none. A leading !
 * appends the |noescape modifier and, unless $shortNoEscape is set, raises E_USER_DEPRECATED.
 * The pattern uses the x modifier, so whitespace and ## comments inside it are ignored; the
 * empty group at its end makes preg_match() return every named group even when unmatched.
 *
 * @param  string  macro tag content without delimiters
 * @return array|FALSE  array(name, trimmed arguments, modifiers, bool ends with /) or FALSE when the tag does not match
 * @throws RegexpException  when preg_match() failed with a PCRE error
 */
public function parseMacroTag($tag)
{
	$matched = preg_match('~^
		(
			(?P<name>\?|/?[a-z]\w*+(?:[.:]\w+)*+(?!::|\(|\\\\))| ## ?, name, /name, but not function( or class:: or namespace\
			(?P<noescape>!?)(?P<shortname>/?[=\~#%^&_]?) ## !expression, !=expression, ...
		)(?P<args>.*?)
		(?P<modifiers>\|[a-z](?:' . self::RE_STRING . '|[^\'"/]|/(?=.))*+)?
		(?P<empty>/?\z)
	()\z~isx', $tag, $parts);

	if (!$matched) {
		$error = preg_last_error();
		if ($error) {
			throw new RegexpException(NULL, $error);
		}
		return FALSE;
	}

	$name = $parts['name'];
	$modifiers = $parts['modifiers'];

	if ($name === '') {
		$name = $parts['shortname'] ?: '=';
		if ($parts['noescape']) {
			if (!$this->shortNoEscape) {
				trigger_error("The noescape shortcut {!...} is deprecated, use {...|noescape} modifier on line {$this->getLine()}.", E_USER_DEPRECATED);
			}
			$modifiers .= '|noescape';
		}
	}

	return array($name, trim($parts['args']), $modifiers, (bool) $parts['empty']);
}
411:
412:
/**
 * Creates a token, stamps it with the current line and appends it to the output.
 *
 * @param  string  token type, a Token::* constant
 * @param  string  source text of the token
 * @return Token
 */
private function addToken($type, $text)
{
	$token = new Token;
	$token->type = $type;
	$token->text = $text;
	$token->line = $this->getLine();
	$this->output[] = $token;
	return $token;
}
421:
422:
/**
 * Returns the line number that corresponds to the current offset.
 *
 * Newlines are counted in the input up to, but not including, the character just before
 * the offset, so a newline that is the last consumed character still belongs to its own line.
 *
 * @return int  1-based line number, or 0 while the offset is 0
 */
public function getLine()
{
	if (!$this->offset) {
		return 0;
	}
	$consumed = substr($this->input, 0, $this->offset - 1);
	return substr_count($consumed, "\n") + 1;
}
429:
430:
431: 432: 433:
/**
 * Reacts to tokens that change the parser state. Only the first matching rule applies:
 *
 * - {/syntax} restores the default syntax and turns the token into a comment
 * - {syntax name} switches the syntax and turns the token into a comment
 * - n:syntax="name" switches the syntax, remembers the current tag as the end tag,
 *   sets the nesting counter to 1 and turns the token into a comment
 * - a tag begin while $lastHtmlTag equals the remembered tag increments the counter
 * - a tag end of the matching closing tag decrements the counter and, when it
 *   reaches 0, restores the default syntax
 * - {contentType type} is passed to setContentType()
 *
 * @param  Token  token just produced by the parser
 * @return void
 */
protected function filter(Token $token)
{
	$type = $token->type;
	$isMacro = $type === Token::MACRO_TAG;

	if ($isMacro && $token->name === '/syntax') {
		$this->setSyntax($this->defaultSyntax);
		$token->type = Token::COMMENT;
		return;
	}

	if ($isMacro && $token->name === 'syntax') {
		$this->setSyntax($token->value);
		$token->type = Token::COMMENT;
		return;
	}

	if ($type === Token::HTML_ATTRIBUTE && $token->name === 'n:syntax') {
		$this->setSyntax($token->value);
		$this->syntaxEndTag = $this->lastHtmlTag;
		$this->syntaxEndLevel = 1;
		$token->type = Token::COMMENT;
		return;
	}

	if ($type === Token::HTML_TAG_BEGIN && $this->lastHtmlTag === $this->syntaxEndTag) {
		$this->syntaxEndLevel++;
		return;
	}

	// the decrement is part of the condition on purpose: it happens only for the matching closing tag
	if ($type === Token::HTML_TAG_END && $this->lastHtmlTag === ('/' . $this->syntaxEndTag) && --$this->syntaxEndLevel === 0) {
		$this->setSyntax($this->defaultSyntax);
		return;
	}

	if ($isMacro && $token->name === 'contentType') {
		$this->setContentType($token->value);
	}
}
460:
461: }
462: