friendship ended with social-app. php is my new best friend
1<?php
2/**
3 * @file
4 * The rules for generating output in the serializer.
5 *
6 * These output rules are likely to generate output similar to the document that
7 * was parsed. It is not intended to output exactly the document that was parsed.
8 */
9
10namespace Masterminds\HTML5\Serializer;
11
12use Masterminds\HTML5\Elements;
13
14/**
15 * Generate the output html5 based on element rules.
16 */
17class OutputRules implements RulesInterface
18{
/**
 * Namespace URI of HTML elements.
 *
 * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
 */
const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

/** Namespace URI of MathML. */
const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

/** Namespace URI of SVG. */
const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

/** Namespace URI of XLink. */
const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

/** Namespace URI bound to the `xml` prefix. */
const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

/** Namespace URI bound to the `xmlns` prefix. */
const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

/**
 * Namespace URIs that are treated as implicit.
 *
 * namespaceAttrs() skips any namespace declaration whose URI appears in this
 * list, so these namespaces are never written out as attributes.
 *
 * @var array
 */
protected $implicitNamespaces = array(
    self::NAMESPACE_HTML,
    self::NAMESPACE_SVG,
    self::NAMESPACE_MATHML,
    self::NAMESPACE_XML,
    self::NAMESPACE_XMLNS,
);

/** Output mode: plain HTML. This is the mode set by the constructor. */
const IM_IN_HTML = 1;

/** Output mode entered by element() when it meets an `svg` element. */
const IM_IN_SVG = 2;

/** Output mode entered by element() when it meets a `math` element. */
const IM_IN_MATHML = 3;

/**
 * Used as cache to detect if ENT_HTML5 is available.
 *
 * Set in the constructor from defined('ENT_HTML5') and read by enc().
 *
 * @var bool
 */
private $hasHTML5 = false;

/**
 * The traverser that drives this rule set; assigned by setTraverser() and
 * cleared by unsetTraverser(). Used to walk child nodes and to decide whether
 * an element is written with its local name.
 */
protected $traverser;

/**
 * Value of the 'encode_entities' constructor option. When truthy, enc()
 * converts text to named character references instead of minimal escaping.
 */
protected $encode = false;

/**
 * The output destination; passed straight to fwrite() by wr() and nl().
 */
protected $out;

/**
 * The current output mode; one of the IM_IN_* constants.
 */
protected $outputMode;

/**
 * Cached \DOMXPath instance shared by namespaceAttrs() and getXPath().
 */
private $xpath;
69
/**
 * Rules describing attributes that are always written with a value.
 *
 * attrs() writes an empty-valued attribute as `name=""` when
 * nonBooleanAttribute() finds a matching rule here, and as a bare `name`
 * otherwise. A rule matches when every key it defines matches; additional
 * rules can be appended with addRule().
 *
 * @var array
 */
protected $nonBooleanAttributes = array(
    /*
     * Shape of a rule, using the keys read by nonBooleanAttribute(). The
     * attribute-namespace constraint is looked up under the key
     * 'attNamespace'. 'nodeName' and 'attrName' accept either a single string
     * or an array of strings. A rule may also carry an 'xpath' expression
     * (evaluated with the attribute as context node) and a 'prefixes' map of
     * prefix => namespace URI registered before the expression is evaluated.
     *
     * array(
     *     'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
     *     'attNamespace'  => 'http://www.w3.org/1999/xhtml',
     *     'nodeName'      => 'img',  // or array('img', 'a')
     *     'attrName'      => 'alt',  // or array('title', 'alt')
     * ),
     */
    // Attributes of elements in the HTML namespace that are matched by local name.
    array(
        'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
        'attrName' => array('href',
            'hreflang',
            'http-equiv',
            'icon',
            'id',
            'keytype',
            'kind',
            'label',
            'lang',
            'language',
            'list',
            'maxlength',
            'media',
            'method',
            'name',
            'placeholder',
            'rel',
            'rows',
            'rowspan',
            'sandbox',
            'spellcheck',
            'scope',
            'seamless',
            'shape',
            'size',
            'sizes',
            'span',
            'src',
            'srcdoc',
            'srclang',
            'srcset',
            'start',
            'step',
            'style',
            'summary',
            'tabindex',
            'target',
            'title',
            'type',
            'value',
            'width',
            'border',
            'charset',
            'cite',
            'class',
            'code',
            'codebase',
            'color',
            'cols',
            'colspan',
            'content',
            'coords',
            'data',
            'datetime',
            'default',
            'dir',
            'dirname',
            'enctype',
            'for',
            'form',
            'formaction',
            'headers',
            'height',
            'accept',
            'accept-charset',
            'accesskey',
            'action',
            'align',
            'alt',
            'bgcolor',
        ),
    ),
    // Any attribute of an HTML-namespace element whose local name starts with "data-".
    array(
        'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
        'xpath' => 'starts-with(local-name(), \'data-\')',
    ),
);

/** The doctype declaration written by doctype(). */
const DOCTYPE = '<!DOCTYPE html>';
161
/**
 * Create a rule set that writes to the given output.
 *
 * @param mixed $output  Destination handed to fwrite() by wr() and nl().
 * @param array $options Optional settings. Only 'encode_entities' is read
 *                       here; when present it is stored as the encode flag
 *                       consulted by enc().
 */
public function __construct($output, $options = array())
{
    $this->out = $output;
    $this->outputMode = static::IM_IN_HTML;

    // Cache whether the native HTML5 entity table is available.
    $this->hasHTML5 = defined('ENT_HTML5');

    if (!isset($options['encode_entities'])) {
        return;
    }

    $this->encode = $options['encode_entities'];
}
172
/**
 * Register an additional rule in the list consulted by nonBooleanAttribute().
 *
 * @param array $rule The rule to append after the existing ones.
 */
public function addRule(array $rule)
{
    array_push($this->nonBooleanAttributes, $rule);
}
177
/**
 * Attach the traverser that this rule set calls back into.
 *
 * @param Traverser $traverser The traverser to use from now on.
 *
 * @return $this
 */
public function setTraverser(Traverser $traverser)
{
    $this->traverser = $traverser;

    return $this;
}
184
/**
 * Detach the current traverser by resetting the reference to null.
 *
 * @return $this
 */
public function unsetTraverser()
{
    $this->traverser = null;

    return $this;
}
191
/**
 * Write a whole document.
 *
 * The doctype is always written. The top-level nodes are handed to the
 * traverser, followed by a trailing newline, only when the document has a
 * document element.
 *
 * @param \DOMDocument $dom The document to write.
 */
public function document($dom)
{
    $this->doctype();

    if (!$dom->documentElement) {
        return;
    }

    foreach ($dom->childNodes as $topLevelNode) {
        $this->traverser->node($topLevelNode);
    }

    $this->nl();
}
202
/**
 * Write the doctype declaration followed by a newline.
 */
protected function doctype()
{
    $declaration = static::DOCTYPE;

    $this->wr($declaration);
    $this->nl();
}
208
/**
 * Write an element: its opening tag, its content and, when needed, its
 * closing tag.
 *
 * Entering an `svg` or `math` element switches the output mode to SVG or
 * MathML. When that element has been written, the mode that was active on
 * entry is restored. Restoring the entry mode (rather than always falling
 * back to HTML) keeps the siblings that follow a nested `svg`/`math` element
 * inside an outer SVG/MathML subtree in the correct mode.
 *
 * Children of raw-text elements are written without escaping; all other
 * children are delegated to the traverser.
 *
 * @param \DOMElement $ele The element to write.
 */
public function element($ele)
{
    $name = $ele->tagName;

    // Per spec:
    // If the element has a declared namespace in the HTML, MathML or
    // SVG namespaces, we use the lname instead of the tagName.
    if ($this->traverser->isLocalElement($ele)) {
        $name = $ele->localName;
    }

    // Remember the mode we were called in so that it can be restored once
    // an svg/math subtree has been written.
    $previousMode = $this->outputMode;

    // If we are in SVG or MathML there is special handling.
    // Using if/elseif instead of switch because it's faster in PHP.
    if ('svg' == $name) {
        $this->outputMode = static::IM_IN_SVG;
        $name = Elements::normalizeSvgElement($name);
    } elseif ('math' == $name) {
        $this->outputMode = static::IM_IN_MATHML;
    }

    $this->openTag($ele);
    // The tag is already self-closed (`<svg />` or `<math />`) in `openTag` if there are no child nodes.
    $handledAsVoidTag = $this->outputMode !== static::IM_IN_HTML && !$ele->hasChildNodes();

    if (Elements::isA($name, Elements::TEXT_RAW)) {
        // Raw text content is written verbatim; nested elements are written
        // by this method directly instead of going through the traverser.
        foreach ($ele->childNodes as $child) {
            if ($child instanceof \DOMCharacterData) {
                $this->wr($child->data);
            } elseif ($child instanceof \DOMElement) {
                $this->element($child);
            }
        }
    } else {
        // Handle children.
        if ($ele->hasChildNodes()) {
            $this->traverser->children($ele->childNodes);
        }

        // Close out the SVG or MathML special handling by going back to the
        // mode that was active before this element was entered.
        if ('svg' == $name || 'math' == $name) {
            $this->outputMode = $previousMode;
        }
    }

    // If not unary, add a closing tag.
    if (!$handledAsVoidTag && !Elements::isA($name, Elements::VOID_TAG)) {
        $this->closeTag($ele);
    }
}
261
/**
 * Write a text node.
 *
 * Text whose parent is a raw-text element is written unchanged; any other
 * text goes through enc() first.
 *
 * @param \DOMText $ele The text node to write.
 */
public function text($ele)
{
    $insideRawText = isset($ele->parentNode)
        && isset($ele->parentNode->tagName)
        && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW);

    if (!$insideRawText) {
        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));

        return;
    }

    $this->wr($ele->data);
}
278
/**
 * Write a CDATA section, using the owner document's XML serialization of the
 * node.
 *
 * @param \DOMCdataSection $ele The CDATA section to write.
 */
public function cdata($ele)
{
    // This encodes CDATA.
    $serialized = $ele->ownerDocument->saveXML($ele);

    $this->wr($serialized);
}
284
/**
 * Write a comment node, using the owner document's XML serialization of the
 * node.
 *
 * @param \DOMComment $ele The comment to write.
 */
public function comment($ele)
{
    $serialized = $ele->ownerDocument->saveXML($ele);

    $this->wr($serialized);
}
291
/**
 * Write a processing instruction as the opening marker, the target, a single
 * space, the data and the closing marker.
 *
 * @param \DOMProcessingInstruction $ele The processing instruction to write.
 */
public function processorInstruction($ele)
{
    $this->wr('<?');
    $this->wr($ele->target);
    $this->wr(' ');
    $this->wr($ele->data);
    $this->wr('?>');
}
300
/**
 * Write the namespace declarations of an element.
 *
 * Only the namespace nodes selected by the XPath query below are considered,
 * and those whose URI is listed in $implicitNamespaces are skipped.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function namespaceAttrs($ele)
{
    $document = $ele->ownerDocument;

    // The cached XPath object is only reusable for the document it was built for.
    $cacheIsUsable = $this->xpath && $this->xpath->document === $document;
    if (!$cacheIsUsable) {
        $this->xpath = new \DOMXPath($document);
    }

    $namespaceNodes = $this->xpath->query('namespace::*[not(.=../../namespace::*)]', $ele);
    foreach ($namespaceNodes as $nsNode) {
        if (in_array($nsNode->nodeValue, $this->implicitNamespaces)) {
            continue;
        }

        $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
    }
}
318
/**
 * Write the opening tag, including attributes and namespace declarations.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * In HTML mode the tag always ends with `>`. In any other mode an element
 * without child nodes is written self-closed (` />`).
 *
 * @param \DOMNode $ele The element being written.
 */
protected function openTag($ele)
{
    $this->wr('<');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName);

    $this->attrs($ele);
    $this->namespaceAttrs($ele);

    // Outside of HTML mode we are in SVG, MathML, or XML embedded content,
    // where an element without children is self closing.
    $selfClosing = $this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes();

    $this->wr($selfClosing ? ' />' : '>');
}
346
/**
 * Write the attributes of an element.
 *
 * Each attribute is written as ` name="value"`. An attribute whose encoded
 * value is empty is written as a bare ` name` unless nonBooleanAttribute()
 * reports that it must keep its value, in which case ` name=""` is written.
 * In SVG and MathML mode the attribute name is normalized first.
 *
 * @param \DOMElement $ele The element whose attributes are written.
 *
 * @return $this
 */
protected function attrs($ele)
{
    // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
    if (!$ele->hasAttributes()) {
        return $this;
    }

    foreach ($ele->attributes as $node) {
        $val = $this->enc($node->value, true);

        // XXX: The spec says that we need to ensure that anything in
        // the XML, XMLNS, or XLink NS's should use the canonical
        // prefix. It seems that DOM does this for us already, but there
        // may be exceptions.
        $name = $node->nodeName;

        // Special handling for attributes in SVG and MathML.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ($this->outputMode == static::IM_IN_SVG) {
            $name = Elements::normalizeSvgAttribute($name);
        } elseif ($this->outputMode == static::IM_IN_MATHML) {
            $name = Elements::normalizeMathMlAttribute($name);
        }

        $this->wr(' ')->wr($name);

        // Value-less (boolean) attributes are written as the bare name only.
        $hasValue = null !== $val && '' !== $val;
        if ($hasValue || $this->nonBooleanAttribute($node)) {
            $this->wr('="')->wr($val)->wr('"');
        }
    }

    // Return $this on every path, matching the early return above.
    return $this;
}
383
/**
 * Decide whether an attribute must always be written with a value.
 *
 * The attribute is checked against each rule in $nonBooleanAttributes; the
 * first rule whose constraints all hold makes this method return true.
 * Supported constraints:
 *  - 'nodeNamespace': namespace URI of the owner element.
 *  - 'attrNamespace': namespace URI of the attribute. The historical
 *    spelling 'attNamespace' is accepted as well.
 *  - 'nodeName': local name of the owner element (string or array of strings).
 *  - 'attrName': local name of the attribute (string or array of strings).
 *  - 'xpath': expression evaluated with the attribute as context node;
 *    'prefixes' (prefix => namespace URI) are registered beforehand.
 *
 * @param \DOMAttr $attr The attribute to check.
 *
 * @return bool True when a rule matches, false otherwise.
 */
protected function nonBooleanAttribute(\DOMAttr $attr)
{
    $ele = $attr->ownerElement;
    foreach ($this->nonBooleanAttributes as $rule) {
        if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
            continue;
        }

        // The in-file rule example spells this key 'attrNamespace' while the
        // code historically read 'attNamespace'; honour both spellings.
        foreach (array('attrNamespace', 'attNamespace') as $namespaceKey) {
            if (isset($rule[$namespaceKey]) && $rule[$namespaceKey] !== $attr->namespaceURI) {
                continue 2;
            }
        }

        // A scalar name is treated as a one-element list.
        if (isset($rule['nodeName']) && !in_array($ele->localName, (array) $rule['nodeName'], true)) {
            continue;
        }
        if (isset($rule['attrName']) && !in_array($attr->localName, (array) $rule['attrName'], true)) {
            continue;
        }

        if (isset($rule['xpath'])) {
            $xp = $this->getXPath($attr);
            if (isset($rule['prefixes'])) {
                foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                    $xp->registerNamespace($nsPrefix, $ns);
                }
            }
            if (!$xp->evaluate($rule['xpath'], $attr)) {
                continue;
            }
        }

        return true;
    }

    return false;
}
423
/**
 * Return an XPath evaluator bound to the document that owns the given node.
 *
 * The evaluator is cached. It is rebuilt whenever the cached instance
 * belongs to a different document than the node, so a rule set that is
 * reused for several documents never evaluates expressions against the
 * wrong document. namespaceAttrs() applies the same check to the same cache.
 *
 * @param \DOMNode $node A node of the document to query.
 *
 * @return \DOMXPath
 */
private function getXPath(\DOMNode $node)
{
    if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
        $this->xpath = new \DOMXPath($node->ownerDocument);
    }

    return $this->xpath;
}
432
/**
 * Write the closing tag.
 *
 * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
 * qualified name (8.3).
 *
 * Nothing is written for an element without child nodes outside of HTML
 * mode.
 *
 * @param \DOMNode $ele The element being written.
 */
protected function closeTag($ele)
{
    if ($this->outputMode != static::IM_IN_HTML && !$ele->hasChildNodes()) {
        return;
    }

    $this->wr('</');
    $tagName = $this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName;
    $this->wr($tagName)->wr('>');
}
447
/**
 * Send a string to the output.
 *
 * @param string $text The string to put into the output
 *
 * @return $this
 */
protected function wr($text)
{
    $destination = $this->out;
    fwrite($destination, $text);

    return $this;
}
461
/**
 * Send the platform end-of-line sequence (PHP_EOL) to the output.
 *
 * @return $this
 */
protected function nl()
{
    $destination = $this->out;
    fwrite($destination, PHP_EOL);

    return $this;
}
473
/**
 * Encode text for output.
 *
 * With the encode flag off (the default) the text is only escaped as
 * described for escape(), following section 8.3 of the html5 spec.
 *
 * With the encode flag on, characters are converted to named character
 * references where one exists (section 8.1.4 of the html5 spec; the
 * references themselves are listed in section 8.5). This covers many common
 * characters, not just the few that escape() handles.
 *
 * When the ENT_HTML5 constant is available (PHP 5.4+) the native
 * htmlentities() table is used; otherwise the conversion falls back to the
 * HTML5Entities map.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references
 *
 * @todo Use the Entities class in php 5.3 to have html5 entities.
 *
 * @param string $text      Text to encode.
 * @param bool   $attribute True if we are encoding an attribute, false otherwise.
 *
 * @return string The encoded text.
 */
protected function enc($text, $attribute = false)
{
    if ($this->encode) {
        // Native html5 entity support when available, manual map otherwise.
        return $this->hasHTML5
            ? htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false)
            : strtr($text, HTML5Entities::$map);
    }

    // Escape the text rather than convert to named character references.
    return $this->escape($text, $attribute);
}
519
/**
 * Escape text.
 *
 * According to the html5 spec section 8.3 Serializing HTML fragments, text
 * within tags that are not style, script, xmp, iframe, noembed, and noframes
 * needs to be properly escaped.
 *
 * The `&` is converted to `&amp;` and the no-break space (U+00A0) to
 * `&nbsp;`. In attribute mode the `"` is additionally converted to `&quot;`;
 * outside of attribute mode the `<` and `>` are converted to `&lt;` and
 * `&gt;` instead.
 *
 * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
 *
 * @param string $text      Text to escape.
 * @param bool   $attribute True if we are escaping an attribute, false otherwise.
 *
 * @return string The escaped text.
 */
protected function escape($text, $attribute = false)
{
    // Not using htmlspecialchars because, while it does escaping, it doesn't
    // match the requirements of section 8.5. For example, it doesn't handle
    // non-breaking spaces.
    if ($attribute) {
        $replace = array(
            '"' => '&quot;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    } else {
        $replace = array(
            '<' => '&lt;',
            '>' => '&gt;',
            '&' => '&amp;',
            "\xc2\xa0" => '&nbsp;',
        );
    }

    // strtr() with an array never rescans its own replacements, so the `&`
    // of an inserted entity is not escaped a second time.
    return strtr($text, $replace);
}
559}