friendship ended with social-app. php is my new best friend
<?php
/**
 * @file
 * The rules for generating output in the serializer.
 *
 * These output rules are likely to generate output similar to the document that
 * was parsed. It is not intended to output exactly the document that was parsed.
 */

namespace Masterminds\HTML5\Serializer;

use Masterminds\HTML5\Elements;

/**
 * Generate the output html5 based on element rules.
 */
class OutputRules implements RulesInterface
{
    /**
     * Defined in http://www.w3.org/TR/html51/infrastructure.html#html-namespace-0.
     */
    const NAMESPACE_HTML = 'http://www.w3.org/1999/xhtml';

    const NAMESPACE_MATHML = 'http://www.w3.org/1998/Math/MathML';

    const NAMESPACE_SVG = 'http://www.w3.org/2000/svg';

    const NAMESPACE_XLINK = 'http://www.w3.org/1999/xlink';

    const NAMESPACE_XML = 'http://www.w3.org/XML/1998/namespace';

    const NAMESPACE_XMLNS = 'http://www.w3.org/2000/xmlns/';

    /**
     * Namespace URIs that are never written out as explicit namespace
     * declarations (see namespaceAttrs()).
     *
     * @var array
     */
    protected $implicitNamespaces = array(
        self::NAMESPACE_HTML,
        self::NAMESPACE_SVG,
        self::NAMESPACE_MATHML,
        self::NAMESPACE_XML,
        self::NAMESPACE_XMLNS,
    );

    const IM_IN_HTML = 1;

    const IM_IN_SVG = 2;

    const IM_IN_MATHML = 3;

    /**
     * Used as cache to detect if is available ENT_HTML5.
     *
     * @var bool
     */
    private $hasHTML5 = false;

    /**
     * The traverser driving this rule set; assigned through setTraverser().
     */
    protected $traverser;

    /**
     * When true, enc() converts text to named character references instead
     * of applying the minimal escape() rules.
     */
    protected $encode = false;

    /**
     * The stream resource that wr() and nl() write to.
     */
    protected $out;

    /**
     * One of the IM_IN_* constants; tracks whether HTML, SVG or MathML
     * content is currently being written.
     */
    protected $outputMode;

    /**
     * Cached \DOMXPath instance, see getXPath().
     */
    private $xpath;

    /**
     * Rules describing attributes that must always be written as name="value",
     * even when the value is empty. Each rule is an array of optional
     * constraints which must all match (see nonBooleanAttribute()).
     */
    protected $nonBooleanAttributes = array(
        /*
        array(
            'nodeNamespace'=>'http://www.w3.org/1999/xhtml',
            'attrNamespace'=>'http://www.w3.org/1999/xhtml',

            'nodeName'=>'img', 'nodeName'=>array('img', 'a'),
            'attrName'=>'alt', 'attrName'=>array('title', 'alt'),
        ),
        */
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'attrName' => array(
                'href',
                'hreflang',
                'http-equiv',
                'icon',
                'id',
                'keytype',
                'kind',
                'label',
                'lang',
                'language',
                'list',
                'maxlength',
                'media',
                'method',
                'name',
                'placeholder',
                'rel',
                'rows',
                'rowspan',
                'sandbox',
                'spellcheck',
                'scope',
                'seamless',
                'shape',
                'size',
                'sizes',
                'span',
                'src',
                'srcdoc',
                'srclang',
                'srcset',
                'start',
                'step',
                'style',
                'summary',
                'tabindex',
                'target',
                'title',
                'type',
                'value',
                'width',
                'border',
                'charset',
                'cite',
                'class',
                'code',
                'codebase',
                'color',
                'cols',
                'colspan',
                'content',
                'coords',
                'data',
                'datetime',
                'default',
                'dir',
                'dirname',
                'enctype',
                'for',
                'form',
                'formaction',
                'headers',
                'height',
                'accept',
                'accept-charset',
                'accesskey',
                'action',
                'align',
                'alt',
                'bgcolor',
            ),
        ),
        array(
            'nodeNamespace' => 'http://www.w3.org/1999/xhtml',
            'xpath' => 'starts-with(local-name(), \'data-\')',
        ),
    );

    const DOCTYPE = '<!DOCTYPE html>';

    /**
     * @param resource $output  Stream resource the serialized markup is written to.
     * @param array    $options Supported key: 'encode_entities' (stored as the
     *                          encode flag used by enc()).
     */
    public function __construct($output, $options = array())
    {
        if (isset($options['encode_entities'])) {
            $this->encode = $options['encode_entities'];
        }

        $this->outputMode = static::IM_IN_HTML;
        $this->out = $output;
        $this->hasHTML5 = defined('ENT_HTML5');
    }

    /**
     * Register an additional non-boolean attribute rule.
     *
     * @param array $rule A rule in the format used by $nonBooleanAttributes.
     */
    public function addRule(array $rule)
    {
        $this->nonBooleanAttributes[] = $rule;
    }

    /**
     * Set the traverser used to walk child nodes.
     *
     * @param Traverser $traverser
     *
     * @return $this
     */
    public function setTraverser(Traverser $traverser)
    {
        $this->traverser = $traverser;

        return $this;
    }

    /**
     * Drop the reference to the traverser.
     *
     * @return $this
     */
    public function unsetTraverser()
    {
        $this->traverser = null;

        return $this;
    }

    /**
     * Write a document: the doctype, then (only when the document has a
     * root element) every top-level node followed by a newline.
     *
     * @param \DOMDocument $dom
     */
    public function document($dom)
    {
        $this->doctype();
        if ($dom->documentElement) {
            foreach ($dom->childNodes as $node) {
                $this->traverser->node($node);
            }
            $this->nl();
        }
    }

    /**
     * Write the doctype declaration followed by a newline.
     */
    protected function doctype()
    {
        $this->wr(static::DOCTYPE);
        $this->nl();
    }

    /**
     * Write an element: opening tag, children and (unless void) closing tag.
     *
     * @param \DOMElement $ele
     */
    public function element($ele)
    {
        $name = $ele->tagName;

        // Per spec:
        // If the element has a declared namespace in the HTML, MathML or
        // SVG namespaces, we use the lname instead of the tagName.
        if ($this->traverser->isLocalElement($ele)) {
            $name = $ele->localName;
        }

        // If we are in SVG or MathML there is special handling.
        // Using if/elseif instead of switch because it's faster in PHP.
        if ('svg' === $name) {
            $this->outputMode = static::IM_IN_SVG;
            $name = Elements::normalizeSvgElement($name);
        } elseif ('math' === $name) {
            $this->outputMode = static::IM_IN_MATHML;
        }

        $this->openTag($ele);
        // The tag is already self-closed (`<svg />` or `<math />`) in `openTag` if there are no child nodes.
        // This must be captured before the output mode is reset below.
        $handledAsVoidTag = $this->outputMode !== static::IM_IN_HTML && !$ele->hasChildNodes();

        if (Elements::isA($name, Elements::TEXT_RAW)) {
            // Raw text elements: character data is written unescaped.
            foreach ($ele->childNodes as $child) {
                if ($child instanceof \DOMCharacterData) {
                    $this->wr($child->data);
                } elseif ($child instanceof \DOMElement) {
                    $this->element($child);
                }
            }
        } else {
            // Handle children.
            if ($ele->hasChildNodes()) {
                $this->traverser->children($ele->childNodes);
            }

            // Close out the SVG or MathML special handling.
            if ('svg' === $name || 'math' === $name) {
                $this->outputMode = static::IM_IN_HTML;
            }
        }

        // If not unary, add a closing tag.
        if (!$handledAsVoidTag && !Elements::isA($name, Elements::VOID_TAG)) {
            $this->closeTag($ele);
        }
    }

    /**
     * Write a text node.
     *
     * Text whose parent is a raw text element is written as-is; everything
     * else goes through enc().
     *
     * @param \DOMText $ele The text node to write.
     */
    public function text($ele)
    {
        if (isset($ele->parentNode) && isset($ele->parentNode->tagName) && Elements::isA($ele->parentNode->localName, Elements::TEXT_RAW)) {
            $this->wr($ele->data);

            return;
        }

        // FIXME: This probably needs some flags set.
        $this->wr($this->enc($ele->data));
    }

    /**
     * Write a CDATA section, serialized by the owner document's saveXML().
     *
     * @param \DOMCdataSection $ele
     */
    public function cdata($ele)
    {
        // This encodes CDATA.
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a comment node, serialized by the owner document's saveXML().
     *
     * @param \DOMComment $ele
     */
    public function comment($ele)
    {
        // These produce identical output.
        // $this->wr('<!--')->wr($ele->data)->wr('-->');
        $this->wr($ele->ownerDocument->saveXML($ele));
    }

    /**
     * Write a processing instruction as `<?target data?>`.
     *
     * @param \DOMProcessingInstruction $ele
     */
    public function processorInstruction($ele)
    {
        $this->wr('<?')
            ->wr($ele->target)
            ->wr(' ')
            ->wr($ele->data)
            ->wr('?>');
    }

    /**
     * Write the namespace attributes.
     *
     * Only namespaces that differ from those in scope on the parent and that
     * are not listed in $implicitNamespaces are written.
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function namespaceAttrs($ele)
    {
        $xpath = $this->getXPath($ele);

        foreach ($xpath->query('namespace::*[not(.=../../namespace::*)]', $ele) as $nsNode) {
            if (!in_array($nsNode->nodeValue, $this->implicitNamespaces, true)) {
                $this->wr(' ')->wr($nsNode->nodeName)->wr('="')->wr($nsNode->nodeValue)->wr('"');
            }
        }
    }

    /**
     * Write the opening tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function openTag($ele)
    {
        $this->wr('<')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName);

        $this->attrs($ele);
        $this->namespaceAttrs($ele);

        if ($this->outputMode == static::IM_IN_HTML) {
            $this->wr('>');
        } // If we are not in html mode we are in SVG, MathML, or XML embedded content.
        else {
            if ($ele->hasChildNodes()) {
                $this->wr('>');
            } // If there are no children this is self closing.
            else {
                $this->wr(' />');
            }
        }
    }

    /**
     * Write the attributes of an element.
     *
     * An attribute is written as a bare name only when its encoded value is
     * empty and no non-boolean rule matches it; otherwise as name="value".
     *
     * @param \DOMNode $ele The element whose attributes are written.
     *
     * @return $this
     */
    protected function attrs($ele)
    {
        // FIXME: Needs support for xml, xmlns, xlink, and namespaced elements.
        if (!$ele->hasAttributes()) {
            return $this;
        }

        // TODO: Currently, this always writes name="value", and does not do
        // value-less attributes.
        $map = $ele->attributes;
        $len = $map->length;
        for ($i = 0; $i < $len; ++$i) {
            $node = $map->item($i);
            $val = $this->enc($node->value, true);

            // XXX: The spec says that we need to ensure that anything in
            // the XML, XMLNS, or XLink NS's should use the canonical
            // prefix. It seems that DOM does this for us already, but there
            // may be exceptions.
            $name = $node->nodeName;

            // Special handling for attributes in SVG and MathML.
            // Using if/elseif instead of switch because it's faster in PHP.
            if ($this->outputMode == static::IM_IN_SVG) {
                $name = Elements::normalizeSvgAttribute($name);
            } elseif ($this->outputMode == static::IM_IN_MATHML) {
                $name = Elements::normalizeMathMlAttribute($name);
            }

            $this->wr(' ')->wr($name);

            if ((isset($val) && '' !== $val) || $this->nonBooleanAttribute($node)) {
                $this->wr('="')->wr($val)->wr('"');
            }
        }

        // Return $this on every path, matching the early return above.
        return $this;
    }

    /**
     * Check whether an attribute matches any non-boolean attribute rule.
     *
     * A rule matches when every constraint it defines holds. Supported
     * constraints: 'nodeNamespace', 'attrNamespace', 'nodeName', 'attrName'
     * (string or array of strings), 'xpath' and 'prefixes' (prefix => URI
     * map registered before the xpath expression is evaluated).
     *
     * @param \DOMAttr $attr The attribute being written.
     *
     * @return bool True if the attribute must be written with a value.
     */
    protected function nonBooleanAttribute(\DOMAttr $attr)
    {
        $ele = $attr->ownerElement;
        foreach ($this->nonBooleanAttributes as $rule) {
            if (isset($rule['nodeNamespace']) && $rule['nodeNamespace'] !== $ele->namespaceURI) {
                continue;
            }

            // 'attrNamespace' is the documented key; 'attNamespace' is the
            // misspelled key this method historically read and is still
            // accepted so existing rules keep working.
            $attrNamespace = null;
            if (isset($rule['attrNamespace'])) {
                $attrNamespace = $rule['attrNamespace'];
            } elseif (isset($rule['attNamespace'])) {
                $attrNamespace = $rule['attNamespace'];
            }
            if (null !== $attrNamespace && $attrNamespace !== $attr->namespaceURI) {
                continue;
            }

            if (isset($rule['nodeName']) && !$this->ruleNameMatches($rule['nodeName'], $ele->localName)) {
                continue;
            }
            if (isset($rule['attrName']) && !$this->ruleNameMatches($rule['attrName'], $attr->localName)) {
                continue;
            }
            if (isset($rule['xpath'])) {
                $xp = $this->getXPath($attr);
                if (isset($rule['prefixes'])) {
                    foreach ($rule['prefixes'] as $nsPrefix => $ns) {
                        $xp->registerNamespace($nsPrefix, $ns);
                    }
                }
                if (!$xp->evaluate($rule['xpath'], $attr)) {
                    continue;
                }
            }

            return true;
        }

        return false;
    }

    /**
     * Compare a rule's name constraint against an actual local name.
     *
     * @param string|array $constraint A single name or a list of names.
     * @param string       $actual     The local name to test.
     *
     * @return bool True if $actual equals the name or is in the list (strict).
     */
    private function ruleNameMatches($constraint, $actual)
    {
        if (is_array($constraint)) {
            return in_array($actual, $constraint, true);
        }

        return $constraint === $actual;
    }

    /**
     * Get a \DOMXPath bound to the owner document of the given node.
     *
     * The instance is cached and rebuilt whenever the node belongs to a
     * different document than the cached one, so one rules object can be
     * used for several documents.
     *
     * @param \DOMNode $node
     *
     * @return \DOMXPath
     */
    private function getXPath(\DOMNode $node)
    {
        if (!$this->xpath || $this->xpath->document !== $node->ownerDocument) {
            $this->xpath = new \DOMXPath($node->ownerDocument);
        }

        return $this->xpath;
    }

    /**
     * Write the closing tag.
     *
     * Tags for HTML, MathML, and SVG are in the local name. Otherwise, use the
     * qualified name (8.3).
     *
     * @param \DOMNode $ele The element being written.
     */
    protected function closeTag($ele)
    {
        if ($this->outputMode == static::IM_IN_HTML || $ele->hasChildNodes()) {
            $this->wr('</')->wr($this->traverser->isLocalElement($ele) ? $ele->localName : $ele->tagName)->wr('>');
        }
    }

    /**
     * Write to the output.
     *
     * @param string $text The string to put into the output
     *
     * @return $this
     */
    protected function wr($text)
    {
        fwrite($this->out, $text);

        return $this;
    }

    /**
     * Write a new line character.
     *
     * @return $this
     */
    protected function nl()
    {
        fwrite($this->out, PHP_EOL);

        return $this;
    }

    /**
     * Encode text.
     *
     * When encode is set to false, the default value, the text passed in is
     * escaped per section 8.3 of the html5 spec. For details on how text is
     * escaped see the escape() method.
     *
     * When encoding is set to true the text is converted to named character
     * references where appropriate. Section 8.1.4 Character references of the
     * html5 spec refers to using named character references. This is useful for
     * characters that can't otherwise legally be used in the text.
     *
     * The named character references are listed in section 8.5.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#named-character-references True encoding will turn all named character references into their entities.
     *      This includes such characters as +.# and many other common ones. By default
     *      encoding here will just escape &'<>".
     *
     * Note, PHP 5.4+ has better html5 encoding.
     *
     * @todo Use the Entities class in php 5.3 to have html5 entities.
     *
     * @param string $text      Text to encode.
     * @param bool   $attribute True if we are encoding an attribute, false otherwise.
     *
     * @return string The encoded text.
     */
    protected function enc($text, $attribute = false)
    {
        // Escape the text rather than convert to named character references.
        if (!$this->encode) {
            return $this->escape($text, $attribute);
        }

        // If we are in PHP 5.4+ we can use the native html5 entity functionality to
        // convert the named character references.

        if ($this->hasHTML5) {
            return htmlentities($text, ENT_HTML5 | ENT_SUBSTITUTE | ENT_QUOTES, 'UTF-8', false);
        } // If a version earlier than 5.4 html5 entities are not entirely handled.
        // This manually handles them.
        else {
            return strtr($text, HTML5Entities::$map);
        }
    }

    /**
     * Escape text.
     *
     * According to the html5 spec section 8.3 Serializing HTML fragments, text
     * within tags that are not style, script, xmp, iframe, noembed, and noframes
     * need to be properly escaped.
     *
     * The & should be converted to &amp;, no breaking space unicode characters
     * converted to &nbsp;, when in attribute mode the " should be converted to
     * &quot;, and when not in attribute mode the < and > should be converted to
     * &lt; and &gt;.
     *
     * @see http://www.w3.org/TR/2013/CR-html5-20130806/syntax.html#escapingString
     *
     * @param string $text      Text to escape.
     * @param bool   $attribute True if we are escaping an attribute, false otherwise.
     *
     * @return string The escaped text.
     */
    protected function escape($text, $attribute = false)
    {
        // Not using htmlspecialchars because, while it does escaping, it doesn't
        // match the requirements of section 8.5. For example, it doesn't handle
        // non-breaking spaces.
        if ($attribute) {
            $replace = array(
                '"' => '&quot;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        } else {
            $replace = array(
                '<' => '&lt;',
                '>' => '&gt;',
                '&' => '&amp;',
                "\xc2\xa0" => '&nbsp;',
            );
        }

        return strtr($text, $replace);
    }
}