Emogrifier.php 37 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020
  1. <?php
  2. namespace Pelago;
  3. /**
  4. * This class provides functions for converting CSS styles into inline style attributes in your HTML code.
  5. *
  6. * For more information, please see the README.md file.
  7. *
  8. * @version 1.0.0
  9. *
  10. * @author Cameron Brooks
  11. * @author Jaime Prado
  12. * @author Oliver Klee <typo3-coding@oliverklee.de>
  13. * @author Roman Ožana <ozana@omdesign.cz>
  14. */
  15. class Emogrifier
  16. {
  /**
   * Cache bucket used by parseCssRules() for parsed rule lists.
   *
   * @var int
   */
  const CACHE_KEY_CSS = 0;

  /**
   * Cache bucket used by getCssSelectorPrecedence() for selector precedence values.
   *
   * @var int
   */
  const CACHE_KEY_SELECTOR = 1;

  /**
   * Cache bucket used by translateCssToXpath() for translated selectors.
   *
   * @var int
   */
  const CACHE_KEY_XPATH = 2;

  /**
   * Cache bucket for parsed CSS declaration blocks.
   *
   * @var int
   */
  const CACHE_KEY_CSS_DECLARATIONS_BLOCK = 3;

  /**
   * Cache bucket used by generateStyleStringFromDeclarationsArrays() for generated style strings.
   *
   * @var int
   */
  const CACHE_KEY_COMBINED_STYLES = 4;

  /**
   * Array key of the index part when calculating nth-of-type and nth-child selectors.
   *
   * @var int
   */
  const INDEX = 0;

  /**
   * Array key of the multiplier part when calculating nth-of-type and nth-child selectors.
   *
   * @var int
   */
  const MULTIPLIER = 1;

  /**
   * Regular expression matching an optional element name followed by "#id".
   *
   * @var string
   */
  const ID_ATTRIBUTE_MATCHER = '/(\\w+)?\\#([\\w\\-]+)/';

  /**
   * Regular expression matching an optional element name (or "*" / "]") followed by one or more ".class" parts.
   *
   * @var string
   */
  const CLASS_ATTRIBUTE_MATCHER = '/(\\w+|[\\*\\]])?((\\.[\\w\\-]+)+)/';

  /**
   * Meta tag that addContentTypeMetaTag() inserts so the document is read as UTF-8.
   *
   * @var string
   */
  const CONTENT_TYPE_META_TAG = '<meta http-equiv="Content-Type" content="text/html; charset=utf-8">';

  /**
   * Document type that ensureDocumentType() prepends to HTML that has none.
   *
   * @var string
   */
  const DEFAULT_DOCUMENT_TYPE = '<!DOCTYPE html>';

  /**
   * The HTML to emogrify.
   *
   * @var string
   */
  private $html = '';

  /**
   * The CSS to merge into the HTML.
   *
   * @var string
   */
  private $css = '';

  /**
   * The excluded selectors, stored as array keys (the values are always true).
   *
   * @var bool[]
   */
  private $excludedSelectors = array();

  /**
   * Names of the tags that removeUnprocessableTags() strips from the HTML before it is parsed.
   *
   * @var string[]
   */
  private $unprocessableHtmlTags = array('wbr');

  /**
   * The media types to keep, stored as array keys (the values are always true).
   *
   * @var bool[]
   */
  private $allowedMediaTypes = array(
      'all' => true,
      'screen' => true,
      'print' => true,
  );

  /**
   * One cache array per CACHE_KEY_* constant.
   *
   * @var array[]
   */
  private $caches = array(
      self::CACHE_KEY_CSS => array(),
      self::CACHE_KEY_SELECTOR => array(),
      self::CACHE_KEY_XPATH => array(),
      self::CACHE_KEY_CSS_DECLARATIONS_BLOCK => array(),
      self::CACHE_KEY_COMBINED_STYLES => array(),
  );

  /**
   * The visited nodes with the XPath paths as array keys.
   *
   * @var \DOMElement[]
   */
  private $visitedNodes = array();

  /**
   * The styles to apply to the nodes, with the XPath paths as array keys for the outer array
   * and the attribute names/values as key/value pairs for the inner array.
   *
   * @var array[]
   */
  private $styleAttributesForNodes = array();

  /**
   * Determines whether the "style" attributes of tags in the HTML passed to this class should be preserved.
   * If set to false, the value of the style attributes will be discarded.
   *
   * @var bool
   */
  private $isInlineStyleAttributesParsingEnabled = true;

  /**
   * Determines whether the <style> blocks in the HTML passed to this class should be parsed.
   *
   * If set to true, the <style> blocks will be removed from the HTML and their contents will be applied
   * to the HTML via inline styles.
   *
   * If set to false, the <style> blocks will be left as they are in the HTML.
   *
   * @var bool
   */
  private $isStyleBlocksParsingEnabled = true;

  /**
   * Determines whether elements with the `display: none` property are removed from the DOM.
   *
   * Note: despite the name, true means that the removal is active: process() calls
   * removeInvisibleNodes() while this flag is set, and disableInvisibleNodeRemoval() sets it to false.
   *
   * @var bool
   */
  private $shouldKeepInvisibleNodes = true;
  /**
   * Creates an instance and stores the HTML and CSS to work on.
   *
   * Both values are handed to the regular setters.
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   * @param string $css the CSS to merge, must be UTF-8-encoded
   */
  public function __construct($html = '', $css = '')
  {
      // The two setters are independent of each other.
      $this->setHtml($html);
      $this->setCss($css);
  }
  /**
   * Drops the references to the visited DOM nodes when the instance is destroyed.
   */
  public function __destruct()
  {
      $this->purgeVisitedNodes();
  }
  /**
   * Stores the HTML that the next emogrify call will work on.
   *
   * @param string $html the HTML to emogrify, must be UTF-8-encoded
   *
   * @return void
   */
  public function setHtml($html)
  {
      $this->html = $html;
  }
  /**
   * Stores the CSS that will be merged into the HTML.
   *
   * @param string $css the CSS to merge, must be UTF-8-encoded
   *
   * @return void
   */
  public function setCss($css)
  {
      $this->css = $css;
  }
  /**
   * Inlines $this->css into $this->html and returns the complete resulting HTML document.
   *
   * @return string the serialized document with the CSS placed inline
   *
   * @throws \BadMethodCallException if no HTML has been set (the HTML is the empty string)
   */
  public function emogrify()
  {
      $hasHtml = $this->html !== '';
      if (!$hasHtml) {
          throw new \BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
      }

      $document = $this->createXmlDocument();
      $this->process($document);

      return $document->saveHTML();
  }
  /**
   * Applies $this->css to $this->html and returns only the HTML content
   * within the <body> tag.
   *
   * This method places the CSS inline.
   *
   * If the parsed document has no <body> element (e.g., the HTML consists of a <head> only),
   * the serialization of an empty document is returned.
   *
   * @return string
   *
   * @throws \BadMethodCallException if no HTML has been set (the HTML is the empty string)
   */
  public function emogrifyBodyContent()
  {
      if ($this->html === '') {
          throw new \BadMethodCallException('Please set some HTML first before calling emogrify.', 1390393096);
      }

      $xmlDocument = $this->createXmlDocument();
      $this->process($xmlDocument);

      $innerDocument = new \DOMDocument();
      // The parser does not necessarily create a <body> element, so item(0) may be null.
      $bodyElement = $xmlDocument->getElementsByTagName('body')->item(0);
      if ($bodyElement !== null) {
          foreach ($bodyElement->childNodes as $childNode) {
              // importNode() copies the node, so the list being iterated is not modified.
              $innerDocument->appendChild($innerDocument->importNode($childNode, true));
          }
      }

      return $innerDocument->saveHTML();
  }
  /**
   * Applies $this->css (plus, if enabled, the CSS from the document's <style> blocks) to $xmlDocument
   * by writing it into "style" attributes.
   *
   * @param \DOMDocument $xmlDocument the document to modify in place
   *
   * @return void
   */
  protected function process(\DOMDocument $xmlDocument)
  {
      $xpath = new \DOMXPath($xmlDocument);
      $this->clearAllCaches();
      $this->purgeVisitedNodes();

      // Before we begin processing the CSS, normalize all existing style attributes
      // (this changes 'DISPLAY: none' to 'display: none'), as DOMXPath does not support XPath 2.0.
      // normalizeStyleAttributes() also remembers the original inline styles so they can be
      // re-applied after the CSS rules have been merged.
      $nodesWithStyleAttributes = $xpath->query('//*[@style]');
      if ($nodesWithStyleAttributes !== false) {
          /** @var \DOMElement $styledNode */
          foreach ($nodesWithStyleAttributes as $styledNode) {
              if (!$this->isInlineStyleAttributesParsingEnabled) {
                  $styledNode->removeAttribute('style');
                  continue;
              }
              $this->normalizeStyleAttributes($styledNode);
          }
      }

      // CSS from <style> blocks is appended so that it takes precedence over conflicting styles
      // in the CSS that was set explicitly.
      $allCss = $this->css;
      if ($this->isStyleBlocksParsingEnabled) {
          $allCss .= $this->getCssFromAllStyleNodes($xpath);
      }
      $cssParts = $this->splitCssAndMediaQuery($allCss);
      $excludedNodes = $this->getNodesToExclude($xpath);

      foreach ($this->parseCssRules($cssParts['css']) as $cssRule) {
          $matchingNodes = $xpath->query($this->translateCssToXpath($cssRule['selector']));
          if ($matchingNodes === false) {
              // invalid selector
              continue;
          }

          /** @var \DOMElement $node */
          foreach ($matchingNodes as $node) {
              if (in_array($node, $excludedNodes, true)) {
                  continue;
              }

              // Declarations that are already on the node are merged with (and overwritten by) the rule.
              $oldStyleDeclarations = $node->hasAttribute('style')
                  ? $this->parseCssDeclarationsBlock($node->getAttribute('style'))
                  : array();
              $newStyleDeclarations = $this->parseCssDeclarationsBlock($cssRule['declarationsBlock']);
              $node->setAttribute(
                  'style',
                  $this->generateStyleStringFromDeclarationsArrays($oldStyleDeclarations, $newStyleDeclarations)
              );
          }
      }

      if ($this->isInlineStyleAttributesParsingEnabled) {
          $this->fillStyleAttributesWithMergedStyles();
      }
      // Despite its name, this flag being true means that invisible nodes get removed.
      if ($this->shouldKeepInvisibleNodes) {
          $this->removeInvisibleNodes($xpath);
      }

      $this->copyCssWithMediaToStyleNode($xmlDocument, $xpath, $cssParts['media']);
  }
  /**
   * Extracts and parses the individual rules from a CSS string.
   *
   * The result is sorted by selector precedence (ascending) and cached per CSS string.
   * Rule blocks without declarations are skipped, as are selectors containing a colon
   * unless they match the "-child" / "-type(" pseudo-class pattern.
   *
   * @param string $css a string of raw CSS code
   *
   * @return string[][] an array of string sub-arrays with the keys
   *         "selector" (a single CSS selector, e.g., "*" or "h1"),
   *         "declarationsBlock" (the semicolon-separated CSS declarations for that selector,
   *         e.g., "color: red; height: 4px;"),
   *         and "line" (the zero-based position of the rule block within the CSS, e.g., 42)
   */
  private function parseCssRules($css)
  {
      $cssKey = md5($css);
      if (isset($this->caches[self::CACHE_KEY_CSS][$cssKey])) {
          return $this->caches[self::CACHE_KEY_CSS][$cssKey];
      }

      // split the CSS into "selectors { declarations }" blocks
      preg_match_all('/(?:^|[\\s^{}]*)([^{]+){([^}]*)}/mis', $css, $ruleMatches, PREG_SET_ORDER);

      $cssRules = array();
      /** @var string[] $ruleMatch */
      foreach ($ruleMatches as $position => $ruleMatch) {
          $declarationsBlock = trim($ruleMatch[2]);
          if ($declarationsBlock === '') {
              continue;
          }

          foreach (explode(',', $ruleMatch[1]) as $selector) {
              // don't process pseudo-elements and behavioral (dynamic) pseudo-classes;
              // only allow structural pseudo-classes
              $hasColon = strpos($selector, ':') !== false;
              if ($hasColon && !preg_match('/:\\S+\\-(child|type\\()/i', $selector)) {
                  continue;
              }

              $cssRules[] = array(
                  'selector' => trim($selector),
                  'declarationsBlock' => $declarationsBlock,
                  'line' => $position,
              );
          }
      }

      usort($cssRules, array($this, 'sortBySelectorPrecedence'));
      $this->caches[self::CACHE_KEY_CSS][$cssKey] = $cssRules;

      return $cssRules;
  }
  /**
   * Turns off the parsing of inline "style" attributes.
   *
   * Once disabled, process() removes the existing "style" attributes instead of preserving them.
   *
   * @return void
   */
  public function disableInlineStyleAttributesParsing()
  {
      $this->isInlineStyleAttributesParsingEnabled = false;
  }
  /**
   * Turns off the parsing of <style> blocks.
   *
   * Once disabled, process() leaves the <style> blocks in the HTML and does not apply their CSS.
   *
   * @return void
   */
  public function disableStyleBlocksParsing()
  {
      $this->isStyleBlocksParsingEnabled = false;
  }
  /**
   * Turns off the removal of elements with `display: none` properties.
   *
   * This clears the (misleadingly named) $shouldKeepInvisibleNodes flag that process() checks
   * before calling removeInvisibleNodes().
   *
   * @return void
   */
  public function disableInvisibleNodeRemoval()
  {
      $this->shouldKeepInvisibleNodes = false;
  }
  /**
   * Empties each of the five caches.
   *
   * @return void
   */
  private function clearAllCaches()
  {
      $cacheKeys = array(
          self::CACHE_KEY_CSS,
          self::CACHE_KEY_SELECTOR,
          self::CACHE_KEY_XPATH,
          self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
          self::CACHE_KEY_COMBINED_STYLES,
      );
      foreach ($cacheKeys as $cacheKey) {
          $this->clearCache($cacheKey);
      }
  }
  /**
   * Empties the cache stored under $key.
   *
   * @param int $key the cache key, must be CACHE_KEY_CSS, CACHE_KEY_SELECTOR, CACHE_KEY_XPATH,
   *        CACHE_KEY_CSS_DECLARATIONS_BLOCK or CACHE_KEY_COMBINED_STYLES
   *
   * @return void
   *
   * @throws \InvalidArgumentException if $key is not one of the cache key constants (compared strictly)
   */
  private function clearCache($key)
  {
      $isKnownKey = in_array(
          $key,
          array(
              self::CACHE_KEY_CSS,
              self::CACHE_KEY_SELECTOR,
              self::CACHE_KEY_XPATH,
              self::CACHE_KEY_CSS_DECLARATIONS_BLOCK,
              self::CACHE_KEY_COMBINED_STYLES,
          ),
          true
      );
      if (!$isKnownKey) {
          throw new \InvalidArgumentException('Invalid cache key: ' . $key, 1391822035);
      }

      $this->caches[$key] = array();
  }
  /**
   * Forgets the visited nodes and the original style attributes recorded for them.
   *
   * @return void
   */
  private function purgeVisitedNodes()
  {
      $this->styleAttributesForNodes = array();
      $this->visitedNodes = array();
  }
  /**
   * Marks a tag for removal.
   *
   * There are some HTML tags that DOMDocument cannot process, and it will throw an error if it
   * encounters them. In particular, DOMDocument will complain if you try to use HTML5 tags in an
   * XHTML document.
   *
   * Only the tags themselves are stripped from the HTML; any content between an opening and a
   * closing tag stays in place.
   *
   * @param string $tagName the tag name, e.g., "p"
   *
   * @return void
   */
  public function addUnprocessableHtmlTag($tagName)
  {
      $this->unprocessableHtmlTags[] = $tagName;
  }
  /**
   * Drops a tag from the removal list.
   *
   * Nothing happens if the tag is not on the list.
   *
   * @param string $tagName the tag name, e.g., "p"
   *
   * @return void
   */
  public function removeUnprocessableHtmlTag($tagName)
  {
      $position = array_search($tagName, $this->unprocessableHtmlTags, true);
      if ($position === false) {
          return;
      }

      unset($this->unprocessableHtmlTags[$position]);
  }
  /**
   * Marks a media query type to keep.
   *
   * The media type is stored as an array key.
   *
   * @param string $mediaName the media type name, e.g., "braille"
   *
   * @return void
   */
  public function addAllowedMediaType($mediaName)
  {
      $this->allowedMediaTypes[$mediaName] = true;
  }
  /**
   * Drops a media query type from the allowed list.
   *
   * Nothing happens if the media type is not on the list.
   *
   * @param string $mediaName the media type name, e.g., "braille"
   *
   * @return void
   */
  public function removeAllowedMediaType($mediaName)
  {
      // unset() silently ignores array keys that do not exist.
      unset($this->allowedMediaTypes[$mediaName]);
  }
  /**
   * Registers a selector whose matching nodes are excluded from emogrification.
   *
   * Any nodes that match the selector will not have their style altered.
   * The selector is stored as an array key.
   *
   * @param string $selector the selector to exclude, e.g., ".editor"
   *
   * @return void
   */
  public function addExcludedSelector($selector)
  {
      $this->excludedSelectors[$selector] = true;
  }
  /**
   * No longer excludes the nodes matching this selector from emogrification.
   *
   * Nothing happens if the selector has not been excluded.
   *
   * @param string $selector the selector to no longer exclude, e.g., ".editor"
   *
   * @return void
   */
  public function removeExcludedSelector($selector)
  {
      // unset() silently ignores array keys that do not exist.
      unset($this->excludedSelectors[$selector]);
  }
  /**
   * Removes all elements whose style attribute contains "display:none" from the document.
   *
   * The search must be case-insensitive for the value, but DOMDocument only supports XPath 1.0,
   * which has no lower-case(). The query therefore strips the spaces from the attribute value and
   * translate()s the letters that occur in "NONE" ("N", "O", "E") to lowercase before comparing.
   *
   * @param \DOMXPath $xpath
   *
   * @return void
   */
  private function removeInvisibleNodes(\DOMXPath $xpath)
  {
      $invisibleNodes = $xpath->query(
          '//*[contains(translate(translate(@style," ",""),"NOE","noe"),"display:none")]'
      );
      if ($invisibleNodes->length === 0) {
          return;
      }

      /** @var \DOMNode $invisibleNode */
      foreach ($invisibleNodes as $invisibleNode) {
          // The checks on the parent node ensure that we do not call removeChild on a
          // nonexistent node if the parent has already been deleted.
          $parent = $invisibleNode->parentNode;
          if ($parent && is_callable(array($parent, 'removeChild'))) {
              $parent->removeChild($invisibleNode);
          }
      }
  }
  /**
   * Lowercases the property names in the "style" attribute of $node and writes the result back.
   *
   * The first time a node is seen, its parsed original styles and the node itself are remembered
   * (keyed by the node path) so that the inline styles can be re-applied later.
   *
   * NOTE(review): the pattern lowercases every run of letters/hyphens directly in front of a colon,
   * which also covers such runs inside values. Its "A-z" range additionally spans the ASCII
   * characters between "Z" and "a" ("[", "\", "]", "^", "_", "`").
   *
   * @param \DOMElement $node
   *
   * @return void
   */
  private function normalizeStyleAttributes(\DOMElement $node)
  {
      $toLowercase = function (array $m) {
          return strtolower($m[0]);
      };
      $normalizedOriginalStyle = preg_replace_callback(
          '/[A-z\\-]+(?=\\:)/S',
          $toLowercase,
          $node->getAttribute('style')
      );

      // In order to not overwrite existing style attributes in the HTML,
      // we have to save the original HTML styles.
      $nodePath = $node->getNodePath();
      $isFirstVisit = !isset($this->styleAttributesForNodes[$nodePath]);
      if ($isFirstVisit) {
          $this->styleAttributesForNodes[$nodePath] = $this->parseCssDeclarationsBlock($normalizedOriginalStyle);
          $this->visitedNodes[$nodePath] = $node;
      }

      $node->setAttribute('style', $normalizedOriginalStyle);
  }
  /**
   * Re-applies the remembered original inline styles on top of the current style attributes
   * of all visited nodes.
   *
   * @return void
   */
  private function fillStyleAttributesWithMergedStyles()
  {
      foreach ($this->styleAttributesForNodes as $nodePath => $originalStyles) {
          $visitedNode = $this->visitedNodes[$nodePath];
          $currentStyles = $this->parseCssDeclarationsBlock($visitedNode->getAttribute('style'));
          $mergedStyle = $this->generateStyleStringFromDeclarationsArrays($currentStyles, $originalStyles);
          $visitedNode->setAttribute('style', $mergedStyle);
      }
  }
  /**
   * This method merges old or existing name/value array with new name/value array
   * and then generates a string of the combined style suitable for placing inline.
   * This becomes the single point for CSS string generation allowing for consistent
   * CSS output no matter where the CSS originally came from.
   *
   * New declarations overwrite old ones with the same name, except for old declarations whose
   * value ends in "!important": those are kept.
   *
   * @param string[] $oldStyles the existing declarations, property names as keys
   * @param string[] $newStyles the declarations to merge in, property names as keys
   *
   * @return string the combined declarations, e.g., "color: red; height: 4px;"
   */
  private function generateStyleStringFromDeclarationsArrays(array $oldStyles, array $newStyles)
  {
      $combinedStyles = array_merge($oldStyles, $newStyles);
      foreach ($oldStyles as $attributeName => $attributeValue) {
          if (isset($newStyles[$attributeName]) && strtolower(substr($attributeValue, -10)) === '!important') {
              $combinedStyles[$attributeName] = $attributeValue;
          }
      }

      // The cache key must be built from the final declarations (i.e., after the "!important"
      // handling above). Otherwise, two different inputs whose plain merge is identical would
      // share a cache entry although they need to produce different style strings.
      $cacheKey = serialize($combinedStyles);
      if (isset($this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey])) {
          return $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey];
      }

      $style = '';
      foreach ($combinedStyles as $attributeName => $attributeValue) {
          $style .= strtolower(trim($attributeName)) . ': ' . trim($attributeValue) . '; ';
      }
      $trimmedStyle = rtrim($style);

      $this->caches[self::CACHE_KEY_COMBINED_STYLES][$cacheKey] = $trimmedStyle;

      return $trimmedStyle;
  }
  /**
   * Adds a <style> element to $xmlDocument that contains those media queries from $css
   * which have at least one rule whose selector matches an element in the document.
   *
   * Nothing is added if $css is empty.
   *
   * @param \DOMDocument $xmlDocument the document to match against
   * @param \DOMXPath $xpath
   * @param string $css a string of CSS
   *
   * @return void
   */
  private function copyCssWithMediaToStyleNode(\DOMDocument $xmlDocument, \DOMXPath $xpath, $css)
  {
      if ($css === '') {
          return;
      }

      $relevantMediaQueries = array();
      foreach ($this->extractMediaQueriesFromCss($css) as $mediaQuery) {
          foreach ($this->parseCssRules($mediaQuery['css']) as $cssRule) {
              if (!$this->existsMatchForCssSelector($xpath, $cssRule['selector'])) {
                  continue;
              }

              // One matching selector is enough to keep the whole media query.
              $relevantMediaQueries[] = $mediaQuery['query'];
              break;
          }
      }

      $this->addStyleElementToDocument($xmlDocument, implode('', $relevantMediaQueries));
  }
  /**
   * Finds the @media blocks in $css.
   *
   * @param string $css
   *
   * @return string[][] numeric array with string sub-arrays with the keys "css" (the content of the
   *         media block up to and including the first closing brace) and "query" (the complete
   *         matched @media block)
   */
  private function extractMediaQueriesFromCss($css)
  {
      preg_match_all('#(?<query>@media[^{]*\\{(?<css>(.*?)\\})(\\s*)\\})#s', $css, $matches);

      $mediaQueries = array();
      foreach ($matches['css'] as $index => $cssOfQuery) {
          $mediaQueries[] = array('css' => $cssOfQuery, 'query' => $matches['query'][$index]);
      }

      return $mediaQueries;
  }
  /**
   * Checks whether there is at least one matching element for $cssSelector.
   *
   * A selector for which the XPath query fails counts as "no match".
   *
   * @param \DOMXPath $xpath
   * @param string $cssSelector
   *
   * @return bool
   */
  private function existsMatchForCssSelector(\DOMXPath $xpath, $cssSelector)
  {
      $matchingNodes = $xpath->query($this->translateCssToXpath($cssSelector));
      if ($matchingNodes === false) {
          return false;
      }

      return $matchingNodes->length !== 0;
  }
  /**
   * Collects the CSS from all <style> elements of the document and removes those elements.
   *
   * The content of each element is preceded by a line break.
   *
   * @param \DOMXPath $xpath
   *
   * @return string the concatenated CSS, will be empty if there are no <style> elements
   */
  private function getCssFromAllStyleNodes(\DOMXPath $xpath)
  {
      $styleNodes = $xpath->query('//style');
      if ($styleNodes === false) {
          return '';
      }

      $collectedCss = '';
      /** @var \DOMNode $styleNode */
      foreach ($styleNodes as $styleNode) {
          $collectedCss .= '
' . $styleNode->nodeValue;
          $styleNode->parentNode->removeChild($styleNode);
      }

      return $collectedCss;
  }
  /**
   * Adds a style element with $css to $document.
   *
   * The element gets the attribute type="text/css" and is appended to the head element, which is
   * created if the document does not have one yet.
   *
   * This method is protected to allow overriding.
   *
   * @see https://github.com/jjriv/emogrifier/issues/103
   *
   * @param \DOMDocument $document
   * @param string $css
   *
   * @return void
   */
  protected function addStyleElementToDocument(\DOMDocument $document, $css)
  {
      // The CSS is added as a text node because the value passed to createElement() is not
      // escaped: an "&" in the CSS (e.g., in a URL) would trigger an "unterminated entity
      // reference" warning and cut off the content.
      $styleElement = $document->createElement('style');
      $styleElement->appendChild($document->createTextNode($css));
      $styleElement->setAttribute('type', 'text/css');

      $head = $this->getOrCreateHeadElement($document);
      $head->appendChild($styleElement);
  }
  /**
   * Returns the first head element of $document, creating it if there is none yet.
   *
   * A newly created head element is inserted into the first html element, in front of the
   * first body element.
   *
   * @param \DOMDocument $document
   *
   * @return \DOMNode the head element
   */
  private function getOrCreateHeadElement(\DOMDocument $document)
  {
      $existingHead = $document->getElementsByTagName('head')->item(0);
      if ($existingHead !== null) {
          return $existingHead;
      }

      $newHead = $document->createElement('head');
      $htmlElement = $document->getElementsByTagName('html')->item(0);
      $bodyElement = $document->getElementsByTagName('body')->item(0);
      $htmlElement->insertBefore($newHead, $bodyElement);

      return $newHead;
  }
  /**
   * Splits input CSS code into an array where:
   *
   * - key "css" contains the CSS without comments, @import directives and @media blocks
   * - key "media" contains the @media blocks for the allowed media types
   *
   * Example (as given in the original documentation):
   *
   * The CSS code
   *
   *   "@import "file.css"; h1 { color:red; } @media { h1 {}} @media tv { h1 {}}"
   *
   * will be parsed into the following array:
   *
   *   "css" => "h1 { color:red; }"
   *   "media" => "@media { h1 {}}"
   *
   * @param string $css
   *
   * @return string[]
   */
  private function splitCssAndMediaQuery($css)
  {
      $cssWithoutComments = preg_replace('/\\/\\*.*\\*\\//sU', '', $css);

      // The media type names are inserted into the pattern as they are (without quoting).
      $mediaTypesExpression = empty($this->allowedMediaTypes)
          ? ''
          : '|' . implode('|', array_keys($this->allowedMediaTypes));
      $allowedMediaPattern = '#@media\\s+(?:only\\s)?(?:[\\s{\\(]' . $mediaTypesExpression
          . ')\\s?[^{]+{.*}\\s*}\\s*#misU';

      // The callback collects the matched media blocks and replaces them with nothing.
      $media = '';
      $cssForAllowedMediaTypes = preg_replace_callback(
          $allowedMediaPattern,
          function ($matches) use (&$media) {
              $media .= $matches[0];

              return '';
          },
          $cssWithoutComments
      );

      // filter the CSS
      $cleanedCss = preg_replace(
          array(
              'import directives' => '/^\\s*@import\\s[^;]+;/misU',
              'remaining media enclosures' => '/^\\s*@media\\s[^{]+{(.*)}\\s*}\\s/misU',
          ),
          '',
          $cssForAllowedMediaTypes
      );

      return array('css' => $cleanedCss, 'media' => $media);
  }
  /**
   * Parses the current (unified) HTML into a new DOMDocument.
   *
   * libxml errors raised while loading the HTML are collected internally and discarded;
   * the previous libxml error handling mode is restored afterwards.
   *
   * @return \DOMDocument
   */
  private function createXmlDocument()
  {
      $document = new \DOMDocument();
      $document->encoding = 'UTF-8';
      $document->strictErrorChecking = false;
      $document->formatOutput = true;

      $previousLibXmlState = libxml_use_internal_errors(true);
      $document->loadHTML($this->getUnifiedHtml());
      libxml_clear_errors();
      libxml_use_internal_errors($previousLibXmlState);

      $document->normalizeDocument();

      return $document;
  }
  /**
   * Returns the HTML with the unprocessable HTML tags removed and
   * with added document type and Content-Type meta tag if needed.
   *
   * @return string the unified HTML
   */
  private function getUnifiedHtml()
  {
      return $this->addContentTypeMetaTag(
          $this->ensureDocumentType(
              $this->removeUnprocessableTags($this->html)
          )
      );
  }
  /**
   * Removes the opening and closing tags of all unprocessable tag names from $html.
   *
   * Only the tags themselves are removed; the content between them stays in place.
   * Nothing is changed if the list of unprocessable tags is empty.
   *
   * @param string $html
   *
   * @return string the reworked HTML with the unprocessable tags removed
   */
  private function removeUnprocessableTags($html)
  {
      if (empty($this->unprocessableHtmlTags)) {
          return $html;
      }

      $unprocessableHtmlTags = implode('|', $this->unprocessableHtmlTags);

      // The lookahead makes sure that the tag name ends right after the listed name. Without it,
      // the unprocessable tag "p" would also strip tags that merely start with it, e.g., <pre>.
      return preg_replace('/<\\/?(' . $unprocessableHtmlTags . ')(?=[\\s\\/>])[^>]*>/i', '', $html);
  }
  /**
   * Prepends the default document type to $html unless it already contains a document type.
   *
   * The check is a case-insensitive search for "<!DOCTYPE" anywhere in $html.
   *
   * @param string $html
   *
   * @return string HTML with document type
   */
  private function ensureDocumentType($html)
  {
      if (stripos($html, '<!DOCTYPE') === false) {
          return self::DEFAULT_DOCUMENT_TYPE . $html;
      }

      return $html;
  }
  /**
   * Adds a Content-Type meta tag for the charset.
   *
   * Nothing is changed if $html already contains the text "Content-Type" (in any case).
   * Otherwise the meta tag is inserted once: right after the first opening head tag, or, if there
   * is none, wrapped in a new head element right after the first opening html tag, or, if there
   * is none either, in front of the HTML.
   *
   * @param string $html
   *
   * @return string the HTML with the meta tag added
   */
  private function addContentTypeMetaTag($html)
  {
      $hasContentTypeMetaTag = stristr($html, 'Content-Type') !== false;
      if ($hasContentTypeMetaTag) {
          return $html;
      }

      // We are trying to insert the meta tag to the right spot in the DOM.
      // If we just prepended it to the HTML, we would lose attributes set to the HTML tag.
      //
      // The tag name must be followed by whitespace or the closing bracket so that other tags
      // starting with the same letters (e.g., <header>) are not mistaken for the head tag.
      $headTagPattern = '/<head(\\s[^>]*)?>/i';
      $htmlTagPattern = '/<html(\\s[^>]*)?>/i';

      // The limit of 1 makes sure that the meta tag is inserted only once.
      if (preg_match($headTagPattern, $html)) {
          return preg_replace($headTagPattern, '<head$1>' . self::CONTENT_TYPE_META_TAG, $html, 1);
      }
      if (preg_match($htmlTagPattern, $html)) {
          return preg_replace(
              $htmlTagPattern,
              '<html$1><head>' . self::CONTENT_TYPE_META_TAG . '</head>',
              $html,
              1
          );
      }

      return self::CONTENT_TYPE_META_TAG . $html;
  }
  /**
   * Comparison callback that orders CSS rules by ascending selector precedence.
   *
   * Rules with the same precedence are ordered by their "line" value. The result is never 0.
   *
   * @param string[] $a a CSS rule with the keys "selector" and "line"
   * @param string[] $b a CSS rule with the keys "selector" and "line"
   *
   * @return int -1 if $a is to be processed before $b, 1 otherwise
   */
  private function sortBySelectorPrecedence(array $a, array $b)
  {
      $precedenceA = $this->getCssSelectorPrecedence($a['selector']);
      $precedenceB = $this->getCssSelectorPrecedence($b['selector']);

      // We want these sorted in ascending order so selectors with lesser precedence get
      // processed first and selectors with greater precedence get processed last.
      if ($precedenceA === $precedenceB) {
          return $a['line'] < $b['line'] ? -1 : 1;
      }

      return $precedenceA < $precedenceB ? -1 : 1;
  }
  /**
   * Calculates the precedence of $selector: each "#word" part counts 100, each ".word" part
   * counts 10, and each remaining word counts 1.
   *
   * The results are cached per selector.
   *
   * @param string $selector
   *
   * @return int
   */
  private function getCssSelectorPrecedence($selector)
  {
      $selectorKey = md5($selector);
      if (isset($this->caches[self::CACHE_KEY_SELECTOR][$selectorKey])) {
          return $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey];
      }

      // ids: worth 100, classes: worth 10, elements: worth 1
      $weightsByPrefixPattern = array('\\#' => 100, '\\.' => 10, '' => 1);

      $precedence = 0;
      $remainingSelector = $selector;
      foreach ($weightsByPrefixPattern as $prefixPattern => $weight) {
          if (trim($remainingSelector) === '') {
              break;
          }

          // Matched parts are removed so that they are not counted again in a later pass.
          $numberOfMatches = 0;
          $remainingSelector = preg_replace(
              '/' . $prefixPattern . '\\w+/',
              '',
              $remainingSelector,
              -1,
              $numberOfMatches
          );
          $precedence += $weight * $numberOfMatches;
      }

      $this->caches[self::CACHE_KEY_SELECTOR][$selectorKey] = $precedence;

      return $precedence;
  }
  /**
   * Maps a CSS selector to an XPath query string.
   *
   * Results are cached under self::CACHE_KEY_XPATH, keyed by the MD5 hash of the trimmed
   * selector after whitespace-delimited words have been lowercased.
   *
   * @see http://plasmasturm.org/log/444/
   *
   * @param string $cssSelector a CSS selector
   *
   * @return string the corresponding XPath selector
   */
  private function translateCssToXpath($cssSelector)
  {
      // The padding lets the pattern also match a word at the very start or end of the selector.
      $paddedSelector = ' ' . $cssSelector . ' ';
      $lowercasePaddedSelector = preg_replace_callback(
          '/\\s+\\w+\\s+/',
          function (array $matches) {
              return strtolower($matches[0]);
          },
          $paddedSelector
      );
      $trimmedLowercaseSelector = trim($lowercasePaddedSelector);
      $xpathKey = md5($trimmedLowercaseSelector);

      // The XPath cache must be used for the lookup, the write and the read alike. Storing the
      // result in the selector cache would make this lookup miss every time and would put an
      // XPath string where getCssSelectorPrecedence() expects a precedence for the same key.
      if (isset($this->caches[self::CACHE_KEY_XPATH][$xpathKey])) {
          return $this->caches[self::CACHE_KEY_XPATH][$xpathKey];
      }

      // Both arrays use the same keys in the same order; preg_replace applies the pairs in turn.
      $cssSelectorMatches = array(
          'child' => '/\\s+>\\s+/',
          'adjacent sibling' => '/\\s+\\+\\s+/',
          'descendant' => '/\\s+/',
          ':first-child' => '/([^\\/]+):first-child/i',
          ':last-child' => '/([^\\/]+):last-child/i',
          'attribute only' => '/^\\[(\\w+|\\w+\\=[\'"]?\\w+[\'"]?)\\]/',
          'attribute' => '/(\\w)\\[(\\w+)\\]/',
          'exact attribute' => '/(\\w)\\[(\\w+)\\=[\'"]?(\\w+)[\'"]?\\]/',
      );
      $xPathReplacements = array(
          'child' => '/',
          'adjacent sibling' => '/following-sibling::*[1]/self::',
          'descendant' => '//',
          ':first-child' => '\\1/*[1]',
          ':last-child' => '\\1/*[last()]',
          'attribute only' => '*[@\\1]',
          'attribute' => '\\1[@\\2]',
          'exact attribute' => '\\1[@\\2="\\3"]',
      );

      $roughXpath = '//' . preg_replace($cssSelectorMatches, $xPathReplacements, $trimmedLowercaseSelector);

      $xpathWithIdAttributeMatchers = preg_replace_callback(
          self::ID_ATTRIBUTE_MATCHER,
          array($this, 'matchIdAttributes'),
          $roughXpath
      );
      $xpathWithIdAttributeAndClassMatchers = preg_replace_callback(
          self::CLASS_ATTRIBUTE_MATCHER,
          array($this, 'matchClassAttributes'),
          $xpathWithIdAttributeMatchers
      );

      // Advanced selectors are going to require a bit more advanced emogrification:
      // the nth-* pseudo-classes are translated by dedicated callbacks.
      $xpathWithIdAttributeAndClassMatchers = preg_replace_callback(
          '/([^\\/]+):nth-child\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
          array($this, 'translateNthChild'),
          $xpathWithIdAttributeAndClassMatchers
      );
      $finalXpath = preg_replace_callback(
          '/([^\\/]+):nth-of-type\\(\\s*(odd|even|[+\\-]?\\d|[+\\-]?\\d?n(\\s*[+\\-]\\s*\\d)?)\\s*\\)/i',
          array($this, 'translateNthOfType'),
          $xpathWithIdAttributeAndClassMatchers
      );

      $this->caches[self::CACHE_KEY_XPATH][$xpathKey] = $finalXpath;

      return $this->caches[self::CACHE_KEY_XPATH][$xpathKey];
  }
  /**
   * Callback that turns an ID selector match into an XPath "id" attribute test.
   *
   * @param string[] $match regular expression match; index 1 is the element name (may be empty),
   *                        index 2 is the ID
   *
   * @return string the XPath fragment, using "*" as the element name if none was matched
   */
  private function matchIdAttributes(array $match)
  {
      $elementName = '*';
      if ($match[1] !== '') {
          $elementName = $match[1];
      }

      return $elementName . '[@id="' . $match[2] . '"]';
  }
  /**
   * Callback that turns a class selector match into XPath "class" attribute tests.
   *
   * One "contains" predicate is produced per class name, so an element must carry all of the
   * listed classes to match.
   *
   * @param string[] $match regular expression match; index 1 is the element name (may be empty),
   *                        index 2 is the dot-prefixed, dot-separated list of class names
   *
   * @return string the XPath fragment, using "*" as the element name if none was matched
   */
  private function matchClassAttributes(array $match)
  {
      $elementName = $match[1] !== '' ? $match[1] : '*';

      // Drop the leading dot, then split the remainder into the individual class names.
      $classNames = explode('.', substr($match[2], 1));
      $joinedClassTests = implode('"," "))][contains(concat(" ",@class," "),concat(" ","', $classNames);

      return $elementName . '[contains(concat(" ",@class," "),concat(" ","' . $joinedClassTests . '"," "))]';
  }
  /**
   * Callback that translates an ":nth-child(...)" match into an XPath expression.
   *
   * @param string[] $match regular expression match; index 1 is the part before ":nth-child",
   *                        the remaining entries are consumed by parseNth()
   *
   * @return string the XPath expression
   */
  private function translateNthChild(array $match)
  {
      $parsed = $this->parseNth($match);
      $element = $match[1];

      if (!isset($parsed[self::MULTIPLIER])) {
          // Plain index without a multiplier.
          return sprintf('*[%u]/self::%s', $parsed[self::INDEX], $element);
      }

      $multiplier = $parsed[self::MULTIPLIER];
      if ($multiplier < 0) {
          // For a negative multiplier, positions are counted from the last node.
          return sprintf(
              '*[(last() - position()) mod %u = %u]/self::%s',
              abs($multiplier),
              $parsed[self::INDEX],
              $element
          );
      }

      return sprintf('*[position() mod %u = %u]/self::%s', $multiplier, $parsed[self::INDEX], $element);
  }
  /**
   * Callback that translates an ":nth-of-type(...)" match into an XPath expression.
   *
   * @param string[] $match regular expression match; index 1 is the part before ":nth-of-type",
   *                        the remaining entries are consumed by parseNth()
   *
   * @return string the XPath expression
   */
  private function translateNthOfType(array $match)
  {
      $parsed = $this->parseNth($match);
      $element = $match[1];

      if (!isset($parsed[self::MULTIPLIER])) {
          // Plain index without a multiplier.
          return sprintf('%s[%u]', $element, $parsed[self::INDEX]);
      }

      $multiplier = $parsed[self::MULTIPLIER];
      if ($multiplier < 0) {
          // For a negative multiplier, positions are counted from the last node.
          return sprintf(
              '%s[(last() - position()) mod %u = %u]',
              $element,
              abs($multiplier),
              $parsed[self::INDEX]
          );
      }

      return sprintf('%s[position() mod %u = %u]', $element, $multiplier, $parsed[self::INDEX]);
  }
  /**
   * Parses the argument of an ":nth-child(...)" or ":nth-of-type(...)" match.
   *
   * Supported forms of $match[2] are "even", "odd", a plain (optionally signed) digit, and
   * "an" or "an+b" terms with an optional coefficient "a" and an optional signed offset "b".
   *
   * @param string[] $match regular expression match; index 2 is the whole argument, index 3
   *                        (if set) is the trailing signed offset of an "an+b" term
   *
   * @return int[] array with the key self::INDEX and, if the argument has a non-zero
   *               multiplier, the key self::MULTIPLIER (which may be negative)
   */
  private function parseNth(array $match)
  {
      $keyword = strtolower($match[2]);
      if (in_array($keyword, array('even', 'odd'), true)) {
          // we have "even" or "odd"
          $index = $keyword === 'even' ? 0 : 1;

          return array(self::MULTIPLIER => 2, self::INDEX => $index);
      }

      if (stripos($match[2], 'n') === false) {
          // There is no "n" and hence no multiplier: the argument is a plain index.
          $index = (int) preg_replace('/\\s+/', '', $match[2]);

          return array(self::INDEX => $index);
      }

      if (isset($match[3]) && $match[3] !== '') {
          // Cut off only the trailing offset. Removing every occurrence of the offset text
          // would also eat an identical coefficient, e.g. turn "+2n+2" into "n".
          $offsetPosition = strrpos($match[2], $match[3]);
          $multipleTerm = $offsetPosition === false
              ? $match[2]
              : substr_replace($match[2], '', $offsetPosition, strlen($match[3]));
          // The offset may contain any whitespace between the sign and the digit.
          $index = (int) preg_replace('/\\s+/', '', $match[3]);
      } else {
          $multipleTerm = $match[2];
          $index = 0;
      }

      $multiplierText = str_ireplace('n', '', $multipleTerm);
      if ($multiplierText === '' || $multiplierText === '+') {
          // "n" and "+n" have an implicit coefficient of 1.
          $multiplier = 1;
      } elseif ($multiplierText === '-') {
          // "-n" has an implicit coefficient of -1.
          $multiplier = -1;
      } else {
          $multiplier = (int) $multiplierText;
      }

      if ($multiplier === 0) {
          // "0n", "+0n" and "-0n": only the offset remains. Returning here also keeps the
          // loop below from running forever and avoids a "mod 0" in the generated XPath.
          return array(self::INDEX => $index);
      }

      // Shift a negative offset into the non-negative range by adding the step size.
      while ($index < 0) {
          $index += abs($multiplier);
      }

      return array(self::MULTIPLIER => $multiplier, self::INDEX => $index);
  }
  /**
   * Parses a CSS declaration block into property name/value pairs.
   *
   * Example:
   *
   * The declaration block
   *
   * "color: #000; font-weight: bold;"
   *
   * will be parsed into the following array:
   *
   * "color" => "#000"
   * "font-weight" => "bold"
   *
   * Property names are lowercased. If a property occurs more than once, the last value wins.
   * Parts that do not have the form "name: value" are skipped. Results are cached per block.
   *
   * @param string $cssDeclarationsBlock the CSS declarations block without the curly braces, may be empty
   *
   * @return string[]
   *         the CSS declarations with the property names as array keys and the property values as array values
   */
  private function parseCssDeclarationsBlock($cssDeclarationsBlock)
  {
      if (isset($this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock])) {
          return $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock];
      }

      $properties = array();
      // Semicolons directly followed by "base64" or "charset" (as in data URIs) do not end a declaration.
      $declarations = preg_split('/;(?!base64|charset)/', $cssDeclarationsBlock);
      foreach ($declarations as $declaration) {
          $matches = array();
          // The "s" modifier lets the value span several lines; without it, a declaration
          // with a line break inside its value would not match and would be dropped.
          if (!preg_match('/^([A-Za-z\\-]+)\\s*:\\s*(.+)$/s', trim($declaration), $matches)) {
              continue;
          }

          $propertyName = strtolower($matches[1]);
          $propertyValue = $matches[2];
          $properties[$propertyName] = $propertyValue;
      }

      $this->caches[self::CACHE_KEY_CSS_DECLARATIONS_BLOCK][$cssDeclarationsBlock] = $properties;

      return $properties;
  }
  /**
   * Collects the nodes that are not to be emogrified.
   *
   * Each key of $this->excludedSelectors is translated to XPath and queried; all nodes found
   * are gathered into one list, in query order.
   *
   * @param \DOMXPath $xpath
   *
   * @return \DOMElement[]
   */
  private function getNodesToExclude(\DOMXPath $xpath)
  {
      $nodesToSkip = array();

      $selectors = array_keys($this->excludedSelectors);
      foreach ($selectors as $excludedSelector) {
          $xpathQuery = $this->translateCssToXpath($excludedSelector);
          $matchingNodes = $xpath->query($xpathQuery);
          foreach ($matchingNodes as $matchingNode) {
              $nodesToSkip[] = $matchingNode;
          }
      }

      return $nodesToSkip;
  }
  1018. }