Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
| Total | |
95.97% |
119 / 124 |
|
69.23% |
9 / 13 |
CRAP | |
0.00% |
0 / 1 |
| PictureTags | |
95.97% |
119 / 124 |
|
69.23% |
9 / 13 |
59 | |
0.00% |
0 / 1 |
| __construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
| replaceUrl | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
| replaceUrlOr | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
| lazyGet | |
86.67% |
13 / 15 |
|
0.00% |
0 / 1 |
13.40 | |||
| findAttributesWithNameOrPrefixed | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
| textToUTF8WithNonAsciiEncoded | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
| getAttributes | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
7 | |||
| createAttributes | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
| replaceCallback | |
97.87% |
46 / 47 |
|
0.00% |
0 / 1 |
18 | |||
| removePictureTagsTemporarily | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| insertPictureTagsBack | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
| replaceHtml | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
| replace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
| 1 | <?php |
| 2 | |
| 3 | namespace DOMUtilForWebP; |
| 4 | |
| 5 | //use Sunra\PhpSimple\HtmlDomParser; |
| 6 | use KubAT\PhpSimple\HtmlDomParser; |
| 7 | |
| 8 | /** |
| 9 | * Class PictureTags - convert an <img> tag to a <picture> tag and add the webp versions of the images |
| 10 | * Code is based on code from the ShortPixel plugin, which in turn used code from Responsify WP plugin |
| 11 | * |
| 12 | * It works like this: |
| 13 | * |
| 14 | * 1. Remove existing <picture> tags and their content - replace with tokens in order to reinsert later |
| 15 | * 2. Process <img> tags. |
| 16 | * - The tags are found with regex. |
| 17 | * - The attributes are parsed with DOMDocument if it exists, otherwise with the Simple Html Dom library, |
| 18 | * which is included inside this library |
| 19 | * 3. Re-insert the existing <picture> tags |
| 20 | * |
| 21 | * This procedure is very gentle and needle-like. No need for a complete parse - so invalid HTML is no big issue |
| 22 | * |
| 23 | * PS: |
| 24 | * https://packagist.org/packages/masterminds/html5 |
| 25 | */ |
| 26 | |
| 27 | |
class PictureTags
{
    /**
     * The <picture> tags that were found in the content currently being processed.
     *
     * They are swapped for "PICTURE_TAG_<index>_" tokens while the <img> tags are processed and
     * are re-inserted afterwards. The array index is the number used in the token.
     *
     * @var string[]
     */
    private $existingPictureTags;

    /**
     * Empty constructor for preventing child classes from creating constructors.
     *
     * We do this because otherwise the "new static()" call inside the ::replace() method
     * would be unsafe. See #21
     * @return void
     */
    final public function __construct()
    {
        $this->existingPictureTags = [];
    }

    /**
     * Get the URL of the webp version of an image URL.
     *
     * The default implementation appends ".webp" to URLs ending in "png", "jpg" or "jpeg"
     * (case sensitive). As the method is public and non-final, child classes can override it.
     *
     * @param  string $url  URL of the source image
     *
     * @return string|null  URL of the webp image, or null if no webp should be used for this URL
     */
    public function replaceUrl($url)
    {
        if (!preg_match('#(png|jpe?g)$#', $url)) {
            return null;
        }
        return $url . '.webp';
    }

    /**
     * Like replaceUrl(), but returns a caller-supplied value when the URL is denied.
     *
     * @param  string $url                  URL of the source image
     * @param  mixed  $returnValueIfDenied  what to return when replaceUrl() returns null
     *
     * @return mixed  URL of the webp image, or $returnValueIfDenied
     */
    public function replaceUrlOr($url, $returnValueIfDenied)
    {
        $url = $this->replaceUrl($url);
        return (isset($url) ? $url : $returnValueIfDenied);
    }

    /**
     * Look for attributes such as "data-lazy-src" and "data-src" and prefer them over "src"
     *
     * The first non-empty attribute wins, tried in this order: "data-lazy-X", "data-X", "X".
     *
     * @param  array  $attributes  an array of attributes for the element
     * @param  string $attrName    ie "src", "srcset" or "sizes"
     *
     * @return array  an array with "value" key and "attrName" key. ("value" is the trimmed value of the
     *                attribute and "attrName" is the name of the attribute used). Both are false when
     *                none of the attributes are set and non-empty.
     */
    private static function lazyGet($attributes, $attrName)
    {
        foreach (['data-lazy-', 'data-', ''] as $prefix) {
            $name = $prefix . $attrName;
            if (isset($attributes[$name]) && strlen($attributes[$name])) {
                return [
                    'value' => trim($attributes[$name]),
                    'attrName' => $name,
                ];
            }
        }
        return [
            'value' => false,
            'attrName' => false,
        ];
    }

    /**
     * Look for attribute such as "src", but also with prefixes such as "data-lazy-src" and "data-src"
     *
     * @param  array  $attributes  an array of all attributes for the element
     * @param  string $attrName    ie "src", "srcset" or "sizes"
     *
     * @return array  the non-empty attributes that were found. Keys are the attribute names (ie "src",
     *                "data-src"), values are the trimmed attribute values. The unprefixed name comes first.
     */
    private static function findAttributesWithNameOrPrefixed($attributes, $attrName)
    {
        $tryThesePrefixes = ['', 'data-lazy-', 'data-'];
        $result = [];
        foreach ($tryThesePrefixes as $prefix) {
            $name = $prefix . $attrName;
            if (isset($attributes[$name]) && strlen($attributes[$name])) {
                $result[$name] = trim($attributes[$name]);
            }
        }
        return $result;
    }

    /**
     * Convert to UTF-8 and encode chars outside of ascii-range
     *
     * Input: html that might be in any character encoding and might contain non-ascii characters
     * Output: html in UTF-8 encoding, where non-ascii characters are encoded
     *
     * The html is returned unchanged when the mbstring functions are not available.
     *
     * @param  string $html
     * @return string
     */
    private static function textToUTF8WithNonAsciiEncoded($html)
    {
        if (function_exists("mb_convert_encoding")) {
            $html = mb_convert_encoding($html, 'UTF-8');
            $html = mb_encode_numericentity($html, array (0x7f, 0xffff, 0, 0xffff), 'UTF-8');
        }
        return $html;
    }

    /**
     * Parse the attributes of the first <img> tag in a piece of html.
     *
     * DOMDocument is used if it exists, otherwise the Simple Html Dom library.
     *
     * @param  string $html  html of a single <img> tag
     *
     * @return array  attribute name => attribute value. Empty if no <img> element could be parsed.
     */
    private static function getAttributes($html)
    {
        if (class_exists('\\DOMDocument')) {
            $dom = new \DOMDocument();

            if (function_exists("mb_encode_numericentity")) {
                // I'm in doubt if I should add the following line (see #41)
                // $html = mb_convert_encoding($html, 'UTF-8');
                $html = mb_encode_numericentity($html, array (0x7f, 0xffff, 0, 0xffff)); // #41
            }

            // Collect parse errors internally rather than having them emitted as PHP warnings.
            // The previous setting is restored, and the error buffer is only cleared if the caller
            // was not collecting errors itself.
            $previousSetting = libxml_use_internal_errors(true);
            $dom->loadHTML($html);
            if (!$previousSetting) {
                libxml_clear_errors();
            }
            libxml_use_internal_errors($previousSetting);

            $image = $dom->getElementsByTagName('img')->item(0);
            if ($image === null) {
                // Happens for tags that merely start with "<img", such as "<imgx ...>"
                return [];
            }
            $attributes = [];
            foreach ($image->attributes as $attr) {
                $attributes[$attr->nodeName] = $attr->nodeValue;
            }
            return $attributes;
        } else {
            // Convert to UTF-8 because HtmlDomParser::str_get_html needs to be told the
            // encoding. As UTF-8 might conflict with the charset set in the meta, we must
            // encode all characters outside the ascii-range.
            // It would perhaps have been better to try to guess the encoding rather than
            // changing it (see #39), but I'm reluctant to introduce changes.
            $html = self::textToUTF8WithNonAsciiEncoded($html);
            $dom = HtmlDomParser::str_get_html($html, false, true, 'UTF-8', false);
            if ($dom !== false) {
                $elems = $dom->find('img,IMG');
                foreach ($elems as $elem) {
                    // Only the first image is of interest
                    $attributes = [];
                    foreach ($elem->getAllAttributes() as $attrName => $attrValue) {
                        $attributes[strtolower($attrName)] = $attrValue;
                    }
                    return $attributes;
                }
            }
            return [];
        }
    }

    /**
     * Escape an attribute value so it can be placed between double quotes.
     *
     * Double quotes and angle brackets are always escaped. Ampersands are escaped unless they already
     * start a character reference, as the value can be either entity-decoded (DOMDocument) or raw
     * (Simple Html Dom). The escaping is byte-wise, so it does not depend on the character encoding.
     *
     * @param  mixed $value
     * @return string
     */
    private static function escapeAttributeValue($value)
    {
        $value = (string) $value;
        $escaped = preg_replace('/&(?!(?:#[0-9]+|#x[0-9a-f]+|[a-z][a-z0-9]*);)/i', '&amp;', $value);
        if ($escaped === null) {
            // PCRE failure. Continue with the unmodified value rather than losing it
            $escaped = $value;
        }
        return strtr($escaped, ['"' => '&quot;', '<' => '&lt;', '>' => '&gt;']);
    }

    /**
     * Makes a string with all attributes.
     *
     * The values are escaped, so a value containing ie a double quote cannot break out of the attribute.
     *
     * @param  array $attribute_array  attribute name => attribute value
     * @return string  ie ' src="a.jpg" alt="text"' (with a leading space), or '' if there are no attributes
     */
    private static function createAttributes($attribute_array)
    {
        $attributes = '';
        foreach ($attribute_array as $attribute => $value) {
            $attributes .= ' ' . $attribute . '="' . self::escapeAttributeValue($value) . '"';
        }
        return $attributes;
    }

    /**
     * Replace <img> tag with <picture> tag.
     *
     * The tag is returned unchanged if it has already been processed, if it has neither "src" nor
     * "srcset", if it uses a data url, or if not all of its images are available as webp.
     *
     * @param  array $match  regex match, where index 0 is the complete <img> tag
     * @return string  the replacement html
     */
    private function replaceCallback($match)
    {
        $imgTag = $match[0];

        // Do nothing with images that have the 'webpexpress-processed' class.
        if (strpos($imgTag, 'webpexpress-processed') !== false) {
            return $imgTag;
        }
        $imgAttributes = self::getAttributes($imgTag);

        $srcSetAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'srcset');
        $srcAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'src');

        if ((!isset($srcSetAttributes['srcset'])) && (!isset($srcAttributes['src']))) {
            // better not mess with this html...
            return $imgTag;
        }

        // add the exclude class so if this content is processed again in other filter,
        // the img is not converted again in picture
        $imgAttributes['class'] = (isset($imgAttributes['class']) ? $imgAttributes['class'] . " " : "") .
            "webpexpress-processed";

        // Process srcset (also data-srcset etc)
        $atLeastOneWebp = false;
        $sourceTagAttributes = [];
        foreach ($srcSetAttributes as $attrName => $attrValue) {
            $srcsetArrWebP = [];
            foreach (explode(', ', $attrValue) as $srcSetEntry) {
                // $srcSetEntry is ie "http://example.com/image.jpg 520w"
                $result = preg_split('/\s+/', trim($srcSetEntry));
                $src = trim($srcSetEntry);
                $width = null;
                if ($result && count($result) >= 2) {
                    list($src, $width) = $result;
                }

                $webpUrl = $this->replaceUrlOr($src, false);
                if (!$webpUrl) {
                    // We want ALL of the sizes as webp.
                    // If we cannot have that, it is better to abort! - See #42
                    return $imgTag;
                }
                if (substr($src, 0, 5) != 'data:') {
                    $atLeastOneWebp = true;
                    $srcsetArrWebP[] = $webpUrl . (isset($width) ? ' ' . $width : '');
                }
            }
            $sourceTagAttributes[$attrName] = implode(', ', $srcsetArrWebP);
        }

        foreach ($srcAttributes as $attrName => $attrValue) {
            if (substr($attrValue, 0, 5) == 'data:') {
                // ignore tags with data urls, such as <img src="data:...
                return $imgTag;
            }
            // Make sure not to override existing srcset with src
            if (!isset($sourceTagAttributes[$attrName . 'set'])) {
                $srcWebP = $this->replaceUrlOr($attrValue, false);
                if ($srcWebP !== false) {
                    // Only add the attribute when there actually is a webp.
                    // Otherwise we would end up with an empty attribute, ie data-srcset=""
                    $atLeastOneWebp = true;
                    $sourceTagAttributes[$attrName . 'set'] = $srcWebP;
                }
            }
        }

        $sizesInfo = self::lazyGet($imgAttributes, 'sizes');
        if ($sizesInfo['value']) {
            $sourceTagAttributes[$sizesInfo['attrName']] = $sizesInfo['value'];
        }

        if (!$atLeastOneWebp) {
            // We have no webps for you, so no reason to create <picture> tag
            return $imgTag;
        }

        return '<picture>'
            . '<source' . self::createAttributes($sourceTagAttributes) . ' type="image/webp">'
            . '<img' . self::createAttributes($imgAttributes) . '>'
            . '</picture>';
    }

    /**
     * Callback for swapping an existing <picture> tag with a token.
     *
     * @param  array $content  regex match, where index 0 is the complete <picture> tag
     * @return string  the token, ie "PICTURE_TAG_0_"
     */
    public function removePictureTagsTemporarily($content)
    {
        $this->existingPictureTags[] = $content[0];
        return 'PICTURE_TAG_' . (count($this->existingPictureTags) - 1) . '_';
    }

    /**
     * Callback for swapping a token back to the <picture> tag it replaced.
     *
     * @param  array $content  regex match, where index 0 is the token and index 1 is its number
     * @return string  the <picture> tag, or the matched text itself if it is not one of our tokens
     */
    public function insertPictureTagsBack($content)
    {
        $numberInt = intval($content[1]);
        if (!isset($this->existingPictureTags[$numberInt])) {
            // The content contained text that looks like a token, but we did not create it
            return $content[0];
        }
        return $this->existingPictureTags[$numberInt];
    }

    /**
     * Replace the <img> tags in the content with <picture> tags.
     *
     * @param  string $content  html
     * @return string  the html with <img> tags replaced. If a regex operation fails, the content is
     *                 returned without any replacements.
     */
    public function replaceHtml($content)
    {
        if (!class_exists('\\DOMDocument') && function_exists('mb_detect_encoding')) {
            // PS: Correctly identifying Windows-1251 encoding only works on some systems
            // But at least I'm not aware of any false positives
            if (mb_detect_encoding($content, ["ASCII", "UTF8", "Windows-1251"]) == 'Windows-1251') {
                $content = mb_convert_encoding($content, 'UTF-8', 'Windows-1251');
            }
        }

        $this->existingPictureTags = [];

        // preg_replace_callback() returns null on failure (ie when the backtrack limit is hit).
        // In that case we bail out with the content as it is, rather than returning nothing.

        // Temporarily remove existing <picture> tags
        $withoutPictureTags = preg_replace_callback(
            '/<picture[^>]*>.*?<\/picture>/is',
            array($this, 'removePictureTagsTemporarily'),
            $content
        );
        if ($withoutPictureTags === null) {
            return $content;
        }

        // Replace "<img>" tags
        $withImagesReplaced = preg_replace_callback(
            '/<img[^>]*>/i',
            array($this, 'replaceCallback'),
            $withoutPictureTags
        );
        if ($withImagesReplaced === null) {
            return $content;
        }

        // Re-insert <picture> tags that were removed
        $result = preg_replace_callback(
            '/PICTURE_TAG_(\d+)_/',
            array($this, 'insertPictureTagsBack'),
            $withImagesReplaced
        );
        if ($result === null) {
            return $content;
        }

        return $result;
    }

    /**
     * Main replacer function
     *
     * @param  string $html
     * @return string  the html with <img> tags replaced by <picture> tags
     */
    public static function replace($html)
    {
        $pt = new static();
        return $pt->replaceHtml($html);
    }
}