Code Coverage |
||||||||||
Lines |
Functions and Methods |
Classes and Traits |
||||||||
Total | |
95.97% |
119 / 124 |
|
69.23% |
9 / 13 |
CRAP | |
0.00% |
0 / 1 |
PictureTags | |
95.97% |
119 / 124 |
|
69.23% |
9 / 13 |
59 | |
0.00% |
0 / 1 |
__construct | |
100.00% |
1 / 1 |
|
100.00% |
1 / 1 |
1 | |||
replaceUrl | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
2 | |||
replaceUrlOr | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
2 | |||
lazyGet | |
86.67% |
13 / 15 |
|
0.00% |
0 / 1 |
13.40 | |||
findAttributesWithNameOrPrefixed | |
100.00% |
7 / 7 |
|
100.00% |
1 / 1 |
4 | |||
textToUTF8WithNonAsciiEncoded | |
100.00% |
4 / 4 |
|
100.00% |
1 / 1 |
2 | |||
getAttributes | |
95.00% |
19 / 20 |
|
0.00% |
0 / 1 |
7 | |||
createAttributes | |
83.33% |
5 / 6 |
|
0.00% |
0 / 1 |
3.04 | |||
replaceCallback | |
97.87% |
46 / 47 |
|
0.00% |
0 / 1 |
18 | |||
removePictureTagsTemporarily | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 | |||
insertPictureTagsBack | |
100.00% |
3 / 3 |
|
100.00% |
1 / 1 |
1 | |||
replaceHtml | |
100.00% |
12 / 12 |
|
100.00% |
1 / 1 |
4 | |||
replace | |
100.00% |
2 / 2 |
|
100.00% |
1 / 1 |
1 |
1 | <?php |
2 | |
3 | namespace DOMUtilForWebP; |
4 | |
5 | //use Sunra\PhpSimple\HtmlDomParser; |
6 | use KubAT\PhpSimple\HtmlDomParser; |
7 | |
8 | /** |
9 | * Class PictureTags - convert an <img> tag to a <picture> tag and add the webp versions of the images |
10 | * Code is based on code from the ShortPixel plugin, which in turn used code from Responsify WP plugin |
11 | * |
12 | * It works like this: |
13 | * |
14 | * 1. Remove existing <picture> tags and their content - replace with tokens in order to reinsert later |
15 | * 2. Process <img> tags. |
16 | * - The tags are found with regex. |
17 | * - The attributes are parsed with DOMDocument if it exists, otherwise with the Simple Html Dom library, |
18 | * which is included inside this library |
19 | * 3. Re-insert the existing <picture> tags |
20 | * |
21 | * This procedure is very gentle and needle-like. No need for a complete parse - so invalid HTML is no big issue |
22 | * |
23 | * PS: |
24 | * https://packagist.org/packages/masterminds/html5 |
25 | */ |
26 | |
27 | |
class PictureTags
{
    /**
     * Existing <picture> tags that were swapped for placeholder tokens by replaceHtml().
     * The array index is the number embedded in the "PICTURE_TAG_<n>_" token.
     *
     * @var string[]
     */
    private $existingPictureTags;

    /**
     * Conversion map for mb_encode_numericentity(): every code point from DEL (0x7f) up to the
     * last Unicode code point (0x10ffff) is turned into a numeric entity. The mask must be wide
     * enough for code points above 0xffff, otherwise ie emojis are left unencoded.
     *
     * @var int[]
     */
    private static $nonAsciiConvMap = [0x7f, 0x10ffff, 0, 0x1fffff];

    /**
     * Empty constructor for preventing child classes from creating constructors.
     *
     * We do this because otherwise the "new static()" call inside the ::replace() method
     * would be unsafe. See #21
     * @return void
     */
    final public function __construct()
    {
        $this->existingPictureTags = [];
    }

    /**
     * Get the webp URL for an image URL. Child classes may override this.
     *
     * @param string $url URL of the source image
     *
     * @return string|null The URL with ".webp" appended if it ends with "png", "jpg" or "jpeg"
     *                     (case sensitive), otherwise null
     */
    public function replaceUrl($url)
    {
        if (!preg_match('#(png|jpe?g)$#', $url)) {
            return null;
        }
        return $url . '.webp';
    }

    /**
     * Like replaceUrl(), but with a caller supplied fallback instead of null.
     *
     * @param string $url                 URL of the source image
     * @param mixed  $returnValueIfDenied Value to return when replaceUrl() returns null
     *
     * @return mixed The result of replaceUrl(), or $returnValueIfDenied
     */
    public function replaceUrlOr($url, $returnValueIfDenied)
    {
        $url = $this->replaceUrl($url);
        return (isset($url) ? $url : $returnValueIfDenied);
    }

    /**
     * Look for attributes such as "data-lazy-src" and "data-src" and prefer them over "src"
     *
     * @param array  $attributes an array of attributes for the element
     * @param string $attrName   ie "src", "srcset" or "sizes"
     *
     * @return array an array with "value" key and "attrName" key. ("value" is the trimmed value of the
     *               attribute and "attrName" is the name of the attribute used). Both are false when
     *               no non-empty variant of the attribute exists.
     */
    private static function lazyGet($attributes, $attrName)
    {
        // Order matters: the first non-empty variant wins
        foreach (['data-lazy-', 'data-', ''] as $prefix) {
            $name = $prefix . $attrName;
            if (isset($attributes[$name]) && strlen($attributes[$name])) {
                return [
                    'value' => trim($attributes[$name]),
                    'attrName' => $name,
                ];
            }
        }
        return [
            'value' => false,
            'attrName' => false,
        ];
    }

    /**
     * Look for attribute such as "src", but also with prefixes such as "data-lazy-src" and "data-src"
     *
     * @param array  $attributes an array of all attributes for the element
     * @param string $attrName   ie "src", "srcset" or "sizes"
     *
     * @return array map of attribute name => trimmed attribute value, holding every non-empty variant
     *               found (unprefixed first, then "data-lazy-", then "data-")
     */
    private static function findAttributesWithNameOrPrefixed($attributes, $attrName)
    {
        $result = [];
        foreach (['', 'data-lazy-', 'data-'] as $prefix) {
            $name = $prefix . $attrName;
            if (isset($attributes[$name]) && strlen($attributes[$name])) {
                $result[$name] = trim($attributes[$name]);
            }
        }
        return $result;
    }

    /**
     * Convert to UTF-8 and encode chars outside of ascii-range
     *
     * Input: html that might be in any character encoding and might contain non-ascii characters
     * Output: html in UTF-8 encoding, where non-ascii characters are encoded as numeric entities.
     *         The input is returned unchanged when the mbstring functions are unavailable.
     *
     * @param string $html
     * @return string
     */
    private static function textToUTF8WithNonAsciiEncoded($html)
    {
        if (function_exists("mb_convert_encoding")) {
            $html = mb_convert_encoding($html, 'UTF-8');
            $html = mb_encode_numericentity($html, self::$nonAsciiConvMap, 'UTF-8');
        }
        return $html;
    }

    /**
     * Parse the attributes of the first <img> element in a html fragment.
     *
     * Uses DOMDocument when available, otherwise the bundled Simple Html Dom parser.
     *
     * @param string $html html of a single tag, as matched in replaceHtml()
     *
     * @return array map of attribute name => attribute value. Empty if no <img> element was found
     */
    private static function getAttributes($html)
    {
        if (class_exists('\\DOMDocument')) {
            $dom = new \DOMDocument();

            if (function_exists("mb_encode_numericentity")) {
                // I'm in doubt if I should add the following line (see #41)
                // $html = mb_convert_encoding($html, 'UTF-8');
                $html = mb_encode_numericentity($html, self::$nonAsciiConvMap); // #41
            }

            // Fragments are rarely valid documents - the parser warnings are of no interest
            @$dom->loadHTML($html);
            $image = $dom->getElementsByTagName('img')->item(0);
            if ($image === null) {
                // The tag regex also matches things like "<imgx ...>", which is not an <img> element
                return [];
            }
            $attributes = [];
            foreach ($image->attributes as $attr) {
                $attributes[$attr->nodeName] = $attr->nodeValue;
            }
            return $attributes;
        }

        // Convert to UTF-8 because HtmlDomParser::str_get_html needs to be told the
        // encoding. As UTF-8 might conflict with the charset set in the meta, we must
        // encode all characters outside the ascii-range.
        // It would perhaps have been better to try to guess the encoding rather than
        // changing it (see #39), but I'm reluctant to introduce changes.
        $html = self::textToUTF8WithNonAsciiEncoded($html);
        $dom = HtmlDomParser::str_get_html($html, false, true, 'UTF-8', false);
        if ($dom !== false) {
            // Only the first image is of interest
            foreach ($dom->find('img,IMG') as $elem) {
                $attributes = [];
                foreach ($elem->getAllAttributes() as $attrName => $attrValue) {
                    $attributes[strtolower($attrName)] = $attrValue;
                }
                return $attributes;
            }
        }
        return [];
    }

    /**
     * Makes a string with all attributes.
     *
     * Double quotes inside values are written as "&quot;". The values have been decoded by the
     * parser, so writing them back raw would let a value such as `" onerror="...` break out of
     * its attribute.
     *
     * @param array $attribute_array map of attribute name => attribute value
     * @return string The attributes, each one preceded by a space. Empty string if there are none
     */
    private static function createAttributes($attribute_array)
    {
        $attributes = '';
        foreach ($attribute_array as $attribute => $value) {
            $attributes .= ' ' . $attribute . '="' . str_replace('"', '&quot;', (string) $value) . '"';
        }
        return $attributes;
    }

    /**
     * Split the value of a srcset attribute into its image candidates.
     *
     * Follows the rules browsers use: an URL is a run of non-whitespace characters, and candidates
     * are separated by a comma which either ends the URL or follows the descriptor. So both
     * "a.jpg 1x, b.jpg 2x" and "a.jpg 1x,b.jpg 2x" yield two candidates, while commas inside an
     * URL (ie in data urls) are left alone.
     *
     * @param string $srcset value of a srcset attribute
     *
     * @return array list of [url, descriptor] pairs. The descriptor is ie "520w" or "2x", or an
     *               empty string when the candidate has none
     */
    private static function parseSrcset($srcset)
    {
        $whitespace = " \t\n\r\f\x0B";
        $candidates = [];
        $rest = (string) $srcset;

        while (true) {
            // Skip separators and whitespace in front of the next candidate
            $rest = ltrim($rest, $whitespace . ',');
            if ($rest === '') {
                break;
            }

            // After the ltrim, the first character is never whitespace, so the URL is never empty
            $urlLength = strcspn($rest, $whitespace);
            $url = substr($rest, 0, $urlLength);
            $rest = (string) substr($rest, $urlLength);

            if (substr($url, -1) === ',') {
                // Comma directly after the URL: the candidate has no descriptor
                $url = rtrim($url, ',');
                $descriptor = '';
            } else {
                // Everything up to the next comma is the descriptor
                $commaPos = strpos($rest, ',');
                if ($commaPos === false) {
                    $descriptor = trim($rest, $whitespace);
                    $rest = '';
                } else {
                    $descriptor = trim(substr($rest, 0, $commaPos), $whitespace);
                    $rest = (string) substr($rest, $commaPos + 1);
                }
            }
            $candidates[] = [$url, $descriptor];
        }
        return $candidates;
    }

    /**
     * Replace <img> tag with <picture> tag.
     *
     * @param array $match regex match, as passed by preg_replace_callback. $match[0] is the <img> tag
     *
     * @return string The <picture> tag, or the unchanged <img> tag if it was decided not to convert it
     */
    private function replaceCallback($match)
    {
        $imgTag = $match[0];

        // Do nothing with images that have the 'webpexpress-processed' class.
        if (strpos($imgTag, 'webpexpress-processed') !== false) {
            return $imgTag;
        }
        $imgAttributes = self::getAttributes($imgTag);

        $sizesInfo = self::lazyGet($imgAttributes, 'sizes');
        $srcSetAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'srcset');
        $srcAttributes = self::findAttributesWithNameOrPrefixed($imgAttributes, 'src');

        if ((!isset($srcSetAttributes['srcset'])) && (!isset($srcAttributes['src']))) {
            // better not mess with this html...
            return $imgTag;
        }

        // add the exclude class so if this content is processed again in other filter,
        // the img is not converted again in picture
        $imgAttributes['class'] = (isset($imgAttributes['class']) ? $imgAttributes['class'] . " " : "") .
            "webpexpress-processed";

        // Process srcset (also data-srcset etc)
        $atLeastOneWebp = false;
        $sourceTagAttributes = [];
        foreach ($srcSetAttributes as $attrName => $attrValue) {
            $srcsetArrWebP = [];
            foreach (self::parseSrcset($attrValue) as $candidate) {
                // $candidate is ie ["http://example.com/image.jpg", "520w"]
                list($src, $descriptor) = $candidate;

                $webpUrl = $this->replaceUrlOr($src, false);
                // Loose comparison kept, so an empty value from an overriding replaceUrl() aborts too
                if ($webpUrl == false) {
                    // We want ALL of the sizes as webp.
                    // If we cannot have that, it is better to abort! - See #42
                    return $imgTag;
                }
                if (substr($src, 0, 5) != 'data:') {
                    $atLeastOneWebp = true;
                    $srcsetArrWebP[] = $webpUrl . ($descriptor !== '' ? ' ' . $descriptor : '');
                }
            }
            $sourceTagAttributes[$attrName] = implode(', ', $srcsetArrWebP);
        }

        foreach ($srcAttributes as $attrName => $attrValue) {
            if (substr($attrValue, 0, 5) == 'data:') {
                // ignore tags with data urls, such as <img src="data:...
                return $imgTag;
            }
            // Make sure not to override existing srcset with src
            $sourceAttrName = $attrName . 'set';
            if (isset($sourceTagAttributes[$sourceAttrName])) {
                continue;
            }
            $srcWebP = $this->replaceUrlOr($attrValue, false);
            if ($srcWebP !== false) {
                // Only real URLs are stored - a denied URL must not end up as an empty srcset=""
                $atLeastOneWebp = true;
                $sourceTagAttributes[$sourceAttrName] = $srcWebP;
            }
        }

        if ($sizesInfo['value']) {
            $sourceTagAttributes[$sizesInfo['attrName']] = $sizesInfo['value'];
        }

        if (!$atLeastOneWebp) {
            // We have no webps for you, so no reason to create <picture> tag
            return $imgTag;
        }

        return '<picture>'
            . '<source' . self::createAttributes($sourceTagAttributes) . ' type="image/webp">'
            . '<img' . self::createAttributes($imgAttributes) . '>'
            . '</picture>';
    }

    /**
     * Callback for preg_replace_callback: store a <picture> tag and return a token to put in its place.
     *
     * @param array $content regex match. $content[0] is the complete <picture> tag
     *
     * @return string token of the form "PICTURE_TAG_<n>_", where n is the index of the stored tag
     */
    public function removePictureTagsTemporarily($content)
    {
        $this->existingPictureTags[] = $content[0];
        return 'PICTURE_TAG_' . (count($this->existingPictureTags) - 1) . '_';
    }

    /**
     * Callback for preg_replace_callback: swap a token back to the <picture> tag it stands for.
     *
     * @param array $content regex match. $content[0] is the token and $content[1] is its number
     *
     * @return string The stored <picture> tag. If no tag is stored under that number, the matched
     *                text did not come from removePictureTagsTemporarily() and is returned unchanged
     */
    public function insertPictureTagsBack($content)
    {
        $numberInt = intval($content[1]);
        if (!isset($this->existingPictureTags[$numberInt])) {
            return $content[0];
        }
        return $this->existingPictureTags[$numberInt];
    }

    /**
     * Replace the <img> tags in a piece of html with <picture> tags that also offer webp.
     *
     * Existing <picture> tags are left untouched.
     *
     * @param string $content the html to process
     *
     * @return string The processed html. If one of the regex operations fails, the html is
     *                returned without replacements rather than being lost
     */
    public function replaceHtml($content)
    {
        if (!class_exists('\\DOMDocument') && function_exists('mb_detect_encoding')) {
            // PS: Correctly identifying Windows-1251 encoding only works on some systems
            //     But at least I'm not aware of any false positives
            if (mb_detect_encoding($content, ["ASCII", "UTF8", "Windows-1251"]) == 'Windows-1251') {
                $content = mb_convert_encoding($content, 'UTF-8', 'Windows-1251');
            }
        }

        $this->existingPictureTags = [];

        // Temporarily remove existing <picture> tags
        // PS: preg_replace_callback returns null on error (ie when the backtrack limit is hit)
        $withoutPictures = preg_replace_callback(
            '/<picture[^>]*>.*?<\/picture>/is',
            array($this, 'removePictureTagsTemporarily'),
            $content
        );
        if ($withoutPictures === null) {
            return $content;
        }

        // Replace "<img>" tags
        $replaced = preg_replace_callback('/<img[^>]*>/i', array($this, 'replaceCallback'), $withoutPictures);
        if ($replaced === null) {
            return $content;
        }

        // Re-insert <picture> tags that were removed
        $restored = preg_replace_callback(
            '/PICTURE_TAG_(\d+)_/',
            array($this, 'insertPictureTagsBack'),
            $replaced
        );
        return ($restored === null ? $content : $restored);
    }

    /**
     * Main replacer function
     *
     * @param string $html the html to process
     * @return string The html, with <img> tags replaced by <picture> tags where possible
     */
    public static function replace($html)
    {
        $pt = new static();
        return $pt->replaceHtml($html);
    }
}