source: trunk/client/inc/hpdf5/spipu/html2pdf/src/Parsing/HtmlLexer.php@ 418

Last change on this file since 418 was 347, checked in by roby, 4 years ago

Aggiornamento per compatibilità con php7.4

File size: 2.3 KB
Line 
1<?php
2/**
3 * Html2Pdf Library - parsing Html class
4 *
5 * HTML => PDF converter
6 * distributed under the OSL-3.0 License
7 *
8 * @package Html2pdf
9 * @author Laurent MINGUET <webmaster@html2pdf.fr>
10 * @copyright 2017 Laurent MINGUET
11 */
12namespace Spipu\Html2Pdf\Parsing;
13
/**
 * Class HtmlLexer
 *
 * Splits an HTML string into a flat list of Token objects: 'code' tokens for
 * opening/closing tags and 'txt' tokens for the text found between them.
 * HTML comments are skipped and produce no token.
 */
class HtmlLexer
{
    /**
     * Tokenize the HTML code
     *
     * The input is scanned from left to right. Consecutive pieces of text are
     * accumulated and emitted as a single 'txt' token right before the next
     * tag (or at the end of the input). Each tag is emitted as a 'code' token
     * together with the value of the line counter at the point where the tag
     * starts. The line counter starts at 1 and is advanced by every "\n"
     * found in comments, tags and text.
     *
     * @param string $html HTML code to tokenize
     *
     * @return Token[]
     */
    public function tokenize($html)
    {
        // strlen(null) is deprecated on recent PHP versions; an explicit cast
        // keeps the historical behaviour (null => no tokens)
        $html = (string) $html;

        // initialise the array
        $tokens = array();

        // group 1: an opening or closing tag
        // group 2: a run of characters without "<", or a lone "<"
        $reg = '/(<\/?\w[^<>]*>)|([^<]+|<)/is';
        // ungreedy (U), so a comment stops at the first "-->"
        $commentRegex = '/(<!--.*-->)/isU';

        // text accumulated since the last tag
        $str = '';
        $offset = 0;
        $line = 1;
        $length = strlen($html);

        // As it finds a match
        while ($offset < $length) {
            // Skip a comment starting exactly at the current offset.
            // Only the next 4 bytes are inspected, so this check does not
            // rescan the remaining document on every iteration.
            // An unterminated "<!--" does not match the comment regex and
            // falls through to be handled as plain text below.
            if (substr($html, $offset, 4) === '<!--'
                && preg_match($commentRegex, $html, $match, PREG_OFFSET_CAPTURE, $offset)
            ) {
                $line += substr_count($match[1][0], "\n");
                $offset = $match[0][1] + strlen($match[0][0]);
                continue;
            }

            if (!preg_match($reg, $html, $parse, PREG_OFFSET_CAPTURE, $offset)) {
                // The pattern accepts any character, so getting here means
                // the regex engine reported an error. Consume one byte as
                // text so that the loop is guaranteed to advance instead of
                // restarting from offset 0 forever.
                $char = $html[$offset];
                $str .= $char;
                if ($char === "\n") {
                    $line++;
                }
                $offset++;
                continue;
            }

            // if it is a tag (an unmatched group is reported as an empty string)
            if ($parse[1][0] !== '') {
                // save the previous text if it exists
                if ($str !== '') {
                    $tokens[] = new Token('txt', $str);
                }

                // save the tag, with the line on which it starts
                $tokens[] = new Token('code', trim($parse[1][0]), $line);
                $line += substr_count($parse[1][0], "\n");

                // init the current text
                $str = '';
            } else { // else (if it is a text)
                // add the new text to the current text
                $str .= $parse[2][0];
                $line += substr_count($parse[2][0], "\n");
            }

            // Update offset to the end of the match
            $offset = $parse[0][1] + strlen($parse[0][0]);
        }

        // if a text is present in the end, we save it
        if ($str !== '') {
            $tokens[] = new Token('txt', $str);
        }

        return $tokens;
    }
}
Note: See TracBrowser for help on using the repository browser.