Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

HtmlLexer.php@ 398

Last change on this file since 398 was 347, checked in by roby, 3 years ago
Aggiornamento per compatibilità con php7.4
File size: 2.3 KB

Line
1	<?php
2	/**
3	* Html2Pdf Library - parsing Html class
4	*
5	* HTML => PDF converter
6	* distributed under the OSL-3.0 License
7	*
8	* @package Html2pdf
9	* @author Laurent MINGUET <webmaster@html2pdf.fr>
10	* @copyright 2017 Laurent MINGUET
11	*/
12	namespace Spipu\Html2Pdf\Parsing;
13
/**
 * Class HtmlLexer
 *
 * Splits an HTML string into a flat list of Token objects: 'code' tokens
 * for the tags and 'txt' tokens for the text found between them.
 * HTML comments are skipped and produce no token.
 */
class HtmlLexer
{
    /**
     * Tokenize the HTML code
     *
     * The input is scanned from left to right:
     *  - a complete comment ("<!--" ... "-->") is skipped, only its line
     *    breaks are counted;
     *  - a tag ("<name ...>" or "</name ...>") becomes a 'code' token that
     *    carries the line number on which the tag starts;
     *  - anything else (text, or a lone "<" that does not open a tag) is
     *    accumulated and emitted as a single 'txt' token just before the
     *    next tag, or at the end of the input. Text tokens are created
     *    without a line number.
     *
     * @param string $html HTML code to tokenize
     *
     * @return Token[]
     *
     * @throws \RuntimeException if the PCRE engine is unable to match the input
     */
    public function tokenize($html)
    {
        // initialise the array
        $tokens = array();

        // regexp to separate the tags from the texts:
        // group 1 is a complete tag, group 2 is a run of text or a lone "<".
        // The "|" must NOT be escaped: "\|" would match a literal pipe
        // character instead of acting as the alternation operator.
        $reg = '/(<\/?\w[^<>]*>)|([^<]+|<)/is';

        // ungreedy (U) so that a comment ends at the first "-->"
        $commentRegex = '/(<!--.*-->)/isU';

        // text accumulated since the last tag
        $str = '';
        $offset = 0;
        $line = 1;
        $length = strlen($html);

        // As it finds a match
        while ($offset < $length) {
            // skip a comment that starts exactly at the current offset
            // (an unterminated "<!--" falls through and is handled as text)
            if (strpos($html, '<!--', $offset) === $offset
                && preg_match($commentRegex, $html, $match, PREG_OFFSET_CAPTURE, $offset)
            ) {
                $line += substr_count($match[1][0], "\n");
                $offset = $match[0][1] + strlen($match[0][0]);
                continue;
            }

            // The pattern accepts any character, so a non-match can only
            // come from a PCRE failure. Without this guard $parse would be
            // empty, the offset would be reset to 0 and the loop would
            // never terminate.
            if (preg_match($reg, $html, $parse, PREG_OFFSET_CAPTURE, $offset) !== 1) {
                throw new \RuntimeException(
                    'Unable to tokenize the HTML at offset ' . $offset
                    . ' (PCRE error code ' . preg_last_error() . ')'
                );
            }

            // if it is a tag (group 1 is an empty string when it did not take part in the match)
            if ($parse[1][0] !== '') {
                // save the previous text if it exists
                if ($str !== '') {
                    $tokens[] = new Token('txt', $str);
                }

                // save the tag, with the line on which it starts
                $tokens[] = new Token('code', trim($parse[1][0]), $line);
                $line += substr_count($parse[1][0], "\n");

                // init the current text
                $str = '';
            } else { // else (if it is a text)
                // add the new text to the current text
                $str .= $parse[2][0];
                $line += substr_count($parse[2][0], "\n");
            }

            // Update offset to the end of the match
            $offset = $parse[0][1] + strlen($parse[0][0]);
            unset($parse);
        }

        // if a text is present in the end, we save it
        if ($str !== '') {
            $tokens[] = new Token('txt', $str);
        }

        return $tokens;
    }
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/client/inc/hpdf5/spipu/html2pdf/src/Parsing/HtmlLexer.php@ 398

Download in other formats: