Context Navigation

HtmlLexer.php

Last change on this file was 347, checked in by roby, 4 years ago
Aggiornamento per compatibilità con php7.4
File size: 2.3 KB

Rev	Line
[347]	1	<?php
	2	/**
	3	* Html2Pdf Library - parsing Html class
	4	*
	5	* HTML => PDF converter
	6	* distributed under the OSL-3.0 License
	7	*
	8	* @package Html2pdf
	9	* @author Laurent MINGUET <webmaster@html2pdf.fr>
	10	* @copyright 2017 Laurent MINGUET
	11	*/
	12	namespace Spipu\Html2Pdf\Parsing;
	13
	/**
	 * Class HtmlLexer
	 *
	 * Splits an HTML string into a flat list of Token objects: 'code' tokens
	 * for opening/closing tags and 'txt' tokens for the text between them.
	 */
	class HtmlLexer
	{
	    /**
	     * Tokenize the HTML code
	     *
	     * HTML comments (<!-- ... -->) are dropped; the text on either side of a
	     * comment is merged into the same 'txt' token. A '<' that does not start
	     * a tag is kept as plain text. Tag tokens carry the 1-based line number
	     * on which the tag starts; text tokens are created without a line number.
	     *
	     * @param string $html HTML code to tokenize
	     *
	     * @return Token[]
	     */
	    public function tokenize($html)
	    {
	        // initialise the array
	        $tokens = array();

	        // regexp to separate the tags from the texts:
	        //   group 1 = an opening or closing tag
	        //   group 2 = a run of text, or a lone "<" that does not start a tag
	        // The alternation bars must NOT be escaped: "\|" is a literal pipe in
	        // PCRE and would make the pattern fail on every ordinary document.
	        $reg = '/(<\/?\w[^<>]*>)|([^<]+|<)/is';
	        $commentRegex = '/(<!--.*-->)/isU';

	        // text accumulated since the last tag
	        $str = '';
	        $offset = 0;
	        $line = 1;
	        $length = strlen($html);

	        while ($offset < $length) {
	            // Skip a complete comment starting exactly at the current offset.
	            // Only the next four bytes are inspected, so the check stays O(1)
	            // instead of scanning the rest of the document on each iteration.
	            if (substr($html, $offset, 4) === '<!--'
	                && preg_match($commentRegex, $html, $match, PREG_OFFSET_CAPTURE, $offset)
	            ) {
	                $line += substr_count($match[0][0], "\n");
	                $offset = $match[0][1] + strlen($match[0][0]);
	                continue;
	            }

	            // The pattern accepts any single byte, so a miss here means a PCRE
	            // error. Keep the remaining input as text and stop, rather than
	            // looping forever on an offset that can no longer advance.
	            if (preg_match($reg, $html, $parse, PREG_OFFSET_CAPTURE, $offset) !== 1) {
	                $str .= substr($html, $offset);
	                break;
	            }

	            // if it is a tag (group 1 is '' when only group 2 matched)
	            if ($parse[1][0] !== '') {
	                // save the previous text if it exists
	                if ($str !== '') {
	                    $tokens[] = new Token('txt', $str);
	                }

	                // save the tag, with the line on which it starts
	                $tokens[] = new Token('code', trim($parse[1][0]), $line);
	                $line += substr_count($parse[1][0], "\n");

	                // init the current text
	                $str = '';
	            } else {
	                // else it is a text: add it to the current text
	                $str .= $parse[2][0];
	                $line += substr_count($parse[2][0], "\n");
	            }

	            // Update offset to the end of the match
	            $offset = $parse[0][1] + strlen($parse[0][0]);
	            unset($parse);
	        }

	        // if a text is present in the end, we save it
	        if ($str !== '') {
	            $tokens[] = new Token('txt', $str);
	        }

	        return $tokens;
	    }
	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/client/inc/hpdf5/spipu/html2pdf/src/Parsing/HtmlLexer.php

Download in other formats: