PDF converter * distributed under the OSL-3.0 License * * @package Html2pdf * @author Laurent MINGUET * @copyright 2017 Laurent MINGUET */ namespace Spipu\Html2Pdf\Parsing; /** * Class HtmlLexer */ class HtmlLexer { /** * Tokenize the HTML code * * @param string $html HTML code to tokenize * * @return Token[] */ public function tokenize($html) { // initialise the array $tokens = array(); // regexp to separate the tags from the texts $reg = '/(<\/?\w[^<>]*>)|([^<]+|<)/is'; $commentRegex = '/()/isU'; // last match found $str = ''; $offset = 0; $line = 1; $length = strlen($html); // As it finds a match while ($offset < $length) { if (strpos($html, '