[44] | 1 | <?php
|
---|
| 2 | /**
|
---|
| 3 | * WordPress Diff bastard child of old MediaWiki Diff Formatter.
|
---|
| 4 | *
|
---|
| 5 | * Basically all that remains is the table structure and some method names.
|
---|
| 6 | *
|
---|
| 7 | * @package WordPress
|
---|
| 8 | * @subpackage Diff
|
---|
| 9 | */
|
---|
| 10 |
|
---|
| 11 | if ( !class_exists( 'Text_Diff' ) ) {
|
---|
| 12 | /** Text_Diff class */
|
---|
| 13 | require( dirname(__FILE__).'/Text/Diff.php' );
|
---|
| 14 | /** Text_Diff_Renderer class */
|
---|
| 15 | require( dirname(__FILE__).'/Text/Diff/Renderer.php' );
|
---|
| 16 | /** Text_Diff_Renderer_inline class */
|
---|
| 17 | require( dirname(__FILE__).'/Text/Diff/Renderer/inline.php' );
|
---|
| 18 | }
|
---|
| 19 |
|
---|
| 20 | /**
|
---|
| 21 | * Table renderer to display the diff lines.
|
---|
| 22 | *
|
---|
| 23 | * @since 2.6.0
|
---|
| 24 | * @uses Text_Diff_Renderer Extends
|
---|
| 25 | */
|
---|
| 26 | class WP_Text_Diff_Renderer_Table extends Text_Diff_Renderer {
|
---|
| 27 |
|
---|
| 28 | /**
|
---|
| 29 | * @see Text_Diff_Renderer::_leading_context_lines
|
---|
| 30 | * @var int
|
---|
| 31 | * @access protected
|
---|
| 32 | * @since 2.6.0
|
---|
| 33 | */
|
---|
| 34 | var $_leading_context_lines = 10000;
|
---|
| 35 |
|
---|
| 36 | /**
|
---|
| 37 | * @see Text_Diff_Renderer::_trailing_context_lines
|
---|
| 38 | * @var int
|
---|
| 39 | * @access protected
|
---|
| 40 | * @since 2.6.0
|
---|
| 41 | */
|
---|
| 42 | var $_trailing_context_lines = 10000;
|
---|
| 43 |
|
---|
| 44 | /**
|
---|
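| 45 | * Highest ratio of changed text to total text at which a changed line pair still keeps its word-level diff; pairs above it are shown without word highlighting.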
|
---|
| 46 | *
|
---|
| 47 | * @var float
|
---|
| 48 | * @access protected
|
---|
| 49 | * @since 2.6.0
|
---|
| 50 | */
|
---|
| 51 | var $_diff_threshold = 0.6;
|
---|
| 52 |
|
---|
| 53 | /**
|
---|
| 54 | * Inline display helper object name.
|
---|
| 55 | *
|
---|
| 56 | * @var string
|
---|
| 57 | * @access protected
|
---|
| 58 | * @since 2.6.0
|
---|
| 59 | */
|
---|
| 60 | var $inline_diff_renderer = 'WP_Text_Diff_Renderer_inline';
|
---|
| 61 |
|
---|
/**
 * PHP4-style constructor that forwards the params array to the parent class.
 *
 * Looks up the name of the parent class and calls the method of that name
 * on this object, passing $params through untouched.
 *
 * NOTE(review): this method is named Text_Diff_Renderer_Table while the
 * class is WP_Text_Diff_Renderer_Table, so PHP does not treat it as this
 * class's constructor; it only runs when called explicitly. Confirm
 * whether it should be renamed.
 *
 * @since unknown
 *
 * @param array $params Handed unchanged to the parent's same-named method.
 */
function Text_Diff_Renderer_Table( $params = array() ) {
	$this->{get_parent_class( $this )}( $params );
}
|
---|
| 75 |
|
---|
/**
 * Block header hook; this renderer outputs no header.
 *
 * @ignore
 *
 * @param string $header Not used.
 * @return string Always an empty string.
 */
function _startBlock( $header ) {
	$no_header = '';

	return $no_header;
}
|
---|
| 85 |
|
---|
/**
 * Line output hook; deliberately does nothing in this renderer.
 *
 * @ignore
 *
 * @param array  $lines  Not used.
 * @param string $prefix Not used.
 */
function _lines( $lines, $prefix=' ' ) {
	// Intentionally empty: no output is produced here.
	return;
}
|
---|
| 94 |
|
---|
/**
 * Builds the two table cells for an added line: a "+" marker and the text.
 *
 * @ignore
 *
 * @param string $line Line content; inserted as-is, so it must already be HTML-escaped.
 * @return string The two <td> cells.
 */
function addedLine( $line ) {
	return "<td>+</td><td class='diff-addedline'>" . $line . '</td>';
}
|
---|
| 104 |
|
---|
/**
 * Builds the two table cells for a deleted line: a "-" marker and the text.
 *
 * @ignore
 *
 * @param string $line Line content; inserted as-is, so it must already be HTML-escaped.
 * @return string The two <td> cells.
 */
function deletedLine( $line ) {
	return "<td>-</td><td class='diff-deletedline'>" . $line . '</td>';
}
|
---|
| 114 |
|
---|
/**
 * Builds the two table cells for an unchanged (context) line.
 *
 * @ignore
 *
 * @param string $line Line content; inserted as-is, so it must already be HTML-escaped.
 * @return string The two <td> cells.
 */
function contextLine( $line ) {
	return "<td> </td><td class='diff-context'>" . $line . '</td>';
}
|
---|
| 124 |
|
---|
/**
 * Builds the filler cell used where one side of a row has no line.
 *
 * @ignore
 *
 * @return string A single <td> spanning two columns.
 */
function emptyLine() {
	$filler_cell = '<td colspan="2"> </td>';

	return $filler_cell;
}
|
---|
| 133 |
|
---|
/**
 * Renders added lines as table rows: a filler cell on the left, the added
 * line on the right.
 *
 * @ignore
 * @access private
 *
 * @param array $lines  Lines to render.
 * @param bool  $encode Whether to pass each line through htmlspecialchars() first.
 * @return string One "<tr>…</tr>\n" per line.
 */
function _added( $lines, $encode = true ) {
	$rows = '';

	foreach ( $lines as $text ) {
		$cell_text = $encode ? htmlspecialchars( $text ) : $text;
		$rows .= '<tr>' . $this->emptyLine() . $this->addedLine( $cell_text ) . "</tr>\n";
	}

	return $rows;
}
|
---|
| 151 |
|
---|
/**
 * Renders deleted lines as table rows: the deleted line on the left, a
 * filler cell on the right.
 *
 * @ignore
 * @access private
 *
 * @param array $lines  Lines to render.
 * @param bool  $encode Whether to pass each line through htmlspecialchars() first.
 * @return string One "<tr>…</tr>\n" per line.
 */
function _deleted( $lines, $encode = true ) {
	$rows = '';

	foreach ( $lines as $text ) {
		$cell_text = $encode ? htmlspecialchars( $text ) : $text;
		$rows .= '<tr>' . $this->deletedLine( $cell_text ) . $this->emptyLine() . "</tr>\n";
	}

	return $rows;
}
|
---|
| 169 |
|
---|
/**
 * Renders unchanged lines as table rows, showing the same line in both
 * the left and the right column.
 *
 * @ignore
 * @access private
 *
 * @param array $lines  Lines to render.
 * @param bool  $encode Whether to pass each line through htmlspecialchars() first.
 * @return string One "<tr>…</tr>\n" per line.
 */
function _context( $lines, $encode = true ) {
	$rows = '';

	foreach ( $lines as $text ) {
		$cell_text = $encode ? htmlspecialchars( $text ) : $text;
		$left  = $this->contextLine( $cell_text );
		$right = $this->contextLine( $cell_text );
		$rows .= '<tr>' . $left . $right . "</tr>\n";
	}

	return $rows;
}
|
---|
| 188 |
|
---|
/**
 * Renders a block of changed lines, with word-by-word highlighting where it helps.
 *
 * (TRAC style) Lines reported as changed can really be added or deleted rows.
 * interleave_changed_lines() works out which original row pairs with which
 * final row. Every matched pair is run through the inline diff renderer, and
 * a pair whose changed-text ratio exceeds $this->_diff_threshold is shown
 * without word-level markup.
 *
 * @access private
 * @since 2.6.0
 *
 * @param array $orig  Original lines of the changed block.
 * @param array $final Final lines of the changed block.
 * @return string One "<tr>…</tr>\n" per output row.
 */
function _changed( $orig, $final ) {
	$r = '';

	// *_matches tell which rows are "the same" in orig and final. Those pairs are diffed to get word changes.
	//	match is numeric: an index in the other column
	//	match is 'x': no match
	// *_rows are column vectors for the orig column and the final column.
	//	row >= 0: an index of the $orig or $final array
	//	row  < 0: a blank row for that column
	list($orig_matches, $final_matches, $orig_rows, $final_rows) = $this->interleave_changed_lines( $orig, $final );

	// These hold the word changes as determined by an inline diff, keyed by line index.
	$orig_diffs  = array();
	$final_diffs = array();

	// Compute word diffs for each matched pair using the inline diff.
	foreach ( $orig_matches as $o => $f ) {
		if ( ! is_numeric($o) || ! is_numeric($f) )
			continue; // No partner row to compare against.

		$text_diff = new Text_Diff( 'auto', array( array($orig[$o]), array($final[$f]) ) );
		$renderer = new $this->inline_diff_renderer;
		$diff = $renderer->render( $text_diff );

		// If they're too different, don't include any <ins> or <del>.
		if ( preg_match_all( '!(<ins>.*?</ins>|<del>.*?</del>)!', $diff, $diff_matches ) ) {
			// Length of all text between <ins> or <del>.
			$stripped_matches = strlen(strip_tags( join(' ', $diff_matches[0]) ));
			// Since we count the length of text between <ins> or <del> (instead of picking just one),
			// we double the length of chars not in those tags.
			$stripped_diff = strlen(strip_tags( $diff )) * 2 - $stripped_matches;

			// A zero denominator means there is nothing but changed text:
			// treat it as too different rather than dividing by zero.
			if ( 0 === $stripped_diff )
				continue;

			if ( $stripped_matches / $stripped_diff > $this->_diff_threshold )
				continue; // Too different. Don't save diffs.
		}

		// Un-inline the diffs by removing del or ins.
		$orig_diffs[$o] = preg_replace( '|<ins>.*?</ins>|', '', $diff );
		$final_diffs[$f] = preg_replace( '|<del>.*?</del>|', '', $diff );
	}

	foreach ( array_keys($orig_rows) as $row ) {
		$orig_index  = $orig_rows[$row];
		$final_index = $final_rows[$row];

		// Both columns have blanks. Ignore them.
		if ( $orig_index < 0 && $final_index < 0 )
			continue;

		// If we have a word based diff, use it. Otherwise, use the normal line.
		// A blank row has a negative index that is not a key of $orig / $final,
		// so it must not be looked up there; it renders as an empty string.
		if ( isset($orig_diffs[$orig_index]) )
			$orig_line = $orig_diffs[$orig_index];
		elseif ( isset($orig[$orig_index]) )
			$orig_line = htmlspecialchars($orig[$orig_index]);
		else
			$orig_line = '';

		if ( isset($final_diffs[$final_index]) )
			$final_line = $final_diffs[$final_index];
		elseif ( isset($final[$final_index]) )
			$final_line = htmlspecialchars($final[$final_index]);
		else
			$final_line = '';

		if ( $orig_index < 0 ) { // Orig is blank. This is really an added row.
			$r .= $this->_added( array($final_line), false );
		} elseif ( $final_index < 0 ) { // Final is blank. This is really a deleted row.
			$r .= $this->_deleted( array($orig_line), false );
		} else { // A true changed row.
			$r .= '<tr>' . $this->deletedLine( $orig_line ) . $this->addedLine( $final_line ) . "</tr>\n";
		}
	}

	return $r;
}
|
---|
| 268 |
|
---|
/**
 * Takes changed blocks and works out which rows in orig turned into which rows in final.
 *
 * Every original line is compared with every final line using
 * compute_string_distance(); pairs are then claimed greedily, closest first.
 * A row whose closest candidate is already claimed is marked 'x' (no match).
 *
 * @since 2.6.0
 *
 * @param array $orig  Original lines.
 * @param array $final Final lines.
 * @return array Four arrays, in this order:
 *               $orig_matches  - orig index => matching final index, or 'x'
 *               $final_matches - final index => matching orig index, or 'x'
 *               $orig_rows     - orig indexes in display order; negative values are blank rows
 *               $final_rows    - final indexes in display order; negative values are blank rows
 */
function interleave_changed_lines( $orig, $final ) {

	// All pairwise string distances. The "orig,final" key keeps this a one dimensional array.
	$distances = array();
	foreach ( $orig as $o => $orig_line ) {
		foreach ( $final as $f => $final_line ) {
			$distances[$o . ',' . $f] = $this->compute_string_distance( $orig_line, $final_line );
		}
	}
	asort($distances); // Closest pairs first.

	$orig_matches  = array();
	$final_matches = array();

	foreach ( $distances as $pair => $distance ) {
		list($o, $f) = explode(',', $pair);
		$o = (int) $o;
		$f = (int) $f;

		$orig_taken  = isset($orig_matches[$o]);
		$final_taken = isset($final_matches[$f]);

		if ( $orig_taken && $final_taken ) {
			// Both rows already have better matches.
			continue;
		}

		if ( $orig_taken ) {
			// This final's best match is already taken, so this final is a new row.
			$final_matches[$f] = 'x';
		} elseif ( $final_taken ) {
			// This orig's best match is already taken, so this orig is a deleted row.
			$orig_matches[$o] = 'x';
		} else {
			// First time either row shows up: this must be the best match for both.
			$orig_matches[$o] = $f;
			$final_matches[$f] = $o;
		}
	}

	// We read the text in this order.
	ksort($orig_matches);
	ksort($final_matches);

	// Rows and blanks for each column.
	$orig_rows = $orig_rows_copy = array_keys($orig_matches);
	$final_rows = array_keys($final_matches);

	// Interleave rows with blanks to keep matches aligned.
	// Some extraneous blank rows may result; they are ignored later.
	foreach ( $orig_rows_copy as $orig_row ) {
		$final_pos = array_search($orig_matches[$orig_row], $final_rows, true);
		$orig_pos = (int) array_search($orig_row, $orig_rows, true);

		if ( false === $final_pos ) {
			// This orig is paired with a blank final.
			array_splice( $final_rows, $orig_pos, 0, -1 );
		} elseif ( $final_pos < $orig_pos ) {
			// This orig's match is up a ways. Pad final with blank rows.
			for ( $blank = $final_pos - $orig_pos; $blank < 0; $blank++ )
				array_splice( $final_rows, $orig_pos, 0, $blank );
		} elseif ( $final_pos > $orig_pos ) {
			// This orig's match is down a ways. Pad orig with blank rows.
			for ( $blank = $orig_pos - $final_pos; $blank < 0; $blank++ )
				array_splice( $orig_rows, $orig_pos, 0, $blank );
		}
	}

	// Pad the end of the shorter column with blank rows so both have the same length.
	$length_gap = count($orig_rows) - count($final_rows);
	if ( $length_gap < 0 ) {
		for ( $blank = $length_gap; $blank < 0; $blank++ )
			$orig_rows[] = $blank;
	} elseif ( $length_gap > 0 ) {
		for ( $blank = -$length_gap; $blank < 0; $blank++ )
			$final_rows[] = $blank;
	}

	return array($orig_matches, $final_matches, $orig_rows, $final_rows);
}
|
---|
| 406 |
|
---|
/**
 * Computes a number that is intended to reflect the "distance" between two strings.
 *
 * The distance is the sum, over every byte value, of the absolute difference
 * in how often that byte occurs in each string, divided by the length of
 * $string1.
 *
 * @since 2.6.0
 *
 * @param string $string1
 * @param string $string2
 * @return int|float The undivided sum when $string1 is empty, otherwise the sum per character of $string1.
 */
function compute_string_distance( $string1, $string2 ) {
	// Occurrence counts for every byte value in each string.
	$counts1 = count_chars($string1);
	$counts2 = count_chars($string2);

	// L1-norm of the difference between the two count vectors.
	$total = 0;
	foreach ( $counts1 as $byte => $count ) {
		$total += $this->difference( $count, $counts2[$byte] );
	}

	// $string1 has zero length? Odd. Give a huge penalty by not dividing.
	// Otherwise return the distance per character (of $string1).
	return $string1 ? $total / strlen($string1) : $total;
}
|
---|
| 431 |
|
---|
/**
 * Returns the absolute difference between two numbers.
 *
 * @ignore
 * @since 2.6.0
 *
 * @param int $a
 * @param int $b
 * @return int
 */
function difference( $a, $b ) {
	$delta = $a - $b;

	return abs( $delta );
}
|
---|
| 443 |
|
---|
| 444 | }
|
---|
| 445 |
|
---|
/**
 * Better word splitting than the PEAR package provides.
 *
 * @since 2.6.0
 * @uses Text_Diff_Renderer_inline Extends
 */
class WP_Text_Diff_Renderer_inline extends Text_Diff_Renderer_inline {

	/**
	 * Splits a string into tokens at every non-word character.
	 *
	 * NUL bytes are removed first. Each non-word character becomes its own
	 * token (the delimiter is captured), and newlines in the tokens are
	 * replaced by $newlineEscape.
	 *
	 * @ignore
	 * @since 2.6.0
	 *
	 * @param string $string        Text to split.
	 * @param string $newlineEscape Replacement for "\n" in the resulting tokens.
	 * @return array The tokens, in their original order.
	 */
	function _splitOnWords($string, $newlineEscape = "\n") {
		$string = str_replace("\0", '', $string);
		$words = preg_split( '/([^\w])/u', $string, -1, PREG_SPLIT_DELIM_CAPTURE );

		// With the /u modifier preg_split() returns false when the subject is
		// not valid UTF-8. Retry byte-wise so an array of tokens is still returned.
		if ( false === $words )
			$words = preg_split( '/([^\w])/', $string, -1, PREG_SPLIT_DELIM_CAPTURE );

		// Last resort: hand the text back as a single token.
		if ( false === $words )
			$words = array( $string );

		$words = str_replace( "\n", $newlineEscape, $words );
		return $words;
	}

}
|
---|
| 470 |
|
---|
| 471 | ?>
|
---|