<?php
/**
 * General API for generating and formatting diffs - the differences between
 * two sequences of strings.
 *
 * The original PHP version of this code was written by Geoffrey T. Dairiki
 * <dairiki@dairiki.org>, and is used/adapted with his permission.
 *
 * $Horde: framework/Text_Diff/Diff.php,v 1.26 2008/01/04 10:07:49 jan Exp $
 *
 * Copyright 2004 Geoffrey T. Dairiki <dairiki@dairiki.org>
 * Copyright 2004-2008 The Horde Project (http://www.horde.org/)
 *
 * See the enclosed file COPYING for license information (LGPL). If you did
 * not receive this file, see http://opensource.org/licenses/lgpl-license.php.
 *
 * @package Text_Diff
 * @author Geoffrey T. Dairiki <dairiki@dairiki.org>
 */
class Text_Diff {

    /**
     * Array of changes.
     *
     * @var array
     */
    var $_edits;

    /**
     * Computes diffs between sequences of strings.
     *
     * For backward compatibility the constructor may also be called with
     * the two line arrays directly (i.e. without an engine name); in that
     * case the 'auto' engine is used.
     *
     * @param string $engine  Name of the diffing engine to use. 'auto'
     *                        will automatically select the best.
     * @param array $params   Parameters to pass to the diffing engine.
     *                        Normally an array of two arrays, each
     *                        containing the lines from a file.
     */
    function __construct($engine, $params)
    {
        // Backward compatibility workaround: old callers pass the two line
        // arrays as ($from_lines, $to_lines) instead of ($engine, $params).
        if (!is_string($engine)) {
            $params = array($engine, $params);
            $engine = 'auto';
        }

        if ($engine === 'auto') {
            $engine = extension_loaded('xdiff') ? 'xdiff' : 'native';
        } else {
            // Strip any directory components so that only a file inside the
            // Diff/Engine directory can be included below.
            $engine = basename($engine);
        }

        // WP #7391
        require_once dirname(__FILE__).'/Diff/Engine/' . $engine . '.php';
        $class = 'Text_Diff_Engine_' . $engine;
        $diff_engine = new $class();

        $this->_edits = call_user_func_array(array($diff_engine, 'diff'), $params);
    }

    /**
     * PHP 4 style constructor, kept so that existing code which invokes
     * Text_Diff() explicitly (e.g. parent::Text_Diff() in a subclass)
     * keeps working.
     *
     * self:: is used on purpose: it always targets this class'
     * constructor, even when called from a subclass instance.
     *
     * @param string $engine  See __construct().
     * @param array $params   See __construct().
     */
    function Text_Diff($engine, $params)
    {
        self::__construct($engine, $params);
    }

    /**
     * Returns the array of differences.
     *
     * @return array  The edit operations stored by the constructor.
     */
    function getDiff()
    {
        return $this->_edits;
    }

    /**
     * Computes a reversed diff.
     *
     * Example:
     * <code>
     * $diff = new Text_Diff($lines1, $lines2);
     * $rev = $diff->reverse();
     * </code>
     *
     * @return Text_Diff  A Diff object representing the inverse of the
     *                    original diff.  Note that we purposely don't return a
     *                    reference here, since this essentially is a clone()
     *                    method.
     */
    function reverse()
    {
        // Zend Engine 2+ assigns objects by handle, so an explicit clone is
        // needed; older engines copy the object on assignment.
        if (version_compare(zend_version(), '2', '>')) {
            $rev = clone($this);
        } else {
            $rev = $this;
        }
        $rev->_edits = array();
        foreach ($this->_edits as $edit) {
            $rev->_edits[] = $edit->reverse();
        }
        return $rev;
    }

    /**
     * Checks for an empty diff.
     *
     * @return boolean  True if two sequences were identical.
     */
    function isEmpty()
    {
        foreach ($this->_edits as $edit) {
            if (!is_a($edit, 'Text_Diff_Op_copy')) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes the length of the Longest Common Subsequence (LCS).
     *
     * This is mostly for diagnostic purposes.
     *
     * @return integer  The length of the LCS.
     */
    function lcs()
    {
        $lcs = 0;
        foreach ($this->_edits as $edit) {
            if (is_a($edit, 'Text_Diff_Op_copy')) {
                $lcs += count($edit->orig);
            }
        }
        return $lcs;
    }

    /**
     * Gets the original set of lines.
     *
     * This reconstructs the $from_lines parameter passed to the constructor.
     *
     * @return array  The original sequence of strings.
     */
    function getOriginal()
    {
        $lines = array();
        foreach ($this->_edits as $edit) {
            if ($edit->orig) {
                // Append the edit's original lines to the end of $lines.
                array_splice($lines, count($lines), 0, $edit->orig);
            }
        }
        return $lines;
    }

    /**
     * Gets the final set of lines.
     *
     * This reconstructs the $to_lines parameter passed to the constructor.
     *
     * @return array  The sequence of strings.
     */
    function getFinal()
    {
        $lines = array();
        foreach ($this->_edits as $edit) {
            if ($edit->final) {
                // Append the edit's final lines to the end of $lines.
                array_splice($lines, count($lines), 0, $edit->final);
            }
        }
        return $lines;
    }

    /**
     * Removes trailing newlines from a line of text. This is meant to be used
     * with array_walk().
     *
     * Note that every "\n" and "\r" character in the line is removed, not
     * only those at the end.
     *
     * @param string $line  The line to trim (modified in place).
     * @param integer $key  The index of the line in the array.  Not used.
     */
    function trimNewlines(&$line, $key)
    {
        $line = str_replace(array("\n", "\r"), '', $line);
    }

    /**
     * Determines the location of the system temporary directory.
     *
     * Declared static to match the documented contract, so that it can be
     * called both statically and on an instance.
     *
     * @static
     *
     * @access protected
     *
     * @return string  A directory name which can be used for temp files.
     *                 Returns false if one could not be found.
     */
    static function _getTempDir()
    {
        $tmp_locations = array('/tmp', '/var/tmp', 'c:\WUTemp', 'c:\temp',
                               'c:\windows\temp', 'c:\winnt\temp');

        /* Try PHP's upload_tmp_dir directive. */
        $tmp = ini_get('upload_tmp_dir');

        /* Otherwise, try to determine the TMPDIR environment variable. */
        if (!strlen($tmp)) {
            $tmp = getenv('TMPDIR');
        }

        /* If we still cannot determine a value, then cycle through a list of
         * preset possibilities. */
        while (!strlen($tmp) && count($tmp_locations)) {
            $tmp_check = array_shift($tmp_locations);
            if (@is_dir($tmp_check)) {
                $tmp = $tmp_check;
            }
        }

        /* If it is still empty, we have failed, so return false; otherwise
         * return the directory determined. */
        return strlen($tmp) ? $tmp : false;
    }

    /**
     * Checks a diff for validity.
     *
     * This is here only for debugging purposes.  Every failed check raises
     * an E_USER_ERROR via trigger_error().
     *
     * @param array $from_lines  The sequence the diff was computed from.
     * @param array $to_lines    The sequence the diff was computed to.
     *
     * @return boolean  Always true when execution reaches the end.
     */
    function _check($from_lines, $to_lines)
    {
        if (serialize($from_lines) != serialize($this->getOriginal())) {
            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
        }
        if (serialize($to_lines) != serialize($this->getFinal())) {
            trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
        }

        $rev = $this->reverse();
        if (serialize($to_lines) != serialize($rev->getOriginal())) {
            trigger_error("Reversed original doesn't match", E_USER_ERROR);
        }
        if (serialize($from_lines) != serialize($rev->getFinal())) {
            trigger_error("Reversed final doesn't match", E_USER_ERROR);
        }

        // Two consecutive edits of the same class could have been merged.
        $prevtype = null;
        foreach ($this->_edits as $edit) {
            if ($prevtype == get_class($edit)) {
                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
            }
            $prevtype = get_class($edit);
        }

        return true;
    }

}
/**
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 */
class Text_MappedDiff extends Text_Diff {

    /**
     * Computes a diff between sequences of strings.
     *
     * This can be used to compute things like case-insensitive diffs, or diffs
     * which ignore changes in white-space.
     *
     * @param array $from_lines         An array of strings.
     * @param array $to_lines           An array of strings.
     * @param array $mapped_from_lines  This array should have the same
     *                                  number of elements as $from_lines.  The
     *                                  elements in $mapped_from_lines and
     *                                  $mapped_to_lines are what is actually
     *                                  compared when computing the diff.
     * @param array $mapped_to_lines    This array should have the same number
     *                                  of elements as $to_lines.
     */
    function __construct($from_lines, $to_lines,
                         $mapped_from_lines, $mapped_to_lines)
    {
        assert(count($from_lines) == count($mapped_from_lines));
        assert(count($to_lines) == count($mapped_to_lines));

        // Diff the mapped lines.  The parent is invoked through its
        // class-named method, passing the two arrays in its
        // backward-compatible ($from_lines, $to_lines) calling form.
        parent::Text_Diff($mapped_from_lines, $mapped_to_lines);

        // Walk the edits and swap the mapped lines back for the
        // corresponding slices of the real lines.  $xi / $yi track how many
        // lines of $from_lines / $to_lines have been consumed so far.
        $xi = $yi = 0;
        $edit_count = count($this->_edits);
        for ($i = 0; $i < $edit_count; $i++) {
            $orig = &$this->_edits[$i]->orig;
            if (is_array($orig)) {
                $orig = array_slice($from_lines, $xi, count($orig));
                $xi += count($orig);
            }

            $final = &$this->_edits[$i]->final;
            if (is_array($final)) {
                $final = array_slice($to_lines, $yi, count($final));
                $yi += count($final);
            }
        }
    }

    /**
     * PHP 4 style constructor, kept so that existing code which invokes
     * Text_MappedDiff() explicitly keeps working.
     *
     * @param array $from_lines         See __construct().
     * @param array $to_lines           See __construct().
     * @param array $mapped_from_lines  See __construct().
     * @param array $mapped_to_lines    See __construct().
     */
    function Text_MappedDiff($from_lines, $to_lines,
                             $mapped_from_lines, $mapped_to_lines)
    {
        self::__construct($from_lines, $to_lines,
                          $mapped_from_lines, $mapped_to_lines);
    }

}
/**
 * Base class for a single edit operation of a diff.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op {

    /** Lines on the original side of the edit (may be false). */
    var $orig;

    /** Lines on the final side of the edit (may be false). */
    var $final;

    /**
     * Placeholder for the operation-specific inverse; subclasses are
     * expected to override it.  Calling it on the base class raises an
     * E_USER_ERROR.
     */
    function &reverse()
    {
        trigger_error('Abstract method', E_USER_ERROR);
    }

    /**
     * Counts the lines on the original side.
     *
     * @return integer  Number of original lines, 0 if $orig is empty/false.
     */
    function norig()
    {
        if (!$this->orig) {
            return 0;
        }
        return count($this->orig);
    }

    /**
     * Counts the lines on the final side.
     *
     * @return integer  Number of final lines, 0 if $final is empty/false.
     */
    function nfinal()
    {
        if (!$this->final) {
            return 0;
        }
        return count($this->final);
    }

}
/**
 * Edit operation for a run of lines present on both sides of the diff.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_copy extends Text_Diff_Op {

    /**
     * @param array $orig         The lines on the original side.
     * @param array|false $final  The lines on the final side; when this is
     *                            not an array, $orig is used for both sides.
     */
    function __construct($orig, $final = false)
    {
        if (!is_array($final)) {
            $final = $orig;
        }
        $this->orig = $orig;
        $this->final = $final;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with code
     * that invokes it explicitly.
     *
     * @param array $orig         See __construct().
     * @param array|false $final  See __construct().
     */
    function Text_Diff_Op_copy($orig, $final = false)
    {
        self::__construct($orig, $final);
    }

    /**
     * Builds the inverse operation: a copy with both sides swapped.
     *
     * @return Text_Diff_Op_copy  The reversed operation.
     */
    function &reverse()
    {
        // Plain assignment: "= &new" is a parse error on PHP 7 and later.
        $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
        return $reverse;
    }

}
/**
 * Edit operation for lines that exist only on the original side.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_delete extends Text_Diff_Op {

    /**
     * @param array $lines  The deleted lines; stored as the original side,
     *                      the final side is set to false.
     */
    function __construct($lines)
    {
        $this->orig = $lines;
        $this->final = false;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with code
     * that invokes it explicitly.
     *
     * @param array $lines  See __construct().
     */
    function Text_Diff_Op_delete($lines)
    {
        self::__construct($lines);
    }

    /**
     * Builds the inverse operation: an addition of the deleted lines.
     *
     * @return Text_Diff_Op_add  The reversed operation.
     */
    function &reverse()
    {
        // Plain assignment: "= &new" is a parse error on PHP 7 and later.
        $reverse = new Text_Diff_Op_add($this->orig);
        return $reverse;
    }

}
/**
 * Edit operation for lines that exist only on the final side.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_add extends Text_Diff_Op {

    /**
     * @param array $lines  The added lines; stored as the final side, the
     *                      original side is set to false.
     */
    function __construct($lines)
    {
        $this->final = $lines;
        $this->orig = false;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with code
     * that invokes it explicitly.
     *
     * @param array $lines  See __construct().
     */
    function Text_Diff_Op_add($lines)
    {
        self::__construct($lines);
    }

    /**
     * Builds the inverse operation: a deletion of the added lines.
     *
     * @return Text_Diff_Op_delete  The reversed operation.
     */
    function &reverse()
    {
        // Plain assignment: "= &new" is a parse error on PHP 7 and later.
        $reverse = new Text_Diff_Op_delete($this->final);
        return $reverse;
    }

}
/**
 * Edit operation for a run of original lines replaced by other lines.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_change extends Text_Diff_Op {

    /**
     * @param array $orig   The lines on the original side.
     * @param array $final  The lines on the final side.
     */
    function __construct($orig, $final)
    {
        $this->orig = $orig;
        $this->final = $final;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with code
     * that invokes it explicitly.
     *
     * @param array $orig   See __construct().
     * @param array $final  See __construct().
     */
    function Text_Diff_Op_change($orig, $final)
    {
        self::__construct($orig, $final);
    }

    /**
     * Builds the inverse operation: a change with both sides swapped.
     *
     * @return Text_Diff_Op_change  The reversed operation.
     */
    function &reverse()
    {
        // Plain assignment: "= &new" is a parse error on PHP 7 and later.
        $reverse = new Text_Diff_Op_change($this->final, $this->orig);
        return $reverse;
    }

}