1 | <?php
|
---|
/**
 * General API for generating and formatting diffs - the differences between
 * two sequences of strings.
 *
 * The original PHP version of this code was written by Geoffrey T. Dairiki
 * <dairiki@dairiki.org>, and is used/adapted with his permission.
 *
 * $Horde: framework/Text_Diff/Diff.php,v 1.26 2008/01/04 10:07:49 jan Exp $
 *
 * Copyright 2004 Geoffrey T. Dairiki <dairiki@dairiki.org>
 * Copyright 2004-2008 The Horde Project (http://www.horde.org/)
 *
 * See the enclosed file COPYING for license information (LGPL). If you did
 * not receive this file, see http://opensource.org/licenses/lgpl-license.php.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 */
|
---|
class Text_Diff {

    /**
     * Array of changes.
     *
     * Filled by the constructor with whatever the selected engine's diff()
     * method returns; the methods below read the `orig` and `final`
     * properties of each element and call its reverse() method.
     *
     * @var array
     */
    var $_edits;

    /**
     * Computes diffs between sequences of strings.
     *
     * @param string $engine     Name of the diffing engine to use.  'auto'
     *                           will automatically select the best.
     * @param array $params      Parameters to pass to the diffing engine.
     *                           Normally an array of two arrays, each
     *                           containing the lines from a file.
     */
    function __construct($engine, $params)
    {
        // Backward compatibility workaround: the old signature was
        // ($from_lines, $to_lines), so a non-string first argument means the
        // two arguments are really the engine parameters.
        if (!is_string($engine)) {
            $params = array($engine, $params);
            $engine = 'auto';
        }

        if ($engine === 'auto') {
            $engine = extension_loaded('xdiff') ? 'xdiff' : 'native';
        } else {
            // Strip any directory components before the name is used to
            // build the include path below.
            $engine = basename($engine);
        }

        // WP #7391
        require_once dirname(__FILE__) . '/Diff/Engine/' . $engine . '.php';
        $class = 'Text_Diff_Engine_' . $engine;
        $diff_engine = new $class();

        $this->_edits = call_user_func_array(array($diff_engine, 'diff'), $params);
    }

    /**
     * PHP 4 style constructor, kept so that existing code which calls
     * parent::Text_Diff() explicitly keeps working.
     *
     * Uses self:: rather than $this-> so that a subclass instance does not
     * dispatch to its own (differently shaped) constructor.
     *
     * @param string $engine  See __construct().
     * @param array $params   See __construct().
     */
    function Text_Diff($engine, $params)
    {
        self::__construct($engine, $params);
    }

    /**
     * Returns the array of differences.
     *
     * @return array  The list of edit operations stored by the constructor.
     */
    function getDiff()
    {
        return $this->_edits;
    }

    /**
     * Computes a reversed diff.
     *
     * Example:
     * <code>
     * $diff = new Text_Diff($lines1, $lines2);
     * $rev = $diff->reverse();
     * </code>
     *
     * @return Text_Diff  A Diff object representing the inverse of the
     *                    original diff.  Note that we purposely don't return a
     *                    reference here, since this essentially is a clone()
     *                    method.
     */
    function reverse()
    {
        // Always work on a copy so the receiver's own edit list is untouched.
        $rev = clone $this;
        $rev->_edits = array();
        foreach ($this->_edits as $edit) {
            $rev->_edits[] = $edit->reverse();
        }
        return $rev;
    }

    /**
     * Checks for an empty diff.
     *
     * @return boolean  True if two sequences were identical.
     */
    function isEmpty()
    {
        foreach ($this->_edits as $edit) {
            // Any operation other than a plain copy is a difference.
            if (!($edit instanceof Text_Diff_Op_copy)) {
                return false;
            }
        }
        return true;
    }

    /**
     * Computes the length of the Longest Common Subsequence (LCS).
     *
     * This is mostly for diagnostic purposes.
     *
     * @return integer  The length of the LCS.
     */
    function lcs()
    {
        $lcs = 0;
        foreach ($this->_edits as $edit) {
            if ($edit instanceof Text_Diff_Op_copy) {
                $lcs += count($edit->orig);
            }
        }
        return $lcs;
    }

    /**
     * Gets the original set of lines.
     *
     * This reconstructs the $from_lines parameter passed to the constructor.
     *
     * @return array  The original sequence of strings.
     */
    function getOriginal()
    {
        $lines = array();
        foreach ($this->_edits as $edit) {
            // Skip operations whose `orig` is false or empty.
            if ($edit->orig) {
                array_splice($lines, count($lines), 0, $edit->orig);
            }
        }
        return $lines;
    }

    /**
     * Gets the final set of lines.
     *
     * This reconstructs the $to_lines parameter passed to the constructor.
     *
     * @return array  The sequence of strings.
     */
    function getFinal()
    {
        $lines = array();
        foreach ($this->_edits as $edit) {
            // Skip operations whose `final` is false or empty.
            if ($edit->final) {
                array_splice($lines, count($lines), 0, $edit->final);
            }
        }
        return $lines;
    }

    /**
     * Removes trailing newlines from a line of text. This is meant to be used
     * with array_walk().
     *
     * Declared static so it can be used as a class-level callback without
     * an instance.  Note that every "\n" and "\r" in the line is removed,
     * not only trailing ones.
     *
     * @static
     *
     * @param string $line   The line to trim (modified in place).
     * @param integer $key   The index of the line in the array.  Not used.
     */
    static function trimNewlines(&$line, $key)
    {
        $line = str_replace(array("\n", "\r"), '', $line);
    }

    /**
     * Determines the location of the system temporary directory.
     *
     * Tries, in order: the upload_tmp_dir ini setting, the TMPDIR environment
     * variable, then the first existing directory from a preset list.
     *
     * @static
     *
     * @access protected
     *
     * @return string  A directory name which can be used for temp files.
     *                 Returns false if one could not be found.
     */
    static function _getTempDir()
    {
        $tmp_locations = array('/tmp', '/var/tmp', 'c:\WUTemp', 'c:\temp',
                               'c:\windows\temp', 'c:\winnt\temp');

        /* Try PHP's upload_tmp_dir directive. */
        $tmp = ini_get('upload_tmp_dir');

        /* Otherwise, try to determine the TMPDIR environment variable. */
        if (!strlen($tmp)) {
            $tmp = getenv('TMPDIR');
        }

        /* If we still cannot determine a value, then cycle through a list of
         * preset possibilities and keep the first one that exists. */
        if (!strlen($tmp)) {
            foreach ($tmp_locations as $tmp_check) {
                if (@is_dir($tmp_check)) {
                    $tmp = $tmp_check;
                    break;
                }
            }
        }

        /* If it is still empty, we have failed, so return false; otherwise
         * return the directory determined. */
        return strlen($tmp) ? $tmp : false;
    }

    /**
     * Checks a diff for validity.
     *
     * This is here only for debugging purposes.  Each failed check raises an
     * E_USER_ERROR via trigger_error().
     *
     * @param array $from_lines  The original sequence the diff was built from.
     * @param array $to_lines    The final sequence the diff was built from.
     *
     * @return boolean  True when execution reaches the end of the checks.
     */
    function _check($from_lines, $to_lines)
    {
        if (serialize($from_lines) !== serialize($this->getOriginal())) {
            trigger_error("Reconstructed original doesn't match", E_USER_ERROR);
        }
        if (serialize($to_lines) !== serialize($this->getFinal())) {
            trigger_error("Reconstructed final doesn't match", E_USER_ERROR);
        }

        $rev = $this->reverse();
        if (serialize($to_lines) !== serialize($rev->getOriginal())) {
            trigger_error("Reversed original doesn't match", E_USER_ERROR);
        }
        if (serialize($from_lines) !== serialize($rev->getFinal())) {
            trigger_error("Reversed final doesn't match", E_USER_ERROR);
        }

        // Two adjacent operations of the same class could have been merged.
        $prevtype = null;
        foreach ($this->_edits as $edit) {
            $type = get_class($edit);
            if ($prevtype === $type) {
                trigger_error("Edit sequence is non-optimal", E_USER_ERROR);
            }
            $prevtype = $type;
        }

        return true;
    }

}
|
---|
248 |
|
---|
/**
 * Diff computed on one pair of sequences ("mapped" lines) but reported in
 * terms of another pair (the real lines).
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 */
class Text_MappedDiff extends Text_Diff {

    /**
     * Computes a diff between sequences of strings.
     *
     * This can be used to compute things like case-insensitive diffs, or diffs
     * which ignore changes in white-space.
     *
     * @param array $from_lines         An array of strings.
     * @param array $to_lines           An array of strings.
     * @param array $mapped_from_lines  This array should have the same number
     *                                  of elements as $from_lines.  The
     *                                  elements in $mapped_from_lines and
     *                                  $mapped_to_lines are what is actually
     *                                  compared when computing the diff.
     * @param array $mapped_to_lines    This array should have the same number
     *                                  of elements as $to_lines.
     */
    function __construct($from_lines, $to_lines,
                         $mapped_from_lines, $mapped_to_lines)
    {
        assert(count($from_lines) == count($mapped_from_lines));
        assert(count($to_lines) == count($mapped_to_lines));

        // Diff the mapped lines using the parent's legacy two-array form.
        parent::Text_Diff($mapped_from_lines, $mapped_to_lines);

        // Walk the edits in order and swap each run of mapped lines for the
        // real lines at the same positions.  $xi / $yi track how far into
        // $from_lines / $to_lines we have consumed so far.
        $xi = $yi = 0;
        foreach ($this->_edits as $edit) {
            if (is_array($edit->orig)) {
                $edit->orig = array_slice($from_lines, $xi, count($edit->orig));
                $xi += count($edit->orig);
            }

            if (is_array($edit->final)) {
                $edit->final = array_slice($to_lines, $yi, count($edit->final));
                $yi += count($edit->final);
            }
        }
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with callers
     * that invoke it by name.
     *
     * @param array $from_lines         See __construct().
     * @param array $to_lines           See __construct().
     * @param array $mapped_from_lines  See __construct().
     * @param array $mapped_to_lines    See __construct().
     */
    function Text_MappedDiff($from_lines, $to_lines,
                             $mapped_from_lines, $mapped_to_lines)
    {
        self::__construct($from_lines, $to_lines,
                          $mapped_from_lines, $mapped_to_lines);
    }

}
|
---|
296 |
|
---|
/**
 * Base class for a single edit operation in a diff.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op {

    // Lines this operation covers on the original side; norig() treats any
    // falsy value as "no lines".
    var $orig;

    // Lines this operation covers on the final side; nfinal() treats any
    // falsy value as "no lines".
    var $final;

    /**
     * Placeholder for the inverse operation.
     *
     * This base implementation only raises an E_USER_ERROR.
     */
    function &reverse()
    {
        trigger_error('Abstract method', E_USER_ERROR);
    }

    /**
     * Counts the lines on the original side.
     *
     * @return integer  Number of elements in $orig, or 0 when it is falsy.
     */
    function norig()
    {
        if (!$this->orig) {
            return 0;
        }
        return count($this->orig);
    }

    /**
     * Counts the lines on the final side.
     *
     * @return integer  Number of elements in $final, or 0 when it is falsy.
     */
    function nfinal()
    {
        if (!$this->final) {
            return 0;
        }
        return count($this->final);
    }

}
|
---|
324 |
|
---|
/**
 * Edit operation for a run of lines present on both sides of the diff.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_copy extends Text_Diff_Op {

    /**
     * @param array $orig         Lines on the original side.
     * @param array|false $final  Lines on the final side; when this is not
     *                            an array, $orig is used for both sides.
     */
    function __construct($orig, $final = false)
    {
        if (!is_array($final)) {
            $final = $orig;
        }
        $this->orig = $orig;
        $this->final = $final;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with callers
     * that invoke it by name.
     *
     * @param array $orig         See __construct().
     * @param array|false $final  See __construct().
     */
    function Text_Diff_Op_copy($orig, $final = false)
    {
        self::__construct($orig, $final);
    }

    /**
     * Builds the inverse operation: a copy with the two sides swapped.
     *
     * @return Text_Diff_Op_copy
     */
    function &reverse()
    {
        // Plain assignment: "=& new" is no longer valid syntax, and a
        // variable is still needed because the method returns by reference.
        $reverse = new Text_Diff_Op_copy($this->final, $this->orig);
        return $reverse;
    }

}
|
---|
349 |
|
---|
/**
 * Edit operation for a run of lines that exist only on the original side.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_delete extends Text_Diff_Op {

    /**
     * @param array $lines  The deleted lines; stored as $orig, while $final
     *                      is set to false.
     */
    function __construct($lines)
    {
        $this->orig = $lines;
        $this->final = false;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with callers
     * that invoke it by name.
     *
     * @param array $lines  See __construct().
     */
    function Text_Diff_Op_delete($lines)
    {
        self::__construct($lines);
    }

    /**
     * Builds the inverse operation: an add of the same lines.
     *
     * @return Text_Diff_Op_add
     */
    function &reverse()
    {
        // Plain assignment: "=& new" is no longer valid syntax, and a
        // variable is still needed because the method returns by reference.
        $reverse = new Text_Diff_Op_add($this->orig);
        return $reverse;
    }

}
|
---|
371 |
|
---|
/**
 * Edit operation for a run of lines that exist only on the final side.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_add extends Text_Diff_Op {

    /**
     * @param array $lines  The added lines; stored as $final, while $orig
     *                      is set to false.
     */
    function __construct($lines)
    {
        $this->final = $lines;
        $this->orig = false;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with callers
     * that invoke it by name.
     *
     * @param array $lines  See __construct().
     */
    function Text_Diff_Op_add($lines)
    {
        self::__construct($lines);
    }

    /**
     * Builds the inverse operation: a delete of the same lines.
     *
     * @return Text_Diff_Op_delete
     */
    function &reverse()
    {
        // Plain assignment: "=& new" is no longer valid syntax, and a
        // variable is still needed because the method returns by reference.
        $reverse = new Text_Diff_Op_delete($this->final);
        return $reverse;
    }

}
|
---|
393 |
|
---|
/**
 * Edit operation for a run of original lines replaced by a run of final
 * lines.
 *
 * @package Text_Diff
 * @author  Geoffrey T. Dairiki <dairiki@dairiki.org>
 *
 * @access private
 */
class Text_Diff_Op_change extends Text_Diff_Op {

    /**
     * @param array $orig   Lines on the original side.
     * @param array $final  Lines on the final side.
     */
    function __construct($orig, $final)
    {
        $this->orig = $orig;
        $this->final = $final;
    }

    /**
     * PHP 4 style constructor, kept for backward compatibility with callers
     * that invoke it by name.
     *
     * @param array $orig   See __construct().
     * @param array $final  See __construct().
     */
    function Text_Diff_Op_change($orig, $final)
    {
        self::__construct($orig, $final);
    }

    /**
     * Builds the inverse operation: a change with the two sides swapped.
     *
     * @return Text_Diff_Op_change
     */
    function &reverse()
    {
        // Plain assignment: "=& new" is no longer valid syntax, and a
        // variable is still needed because the method returns by reference.
        $reverse = new Text_Diff_Op_change($this->final, $this->orig);
        return $reverse;
    }

}
|
---|