utils_diff.class.php 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395
  1. <?php
  2. /* Copyright (C) 2016 Jean-François Ferry <hello@librethic.io>
  3. *
  4. * A class containing a diff implementation
  5. *
  6. * Created by Stephen Morley - http://stephenmorley.org/ - and released under the
  7. * terms of the CC0 1.0 Universal legal code:
  8. *
  9. * http://creativecommons.org/publicdomain/zero/1.0/legalcode
  10. */
  /**
   * Computes diffs between two strings or two files and formats the result.
   *
   * A diff is a list of entries; each entry is a two-element array holding
   * the item text (a line, or a character in character mode) and one of the
   * type constants Diff::UNMODIFIED, Diff::DELETED or Diff::INSERTED.
   * The diff is built from a longest-common-subsequence table.
   */
  class Diff
  {
      // Type codes stored as the second element of every diff entry.
      const UNMODIFIED = 0;
      const DELETED = 1;
      const INSERTED = 2;

      /**
       * Returns the diff for two strings.
       *
       * Items are compared with strict string equality, so numeric-looking
       * items that differ textually ("1.0" vs "1") are reported as changed.
       *
       * @param string  $string1           First string
       * @param string  $string2           Second string
       * @param boolean $compareCharacters true to compare characters, false to compare lines; defaults to false
       * @return array Array of diff entries: array(item, Diff::UNMODIFIED|Diff::DELETED|Diff::INSERTED)
       */
      public static function compare($string1, $string2, $compareCharacters = false)
      {
          // Both inputs become plain lists, so lines and characters share one code path.
          $sequence1 = self::splitSequence($string1, $compareCharacters);
          $sequence2 = self::splitSequence($string2, $compareCharacters);

          $start = 0;
          $end1 = count($sequence1) - 1;
          $end2 = count($sequence2) - 1;

          // Skip any common prefix: it never needs to enter the LCS table.
          while ($start <= $end1 && $start <= $end2
              && $sequence1[$start] === $sequence2[$start]) {
              $start++;
          }

          // Skip any common suffix, for the same reason.
          while ($end1 >= $start && $end2 >= $start
              && $sequence1[$end1] === $sequence2[$end2]) {
              $end1--;
              $end2--;
          }

          // Diff only the differing middle part.
          $table = self::computeTable($sequence1, $sequence2, $start, $end1, $end2);
          $partialDiff = self::generatePartialDiff($table, $sequence1, $sequence2, $start);

          // Assemble: common prefix, middle part, common suffix.
          $diff = array();
          for ($index = 0; $index < $start; $index++) {
              $diff[] = array($sequence1[$index], self::UNMODIFIED);
          }
          // The partial diff is produced back to front, so reverse it here.
          foreach (array_reverse($partialDiff) as $entry) {
              $diff[] = $entry;
          }
          $length1 = count($sequence1);
          for ($index = $end1 + 1; $index < $length1; $index++) {
              $diff[] = array($sequence1[$index], self::UNMODIFIED);
          }

          return $diff;
      }

      /**
       * Splits a string into the list of items to be compared.
       *
       * Line mode splits on any line break (\R). Character mode splits into
       * UTF-8 characters. In both modes the UTF-8 aware pattern is tried first
       * and a byte-oriented split is used when the input is not valid UTF-8,
       * so multi-byte characters are never cut in the middle for valid input.
       *
       * @param string  $string            String to split; cast to string, so false and null act as ''
       * @param boolean $compareCharacters true to split into characters, false to split into lines
       * @return array List of lines or characters
       */
      private static function splitSequence($string, $compareCharacters)
      {
          $string = (string) $string;

          if (!$compareCharacters) {
              // Without the "u" modifier \R also matches the lone byte 0x85,
              // which occurs inside some UTF-8 characters.
              $lines = preg_split('/\R/u', $string);
              if ($lines === false) {
                  // Not valid UTF-8: fall back to the byte-oriented pattern.
                  $lines = preg_split('/\R/', $string);
              }
              return $lines === false ? array($string) : $lines;
          }

          // An empty string has no characters at all.
          if ($string === '') {
              return array();
          }

          $characters = preg_split('//u', $string, -1, PREG_SPLIT_NO_EMPTY);
          if ($characters === false) {
              // Not valid UTF-8: compare byte by byte.
              $characters = str_split($string);
          }

          return $characters;
      }

      /**
       * Returns the diff for two files.
       *
       * The files are read whole with file_get_contents(); a file that cannot
       * be read yields false, which compare() treats as an empty string.
       *
       * @param string  $file1             Path to the first file
       * @param string  $file2             Path to the second file
       * @param boolean $compareCharacters true to compare characters, false to compare lines; defaults to false
       * @return array Array of diff entries
       */
      public static function compareFiles(
          $file1,
          $file2,
          $compareCharacters = false
      ) {
          return self::compare(
              file_get_contents($file1),
              file_get_contents($file2),
              $compareCharacters
          );
      }

      /**
       * Returns the table of longest common subsequence lengths for the
       * parts of the two sequences lying between the given indices.
       *
       * Row/column 0 stand for "no items"; cell [i][j] holds the LCS length
       * of the first i items of sequence 1 and the first j items of
       * sequence 2, both counted from $start.
       *
       * @param array $sequence1 the first sequence
       * @param array $sequence2 the second sequence
       * @param int   $start     the starting index (shared by both sequences)
       * @param int   $end1      the ending index for the first sequence (inclusive)
       * @param int   $end2      the ending index for the second sequence (inclusive)
       * @return array Two-dimensional table of LCS lengths
       */
      private static function computeTable($sequence1, $sequence2, $start, $end1, $end2)
      {
          $length1 = $end1 - $start + 1;
          $length2 = $end2 - $start + 1;

          // Row 0 is all zeros: nothing in common with an empty sequence.
          $table = array(array_fill(0, $length2 + 1, 0));

          for ($index1 = 1; $index1 <= $length1; $index1++) {
              // Column 0 of every row is zero for the same reason.
              $table[$index1] = array(0);

              for ($index2 = 1; $index2 <= $length2; $index2++) {
                  if ($sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]) {
                      // Matching items extend the LCS of both shorter prefixes.
                      $table[$index1][$index2] = $table[$index1 - 1][$index2 - 1] + 1;
                  } else {
                      // Otherwise keep the better of dropping one item from either side.
                      $table[$index1][$index2] = max($table[$index1 - 1][$index2], $table[$index1][$index2 - 1]);
                  }
              }
          }

          return $table;
      }

      /**
       * Returns the partial diff for the specified sequences, in reverse order.
       *
       * Walks the LCS table from its bottom-right corner back to the origin,
       * emitting one entry per step.
       *
       * @param array $table     the table returned by the computeTable function
       * @param array $sequence1 the first sequence
       * @param array $sequence2 the second sequence
       * @param int   $start     the starting index
       * @return array Array of diff entries, last item first
       */
      private static function generatePartialDiff($table, $sequence1, $sequence2, $start)
      {
          $diff = array();

          $index1 = count($table) - 1;
          $index2 = count($table[0]) - 1;

          // Loop until there are no items remaining in either sequence.
          while ($index1 > 0 || $index2 > 0) {
              if ($index1 > 0 && $index2 > 0
                  && $sequence1[$index1 + $start - 1] === $sequence2[$index2 + $start - 1]
              ) {
                  // Same item on both sides: step diagonally.
                  $diff[] = array($sequence1[$index1 + $start - 1], self::UNMODIFIED);
                  $index1--;
                  $index2--;
              } elseif ($index2 > 0
                  && $table[$index1][$index2] === $table[$index1][$index2 - 1]
              ) {
                  // Dropping the item of sequence 2 loses nothing: it was inserted.
                  $diff[] = array($sequence2[$index2 + $start - 1], self::INSERTED);
                  $index2--;
              } else {
                  // Otherwise the item of sequence 1 was deleted.
                  $diff[] = array($sequence1[$index1 + $start - 1], self::DELETED);
                  $index1--;
              }
          }

          return $diff;
      }

      /**
       * Returns a diff as a string, where unmodified lines are prefixed by ' ',
       * deletions are prefixed by '- ', and insertions are prefixed by '+ '.
       *
       * An entry with an unrecognised type contributes only the separator.
       *
       * @param array  $diff      the diff array
       * @param string $separator the separator appended after every line; defaults to "\n"
       * @return string String
       */
      public static function toString($diff, $separator = "\n")
      {
          $string = '';

          foreach ($diff as $line) {
              switch ($line[1]) {
                  case self::UNMODIFIED:
                      $string .= ' '.$line[0];
                      break;
                  case self::DELETED:
                      $string .= '- '.$line[0];
                      break;
                  case self::INSERTED:
                      $string .= '+ '.$line[0];
                      break;
              }
              $string .= $separator;
          }

          return $string;
      }

      /**
       * Returns a diff as an HTML string, where unmodified lines are contained
       * within 'span' elements, deletions are contained within 'del' elements, and
       * insertions are contained within 'ins' elements.
       *
       * Line text is escaped with dol_escape_htmltag(), which is not defined
       * in this file and must be available at call time.
       *
       * @param array  $diff      the diff array
       * @param string $separator the separator appended after every line; defaults to '<br>'
       * @return string HTML string
       */
      public static function toHTML($diff, $separator = '<br>')
      {
          $html = '';

          // Set once, outside the loop: an entry with an unrecognised type
          // reuses the element of the previous entry ('unknown' if it is first).
          $element = 'unknown';

          foreach ($diff as $line) {
              switch ($line[1]) {
                  case self::UNMODIFIED:
                      $element = 'span';
                      break;
                  case self::DELETED:
                      $element = 'del';
                      break;
                  case self::INSERTED:
                      $element = 'ins';
                      break;
              }
              $html .= '<'.$element.'>'.dol_escape_htmltag($line[0]).'</'.$element.'>';
              $html .= $separator;
          }

          return $html;
      }

      /**
       * Returns a diff as a two-column HTML table.
       *
       * Each row groups a run of consecutive entries: unmodified runs appear
       * in both columns, a deleted run appears on the left with the inserted
       * run that directly follows it (if any) on the right, and a lone
       * inserted run appears on the right only. Entries with an unrecognised
       * type are skipped so that the loop always terminates.
       *
       * @param array  $diff        the diff array
       * @param string $indentation indentation to add to every line of the generated HTML; defaults to ''
       * @param string $separator   the separator between lines; defaults to '<br>'
       * @return string HTML string
       */
      public static function toTable($diff, $indentation = '', $separator = '<br>')
      {
          $html = $indentation."<table class=\"diff\">\n";

          $index = 0;
          $nbdiff = count($diff);

          // getCellContent() advances $index past every entry it consumes.
          while ($index < $nbdiff) {
              switch ($diff[$index][1]) {
                  // Same content on the left and on the right.
                  case self::UNMODIFIED:
                      $leftCell = self::getCellContent($diff, $indentation, $separator, $index, self::UNMODIFIED);
                      $rightCell = $leftCell;
                      break;
                  // Deleted content on the left, the following inserted content on the right.
                  case self::DELETED:
                      $leftCell = self::getCellContent($diff, $indentation, $separator, $index, self::DELETED);
                      $rightCell = self::getCellContent($diff, $indentation, $separator, $index, self::INSERTED);
                      break;
                  // Inserted content on the right only.
                  case self::INSERTED:
                      $leftCell = '';
                      $rightCell = self::getCellContent($diff, $indentation, $separator, $index, self::INSERTED);
                      break;
                  // Unknown type: nothing would consume the entry, so skip it
                  // explicitly instead of looping forever.
                  default:
                      $index++;
                      continue 2;
              }

              $sameContent = ($leftCell === $rightCell);
              $leftClass = $sameContent ? 'Unmodified' : ($leftCell === '' ? 'Blank' : 'Deleted');
              $rightClass = $sameContent ? 'Unmodified' : ($rightCell === '' ? 'Blank' : 'Inserted');

              // Extend the HTML with the new row.
              $html .=
                  $indentation
                  . " <tr>\n"
                  . $indentation
                  . ' <td class="diff'
                  . $leftClass
                  . '">'
                  . $leftCell
                  . "</td>\n"
                  . $indentation
                  . ' <td class="diff'
                  . $rightClass
                  . '">'
                  . $rightCell
                  . "</td>\n"
                  . $indentation
                  . " </tr>\n";
          }

          return $html.$indentation."</table>\n";
      }

      /**
       * Returns the content of a cell, for use in the toTable function.
       *
       * Consumes the run of consecutive entries of the given type starting at
       * $index and leaves $index on the first entry that was not consumed.
       *
       * @param array  $diff        the diff array
       * @param string $indentation indentation of the generated HTML (not used by this method)
       * @param string $separator   the separator appended after every line
       * @param int    $index       the current index, passed by reference and advanced
       * @param int    $type        the type of line to collect
       * @return string HTML string, empty when no entry of that type starts at $index
       */
      private static function getCellContent($diff, $indentation, $separator, &$index, $type)
      {
          $html = '';
          $total = count($diff);

          while ($index < $total && $diff[$index][1] == $type) {
              $html .=
                  '<span>'
                  . htmlspecialchars($diff[$index][0])
                  . '</span>'
                  . $separator;
              $index++;
          }

          return $html;
      }
  }