User:X-Spider2/diff.js
From Homestar Runner Wiki
< User:X-Spider2(Difference between revisions)
| Line 1: | Line 1: | ||
| - | /* | + | // ==UserScript== |
| - | + | // @name JavaScript editor for Wikimedia websites (diff algorithm) | |
| - | Name: diff.js | + | // @namespace http://www.hrwiki.org/ |
| - | Version: 0.9.0 (May 13, 2006) | + | // @description Diff algorithm for a custom editor currently being used by HRWikian X-Spider2. |
| - | Info: http://en.wikipedia.org/wiki/User:Cacycle/diff | + | // @include *wiki* |
| - | Code: http://en.wikipedia.org/wiki/User:Cacycle/diff.js | + | // ==/UserScript== |
| - | + | // | |
| - | JavaScript diff algorithm by [[Wikipedia:User:Cacycle]] (http://en.wikipedia.org/wiki/User_talk:Cacycle). | + | // Name: diff.js |
| - | Outputs html/css-formatted new text with highlighted deletions, inserts, and block moves. | + | // Version: 0.9.0 (May 13, 2006) |
| - | + | // Info: http://en.wikipedia.org/wiki/User:Cacycle/diff | |
| - | The program uses cross-browser code and should work with all modern browsers. It has been tested with: | + | // Code: http://en.wikipedia.org/wiki/User:Cacycle/diff.js |
| - | * Mozilla Firefox 1.5.0.1 | + | // |
| - | * Mozilla SeaMonkey 1.0 | + | // JavaScript diff algorithm by [[Wikipedia:User:Cacycle]] (http://en.wikipedia.org/wiki/User_talk:Cacycle). |
| - | * Opera 8.53 | + | // Outputs html/css-formatted new text with highlighted deletions, inserts, and block moves. |
| - | * Internet Explorer 6.0.2900.2180 | + | // |
| - | + | // The program uses cross-browser code and should work with all modern browsers. It has been tested with: | |
| - | The code is currently under active development and might change rapidly. | + | // * Mozilla Firefox 1.5.0.1 |
| - | + | // * Mozilla SeaMonkey 1.0 | |
| - | An implementation of: | + | // * Opera 8.53 |
| - | Paul Heckel: A technique for isolating differences between files | + | // * Internet Explorer 6.0.2900.2180 |
| - | Communications of the ACM 21(4):264 (1978) | + | // |
| - | http://doi.acm.org/10.1145/359460.359467 | + | // The code is currently under active development and might change rapidly. |
| - | + | // | |
| - | * Word types have been optimized for Wikipedia source texts | + | // An implementation of: |
| - | * Additional post-pass 5 code for resolving islands caused by adding | + | // Paul Heckel: A technique for isolating differences between files |
| - | + | // Communications of the ACM 21(4):264 (1978) | |
| - | * Additional detection of block borders and color coding of moved blocks and their original position | + | // http://doi.acm.org/10.1145/359460.359467 |
| - | + | // | |
| - | This code is used by [[User:X-Spider2/editor.js]] ([[Wikipedia:User:Cacycle/editor]]). | + | // * Word types have been optimized for Wikipedia source texts |
| - | + | // * Additional post-pass 5 code for resolving islands caused by adding | |
| - | Usage: var htmlText = stringDiff(oldText, newText); | + | // two common words at the end of sequences of common words |
| - | + | // * Additional detection of block borders and color coding of moved blocks and their original position | |
| - | This code has been released into the public domain. | + | // |
| - | + | // This code is used by [[User:X-Spider2/editor.js]] ([[Wikipedia:User:Cacycle/editor]]). | |
| - | Datastructures: | + | // |
| - | + | // Usage: var htmlText = stringDiff(oldText, newText); | |
| - | text: an object that holds all text related datastructures | + | // |
| - | + | // This code has been released into the public domain. | |
| - | + | // | |
| - | + | // Datastructures: | |
| - | + | // | |
| - | + | // text: an object that holds all text related datastructures | |
| - | + | // .newWords: consecutive words of the new text (N) | |
| - | symbol['word']: symbol table for passes 1 - 3, holds words as a hash | + | // .oldWords: consecutive words of the old text (O) |
| - | + | // .newToOld: array of corresponding word number in old text (NA) | |
| - | + | // .oldToNew: array of corresponding word number in new text (OA) | |
| - | + | // .message: output message for testing purposes | |
| - | + | // | |
| - | + | // symbol['word']: symbol table for passes 1 - 3, holds words as a hash | |
| - | block: an object that holds block move information | + | // .newCtr: new word occurences counter (NC) |
| - | + | // .oldCtr: old word occurences counter (OC) | |
| - | + | // .toNew: table last old word number | |
| - | + | // .toOld: last new word number (OLNA) | |
| - | + | // | |
| - | + | // block: an object that holds block move information | |
| - | + | // blocks indexed after new text: | |
| - | + | // .newStart: new text word number of start of this block | |
| - | + | // .newLength: element number of this block including non-words | |
| - | + | // .newWords: true word number of this block | |
| - | + | // .newNumber: corresponding block index in old text | |
| - | + | // .newBlock: moved-block-number of a block that has been moved here | |
| - | + | // .newLeft: moved-block-number of a block that has been moved from this border leftwards | |
| - | + | // .newRight: moved-block-number of a block that has been moved from this border rightwards | |
| - | + | // .newLeftIndex: index number of a block that has been moved from this border leftwards | |
| - | + | // .newRightIndex: index number of a block that has been moved from this border rightwards | |
| - | + | // blocks indexed after old text: | |
| - | + | // .oldStart: word number of start of this block | |
| + | // .oldToNew: corresponding new text word number of start | ||
| + | // .oldLength: element number of this block including non-words | ||
| + | // .oldWords: true word number of this block | ||
| Line 73: | Line 76: | ||
var styleMoved = styleMoved || 'font-weight: bold; vertical-align: text-bottom; font-size: xx-small; padding: 0; border: solid 1px;'; | var styleMoved = styleMoved || 'font-weight: bold; vertical-align: text-bottom; font-size: xx-small; padding: 0; border: solid 1px;'; | ||
var styleBlock = styleBlock || [ | var styleBlock = styleBlock || [ | ||
| - | + | 'background-color: #ffff44;', | |
| - | + | 'background-color: #b0ff90;', | |
| - | + | 'background-color: #ffcc99;', | |
| - | + | 'background-color: #99ffff;', | |
| - | + | 'background-color: #99ccff;', | |
| - | + | 'background-color: #cc99ff;', | |
| - | + | 'background-color: #ff99cc;', | |
| - | + | 'background-color: #ffd040;', | |
| - | + | 'background-color: #d0d0d0;' | |
]; | ]; | ||
| Line 123: | Line 126: | ||
function StringDiff(oldText, newText) { | function StringDiff(oldText, newText) { | ||
| - | + | var text = {}; | |
| - | + | text.newWords = []; | |
| - | + | text.oldWords = []; | |
| - | + | text.newToOld = []; | |
| - | + | text.oldToNew = []; | |
| - | + | text.message = ''; | |
| - | + | var block = {}; | |
| - | + | var outText = ''; | |
// trap trivial changes: no change | // trap trivial changes: no change | ||
| - | + | if (oldText == newText) { | |
| - | + | outText = newText; | |
| - | + | outText = Escape(outText); | |
| - | + | outText = HtmlFormat(outText); | |
| - | + | return(outText); | |
| - | + | } | |
// trap trivial changes: old text deleted | // trap trivial changes: old text deleted | ||
| - | + | if ( (oldText == null) || (oldText.length == 0) ) { | |
| - | + | outText = newText; | |
| - | + | outText = Escape(outText); | |
| - | + | outText = HtmlFormat(outText); | |
| - | + | outText = htmlInsertStart + outText + htmlInsertEnd; | |
| - | + | return(outText); | |
| - | + | } | |
// trap trivial changes: new text deleted | // trap trivial changes: new text deleted | ||
| - | + | if ( (newText == null) || (newText.length == 0) ) { | |
| - | + | outText = oldText; | |
| - | + | outText = Escape(outText); | |
| - | + | outText = HtmlFormat(outText); | |
| - | + | outText = htmlDeleteStart + outText + htmlDeleteEnd; | |
| - | + | return(outText); | |
| - | + | } | |
// split new and old text into words | // split new and old text into words | ||
| - | + | SplitText(oldText, newText, text); | |
// calculate diff information | // calculate diff information | ||
| - | + | TextDiff(text); | |
//detect block borders and moved blocks | //detect block borders and moved blocks | ||
| - | + | DetectBlocks(text, block); | |
// process diff data into formatted html text | // process diff data into formatted html text | ||
| - | + | outText = DiffToHtml(text, block); | |
| - | + | return(outText); | |
} | } | ||
| Line 181: | Line 184: | ||
// convert strange spaces | // convert strange spaces | ||
| - | + | oldText = oldText.replace(/[\t\v\u00A0\u2028\u2029]+/g, ' '); | |
| - | + | newText = newText.replace(/[\t\v\u00A0\u2028\u2029]+/g, ' '); | |
// split old text into words | // split old text into words | ||
// / | | | | | | | | | | | | | | / | // / | | | | | | | | | | | | | | / | ||
| - | + | var pattern = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g; | |
| - | + | var result; | |
| - | + | do { | |
| - | + | result = pattern.exec(oldText); | |
| - | + | if (result != null) { | |
| - | + | text.oldWords.push(result[0]); | |
| - | + | } | |
| - | + | } while (result != null); | |
// split new text into words | // split new text into words | ||
| - | + | do { | |
| - | + | result = pattern.exec(newText); | |
| - | + | if (result != null) { | |
| - | + | text.newWords.push(result[0]); | |
| - | + | } | |
| - | + | } while (result != null); | |
| - | + | return; | |
} | } | ||
| Line 215: | Line 218: | ||
function TextDiff(text, newStart, newEnd, oldStart, oldEnd, recursionLevel) { | function TextDiff(text, newStart, newEnd, oldStart, oldEnd, recursionLevel) { | ||
| - | + | symbol = new Object(); | |
| - | + | symbol.newCtr = []; | |
| - | + | symbol.oldCtr = []; | |
| - | + | symbol.toNew = []; | |
| - | + | symbol.toOld = []; | |
// set defaults | // set defaults | ||
| - | + | newStart = newStart || 0; | |
| - | + | newEnd = newEnd || text.newWords.length; | |
| - | + | oldStart = oldStart || 0; | |
| - | + | oldEnd = oldEnd || text.oldWords.length; | |
| - | + | recursionLevel = recursionLevel || 0; | |
// limit recursion depth | // limit recursion depth | ||
| - | + | if (recursionLevel > 10) { | |
| - | + | return; | |
| - | + | } | |
// pass 1: parse new text into symbol table s | // pass 1: parse new text into symbol table s | ||
| - | + | var word; | |
| - | + | for (i = newStart; i < newEnd; i ++) { | |
| - | + | word = text.newWords[i]; | |
// add new entry to symbol table | // add new entry to symbol table | ||
| - | + | if ( symbol[word] == null) { | |
| - | + | symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null }; | |
| - | + | } | |
// increment symbol table word counter for new text | // increment symbol table word counter for new text | ||
| - | + | symbol[word].newCtr ++; | |
// add last word number in new text | // add last word number in new text | ||
| - | + | symbol[word].toNew = i; | |
| - | + | } | |
// pass 2: parse old text into symbol table | // pass 2: parse old text into symbol table | ||
| - | + | for (j = oldStart; j < oldEnd; j ++) { | |
| - | + | word = text.oldWords[j]; | |
// add new entry to symbol table | // add new entry to symbol table | ||
| - | + | if ( symbol[word] == null) { | |
| - | + | symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null }; | |
| - | + | } | |
// increment symbol table word counter for old text | // increment symbol table word counter for old text | ||
| - | + | symbol[word].oldCtr ++; | |
// add last word number in old text | // add last word number in old text | ||
| - | + | symbol[word].toOld = j; | |
| - | + | } | |
// pass 3: connect unique words | // pass 3: connect unique words | ||
| - | + | for (i in symbol) { | |
// find words in the symbol table that occur only once in both versions | // find words in the symbol table that occur only once in both versions | ||
| - | + | if ( (symbol[i].newCtr == 1) && (symbol[i].oldCtr == 1) ) { | |
| - | + | var toNew = symbol[i].toNew; | |
| - | + | var toOld = symbol[i].toOld; | |
// do not use spaces as unique markers | // do not use spaces as unique markers | ||
| - | + | if ( ! /\s/.test( text.newWords[toNew] ) ) { | |
// connect from new to old and from old to new | // connect from new to old and from old to new | ||
| - | + | text.newToOld[toNew] = toOld; | |
| - | + | text.oldToNew[toOld] = toNew; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// pass 4: connect adjacent identical words downwards | // pass 4: connect adjacent identical words downwards | ||
| - | + | for (i = newStart; i < newEnd - 1; i ++) { | |
// find already connected pairs | // find already connected pairs | ||
| - | + | if (text.newToOld[i] != null) { | |
| - | + | j = text.newToOld[i]; | |
// check if the following words are not yet connected | // check if the following words are not yet connected | ||
| - | + | if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) { | |
// if the following words are the same connect them | // if the following words are the same connect them | ||
| - | + | if ( text.newWords[i + 1] == text.oldWords[j + 1] ) { | |
| - | + | text.newToOld[i + 1] = j + 1; | |
| - | + | text.oldToNew[j + 1] = i + 1; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// pass 5: connect adjacent identical words upwards | // pass 5: connect adjacent identical words upwards | ||
| - | + | for (i = newEnd - 1; i > newStart; i --) { | |
// find already connected pairs | // find already connected pairs | ||
| - | + | if (text.newToOld[i] != null) { | |
| - | + | j = text.newToOld[i]; | |
// check if the preceeding words are not yet connected | // check if the preceeding words are not yet connected | ||
| - | + | if ( (text.newToOld[i - 1] == null) && (text.oldToNew[j - 1] == null) ) { | |
// if the preceeding words are the same connect them | // if the preceeding words are the same connect them | ||
| - | + | if ( text.newWords[i - 1] == text.oldWords[j - 1] ) { | |
| - | + | text.newToOld[i - 1] = j - 1; | |
| - | + | text.oldToNew[j - 1] = i - 1; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// recursively diff still unresolved regions downwards | // recursively diff still unresolved regions downwards | ||
| - | + | if (recursiveDiff) { | |
| - | + | i = newStart; | |
| - | + | j = oldStart; | |
| - | + | while (i < newEnd) { | |
| - | + | if (text.newToOld[i - 1] != null) { | |
| - | + | j = text.newToOld[i - 1] + 1; | |
| - | + | } | |
// check for the start of an unresolved sequence | // check for the start of an unresolved sequence | ||
| - | + | if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) { | |
// determine the ends of the sequences | // determine the ends of the sequences | ||
| - | + | var iStart = i; | |
| - | + | var iEnd = i; | |
| - | + | while ( (text.newToOld[iEnd] == null) && (iEnd < newEnd) ) { | |
| - | + | iEnd ++; | |
| - | + | } | |
| - | + | var iLength = iEnd - iStart; | |
| - | + | var jStart = j; | |
| - | + | var jEnd = j; | |
| - | + | while ( (text.oldToNew[jEnd] == null) && (jEnd < oldEnd) ) { | |
| - | + | jEnd ++; | |
| - | + | } | |
| - | + | var jLength = jEnd - jStart; | |
// recursively diff the unresolved sequence | // recursively diff the unresolved sequence | ||
| - | + | if ( (iLength > 0) && (jLength > 0) ) { | |
| - | + | if ( (iLength > 1) || (jLength > 1) ) { | |
| - | + | if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) { | |
| - | + | TextDiff(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1); | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | i = iEnd; | |
| - | + | } | |
| - | + | else { | |
| - | + | i ++; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// recursively diff still unresolved regions upwards | // recursively diff still unresolved regions upwards | ||
| - | + | if (recursiveDiff) { | |
| - | + | i = newEnd - 1; | |
| - | + | j = oldEnd - 1; | |
| - | + | while (i >= newStart) { | |
| - | + | if (text.newToOld[i + 1] != null) { | |
| - | + | j = text.newToOld[i + 1] - 1; | |
| - | + | } | |
// check for the start of an unresolved sequence | // check for the start of an unresolved sequence | ||
| - | + | if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) { | |
// determine the ends of the sequences | // determine the ends of the sequences | ||
| - | + | var iStart = i; | |
| - | + | var iEnd = i + 1; | |
| - | + | while ( (text.newToOld[iStart - 1] == null) && (iStart >= newStart) ) { | |
| - | + | iStart --; | |
| - | + | } | |
| - | + | var iLength = iEnd - iStart; | |
| - | + | var jStart = j; | |
| - | + | var jEnd = j + 1; | |
| - | + | while ( (text.oldToNew[jStart - 1] == null) && (jStart >= oldStart) ) { | |
| - | + | jStart --; | |
| - | + | } | |
| - | + | var jLength = jEnd - jStart; | |
// recursively diff the unresolved sequence | // recursively diff the unresolved sequence | ||
| - | + | if ( (iLength > 0) && (jLength > 0) ) { | |
| - | + | if ( (iLength > 1) || (jLength > 1) ) { | |
| - | + | if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) { | |
| - | + | TextDiff(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1); | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | i = iStart - 1; | |
| - | + | } | |
| - | + | else { | |
| - | + | i --; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | return; | |
} | } | ||
| Line 426: | Line 429: | ||
function DiffToHtml(text, block) { | function DiffToHtml(text, block) { | ||
| - | + | var outText = text.message; | |
| - | + | var blockNumber = 0; | |
| - | + | i = 0; | |
| - | + | j = 0; | |
| - | + | do { | |
| - | + | var movedIndex = []; | |
| - | + | var movedBlock = []; | |
| - | + | var movedLeft = []; | |
| - | + | var blockText = ''; | |
| - | + | var identText = ''; | |
| - | + | var delText = ''; | |
| - | + | var insText = ''; | |
| - | + | var identStart = ''; | |
| - | + | var identEnd = ''; | |
// detect block boundary | // detect block boundary | ||
| - | + | if ( (text.newToOld[i] != j) || (blockNumber == 0 ) ) { | |
| - | + | if ( ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) && ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) ) { | |
// block moved right | // block moved right | ||
| - | + | var moved = block.newRight[blockNumber]; | |
| - | + | if (moved > 0) { | |
| - | + | var index = block.newRightIndex[blockNumber]; | |
| - | + | movedIndex.push(index); | |
| - | + | movedBlock.push(moved); | |
| - | + | movedLeft.push(false); | |
| - | + | } | |
// block moved left | // block moved left | ||
| - | + | moved = block.newLeft[blockNumber]; | |
| - | + | if (moved > 0) { | |
| - | + | var index = block.newLeftIndex[blockNumber]; | |
| - | + | movedIndex.push(index); | |
| - | + | movedBlock.push(moved); | |
| - | + | movedLeft.push(true); | |
| - | + | } | |
// check if a block starts here | // check if a block starts here | ||
| - | + | moved = block.newBlock[blockNumber]; | |
| - | + | if (moved > 0) { | |
// mark block as inserted text | // mark block as inserted text | ||
| - | + | if (block.newWords[blockNumber] < blockMinLength) { | |
| - | + | identStart = htmlInsertStart; | |
| - | + | identEnd = htmlInsertEnd; | |
| - | + | } | |
// mark block by color | // mark block by color | ||
| - | + | else { | |
| - | + | if (moved > styleBlock.length) { | |
| - | + | moved = styleBlock.length; | |
| - | + | } | |
| - | + | identStart = HtmlCustomize(htmlBlockStart, moved - 1); | |
| - | + | identEnd = htmlBlockEnd; | |
| - | + | } | |
| - | + | } | |
| - | + | if (i >= text.newWords.length) { | |
| - | + | i ++; | |
| - | + | } | |
| - | + | else { | |
| - | + | j = text.newToOld[i]; | |
| - | + | blockNumber ++; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// get the correct order if moved to the left as well as to the right from here | // get the correct order if moved to the left as well as to the right from here | ||
| - | + | if (movedIndex.length == 2) { | |
| - | + | if (movedIndex[0] > movedIndex[1]) { | |
| - | + | movedIndex.reverse(); | |
| - | + | movedBlock.reverse(); | |
| - | + | movedLeft.reverse(); | |
| - | + | } | |
| - | + | } | |
// handle left and right block moves from this position | // handle left and right block moves from this position | ||
| - | + | for (m = 0; m < movedIndex.length; m ++) { | |
// insert the block as deleted text | // insert the block as deleted text | ||
| - | + | if (block.newWords[ movedIndex[m] ] < blockMinLength) { | |
| - | + | var movedStart = block.newStart[ movedIndex[m] ]; | |
| - | + | var movedLength = block.newLength[ movedIndex[m] ]; | |
| - | + | var str = ''; | |
| - | + | for (n = movedStart; n < movedStart + movedLength; n ++) { | |
| - | + | str += text.newWords[n]; | |
| - | + | } | |
| - | + | str = str.replace(/\n/g, '¶<br>'); | |
| - | + | blockText += htmlDeleteStart + Escape(str) + htmlDeleteEnd; | |
| - | + | } | |
// add a placeholder / move direction indicator | // add a placeholder / move direction indicator | ||
| - | + | else { | |
| - | + | if (movedBlock[m] > styleBlock.length) { | |
| - | + | movedBlock[m] = styleBlock.length; | |
| - | + | } | |
| - | + | if (movedLeft[m]) { | |
| - | + | blockText += HtmlCustomize(htmlMovedLeft, movedBlock[m] - 1); | |
| - | + | } | |
| - | + | else { | |
| - | + | blockText += HtmlCustomize(htmlMovedRight, movedBlock[m] - 1); | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// collect consecutive identical text | // collect consecutive identical text | ||
| - | + | while ( (i < text.newWords.length) && (j < text.oldWords.length) ) { | |
| - | + | if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) { | |
| - | + | break; | |
| - | + | } | |
| - | + | if (text.newToOld[i] != j) { | |
| - | + | break; | |
| - | + | } | |
| - | + | identText += text.newWords[i]; | |
| - | + | i ++; | |
| - | + | j ++; | |
| - | + | } | |
// collect consecutive deletions | // collect consecutive deletions | ||
| - | + | while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) { | |
| - | + | delText += text.oldWords[j]; | |
| - | + | j ++; | |
| - | + | } | |
// collect consecutive inserts | // collect consecutive inserts | ||
| - | + | while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) { | |
| - | + | insText += text.newWords[i]; | |
| - | + | i ++; | |
| - | + | } | |
// remove leading and trailing similarities betweein delText and ins from highlighting | // remove leading and trailing similarities betweein delText and ins from highlighting | ||
| - | + | var preText = ''; | |
| - | + | var postText = ''; | |
| - | + | if (wordDiff) { | |
| - | + | if ( (delText != '') && (insText != '') ) { | |
// remove leading similarities | // remove leading similarities | ||
| - | + | while ( delText.charAt(0) == insText.charAt(0) && (delText != '') && (insText != '') ) { | |
| - | + | preText = preText + delText.charAt(0); | |
| - | + | delText = delText.substr(1); | |
| - | + | insText = insText.substr(1); | |
| - | + | } | |
// remove trailing similarities | // remove trailing similarities | ||
| - | + | while ( delText.charAt(delText.length - 1) == insText.charAt(insText.length - 1) && (delText != '') && (insText != '') ) { | |
| - | + | postText = delText.charAt(delText.length - 1) + postText; | |
| - | + | delText = delText.substr(0, delText.length - 1); | |
| - | + | insText = insText.substr(0, insText.length - 1); | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// output the identical text, deletions and inserts | // output the identical text, deletions and inserts | ||
| - | + | if (blockText != '') { | |
| - | + | outText += blockText; | |
| - | + | } | |
| - | + | if (identText != '') { | |
| - | + | outText += identStart + Escape(identText) + identEnd; | |
| - | + | } | |
| - | + | outText += preText; | |
| - | + | if (delText != '') { | |
| - | + | delText = htmlDeleteStart + Escape(delText) + htmlDeleteEnd; | |
| - | + | delText = delText.replace(/\n/g, '¶<br>'); | |
| - | + | outText += delText; | |
| - | + | } | |
| - | + | if (insText != '') { | |
| - | + | insText = htmlInsertStart + Escape(insText) + htmlInsertEnd; | |
| - | + | insText = insText.replace(/\n/g, '¶<br>'); | |
| - | + | outText += insText; | |
| - | + | } | |
| - | + | outText += postText; | |
| - | + | } while (i <= text.newWords.length); | |
| - | + | outText += '\n'; | |
| - | + | outText = HtmlFormat(outText); | |
| - | + | return(outText); | |
} | } | ||
| Line 612: | Line 615: | ||
function Escape(text) { | function Escape(text) { | ||
| - | + | text = text.replace(/&/g, "&"); | |
| - | + | text = text.replace(/</g, "<"); | |
| - | + | text = text.replace(/>/g, ">"); | |
| - | + | text = text.replace(/"/g, """); //" | |
| - | + | return(text); | |
} | } | ||
| Line 625: | Line 628: | ||
function HtmlCustomize(text, block) { | function HtmlCustomize(text, block) { | ||
| - | + | text = text.replace(/\{number\}/, block); | |
| - | + | text = text.replace(/\{block\}/, styleBlock[block]); | |
| - | + | return(text); | |
} | } | ||
| Line 638: | Line 641: | ||
function HtmlFormat(text) { | function HtmlFormat(text) { | ||
| - | + | text = text.replace(/ /g, ' '); | |
| - | + | text = text.replace(/\n/g, '<br>'); | |
| - | + | return(text); | |
} | } | ||
| Line 650: | Line 653: | ||
function DetectBlocks(text, block) { | function DetectBlocks(text, block) { | ||
| - | + | block.oldStart = []; | |
| - | + | block.oldToNew = []; | |
| - | + | block.oldLength = []; | |
| - | + | block.oldWords = []; | |
| - | + | block.newStart = []; | |
| - | + | block.newLength = []; | |
| - | + | block.newWords = []; | |
| - | + | block.newNumber = []; | |
| - | + | block.newBlock = []; | |
| - | + | block.newLeft = []; | |
| - | + | block.newRight = []; | |
| - | + | block.newLeftIndex = []; | |
| - | + | block.newRightIndex = []; | |
| - | + | var blockNumber = 0; | |
| - | + | var wordCounter = 0; | |
| - | + | var realWordCounter = 0; | |
// get old text block order | // get old text block order | ||
| - | + | if (showBlockMoves) { | |
| - | + | j = 0; | |
| - | + | i = 0; | |
| - | + | do { | |
// detect block boundaries on old text | // detect block boundaries on old text | ||
| - | + | if ( (text.oldToNew[j] != i) || (blockNumber == 0 ) ) { | |
| - | + | if ( ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) && ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) ) { | |
| - | + | if (blockNumber > 0) { | |
| - | + | block.oldLength[blockNumber - 1] = wordCounter; | |
| - | + | block.oldWords[blockNumber - 1] = realWordCounter; | |
| - | + | wordCounter = 0; | |
| - | + | realWordCounter = 0; | |
| - | + | } | |
| - | + | if (j >= text.oldWords.length) { | |
| - | + | j ++; | |
| - | + | } | |
| - | + | else { | |
| - | + | i = text.oldToNew[j]; | |
| - | + | block.oldStart[blockNumber] = j; | |
| - | + | block.oldToNew[blockNumber] = text.oldToNew[j]; | |
| - | + | blockNumber ++; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// jump over identical pairs | // jump over identical pairs | ||
| - | + | while ( (i < text.newWords.length) && (j < text.oldWords.length) ) { | |
| - | + | if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) { | |
| - | + | break; | |
| - | + | } | |
| - | + | if (text.oldToNew[j] != i) { | |
| - | + | break; | |
| - | + | } | |
| - | + | i ++; | |
| - | + | j ++; | |
| - | + | wordCounter ++; | |
| - | + | if ( /\w/.test( text.newWords[i] ) ) { | |
| - | + | realWordCounter ++; | |
| - | + | } | |
| - | + | } | |
// jump over consecutive deletions | // jump over consecutive deletions | ||
| - | + | while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) { | |
| - | + | j ++; | |
| - | + | } | |
// jump over consecutive inserts | // jump over consecutive inserts | ||
| - | + | while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) { | |
| - | + | i ++; | |
| - | + | } | |
| - | + | } while (j <= text.oldWords.length); | |
// get the block order in the new text | // get the block order in the new text | ||
| - | + | var lastMin; | |
| - | + | var currMinIndex; | |
| - | + | lastMin = null; | |
// sort the data by increasing start numbers into new text block info | // sort the data by increasing start numbers into new text block info | ||
| - | + | for (i = 0; i < blockNumber; i ++) { | |
| - | + | currMin = null; | |
| - | + | for (j = 0; j < blockNumber; j ++) { | |
| - | + | curr = block.oldToNew[j]; | |
| - | + | if ( (curr > lastMin) || (lastMin == null) ) { | |
| - | + | if ( (curr < currMin) || (currMin == null) ) { | |
| - | + | currMin = curr; | |
| - | + | currMinIndex = j; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | block.newStart[i] = block.oldToNew[currMinIndex]; | |
| - | + | block.newLength[i] = block.oldLength[currMinIndex]; | |
| - | + | block.newWords[i] = block.oldWords[currMinIndex]; | |
| - | + | block.newNumber[i] = currMinIndex; | |
| - | + | lastMin = currMin; | |
| - | + | } | |
// detect not moved blocks | // detect not moved blocks | ||
| - | + | for (i = 0; i < blockNumber; i ++) { | |
| - | + | if (block.newBlock[i] == null) { | |
| - | + | if (block.newNumber[i] == i) { | |
| - | + | block.newBlock[i] = 0; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// detect switches of neighbouring blocks | // detect switches of neighbouring blocks | ||
| - | + | for (i = 0; i < blockNumber - 1; i ++) { | |
| - | + | if ( (block.newBlock[i] == null) && (block.newBlock[i + 1] == null) ) { | |
| - | + | if (block.newNumber[i] - block.newNumber[i + 1] == 1) { | |
| - | + | if ( (block.newNumber[i + 1] - block.newNumber[i + 2] != 1) || (i + 2 >= blockNumber) ) { | |
// the shorter one is declared the moved one | // the shorter one is declared the moved one | ||
| - | + | if (block.newLength[i] < block.newLength[i + 1]) { | |
| - | + | block.newBlock[i] = 1; | |
| - | + | block.newBlock[i + 1] = 0; | |
| - | + | } | |
| - | + | else { | |
| - | + | block.newBlock[i] = 0; | |
| - | + | block.newBlock[i + 1] = 1; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
// mark all others as moved and number the moved blocks | // mark all others as moved and number the moved blocks | ||
| - | + | j = 1; | |
| - | + | for (i = 0; i < blockNumber; i ++) { | |
| - | + | if ( (block.newBlock[i] == null) || (block.newBlock[i] == 1) ) { | |
| - | + | block.newBlock[i] = j++; | |
| - | + | } | |
| - | + | } | |
// check if a block has been moved from this block border | // check if a block has been moved from this block border | ||
| - | + | for (i = 0; i < blockNumber; i ++) { | |
| - | + | for (j = 0; j < blockNumber; j ++) { | |
| - | + | if (block.newNumber[j] == i) { | |
| - | + | if (block.newBlock[j] > 0) { | |
// block moved right | // block moved right | ||
| - | + | if (block.newNumber[j] < j) { | |
| - | + | block.newRight[i] = block.newBlock[j]; | |
| - | + | block.newRightIndex[i] = j; | |
| - | + | } | |
// block moved left | // block moved left | ||
| - | + | else { | |
| - | + | block.newLeft[i + 1] = block.newBlock[j]; | |
| - | + | block.newLeftIndex[i + 1] = j; | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | } | |
| - | + | return; | |
} | } | ||
Current revision as of 03:32, 4 July 2006
// ==UserScript==
// @name JavaScript editor for Wikimedia websites (diff algorithm)
// @namespace http://www.hrwiki.org/
// @description Diff algorithm for a custom editor currently being used by HRWikian X-Spider2.
// @include *wiki*
// ==/UserScript==
//
// Name: diff.js
// Version: 0.9.0 (May 13, 2006)
// Info: http://en.wikipedia.org/wiki/User:Cacycle/diff
// Code: http://en.wikipedia.org/wiki/User:Cacycle/diff.js
//
// JavaScript diff algorithm by [[Wikipedia:User:Cacycle]] (http://en.wikipedia.org/wiki/User_talk:Cacycle).
// Outputs html/css-formatted new text with highlighted deletions, inserts, and block moves.
//
// The program uses cross-browser code and should work with all modern browsers. It has been tested with:
// * Mozilla Firefox 1.5.0.1
// * Mozilla SeaMonkey 1.0
// * Opera 8.53
// * Internet Explorer 6.0.2900.2180
//
// The code is currently under active development and might change rapidly.
//
// An implementation of:
// Paul Heckel: A technique for isolating differences between files
// Communications of the ACM 21(4):264 (1978)
// http://doi.acm.org/10.1145/359460.359467
//
// * Word types have been optimized for Wikipedia source texts
// * Additional post-pass 5 code for resolving islands caused by adding
// two common words at the end of sequences of common words
// * Additional detection of block borders and color coding of moved blocks and their original position
//
// This code is used by [[User:X-Spider2/editor.js]] ([[Wikipedia:User:Cacycle/editor]]).
//
// Usage: var htmlText = stringDiff(oldText, newText);
//
// This code has been released into the public domain.
//
// Datastructures:
//
// text: an object that holds all text related datastructures
// .newWords: consecutive words of the new text (N)
// .oldWords: consecutive words of the old text (O)
// .newToOld: array of corresponding word number in old text (NA)
// .oldToNew: array of corresponding word number in new text (OA)
// .message: output message for testing purposes
//
// symbol['word']: symbol table for passes 1 - 3, holds words as a hash
// .newCtr: new word occurences counter (NC)
// .oldCtr: old word occurences counter (OC)
// .toNew: table last old word number
// .toOld: last new word number (OLNA)
//
// block: an object that holds block move information
// blocks indexed after new text:
// .newStart: new text word number of start of this block
// .newLength: element number of this block including non-words
// .newWords: true word number of this block
// .newNumber: corresponding block index in old text
// .newBlock: moved-block-number of a block that has been moved here
// .newLeft: moved-block-number of a block that has been moved from this border leftwards
// .newRight: moved-block-number of a block that has been moved from this border rightwards
// .newLeftIndex: index number of a block that has been moved from this border leftwards
// .newRightIndex: index number of a block that has been moved from this border rightwards
// blocks indexed after old text:
// .oldStart: word number of start of this block
// .oldToNew: corresponding new text word number of start
// .oldLength: element number of this block including non-words
// .oldWords: true word number of this block
// css for change indicators
var styleDelete = styleDelete || 'font-weight: normal; text-decoration: none; color: #ffffff; background-color: #990033;';
var styleInsert = styleInsert || 'font-weight: normal; text-decoration: none; color: #ffffff; background-color: #009933;';
var styleMoved = styleMoved || 'font-weight: bold; vertical-align: text-bottom; font-size: xx-small; padding: 0; border: solid 1px;';
var styleBlock = styleBlock || [
'background-color: #ffff44;',
'background-color: #b0ff90;',
'background-color: #ffcc99;',
'background-color: #99ffff;',
'background-color: #99ccff;',
'background-color: #cc99ff;',
'background-color: #ff99cc;',
'background-color: #ffd040;',
'background-color: #d0d0d0;'
];
// html for change indicators, {number} is replaced by the block number, {block} is replaced by the block style
var htmlMovedRight = htmlMovedRight || '<input type="button" value=">" style="' + styleMoved + ' {block}">';
var htmlMovedLeft = htmlMovedLeft || '<input type="button" value="<" style="' + styleMoved + ' {block}">';
var htmlBlockStart = htmlBlockStart || '<span style="{block}">';
var htmlBlockEnd = htmlBlockEnd || '</span>';
var htmlDeleteStart = htmlDeleteStart || '<span style="' + styleDelete + '">';
var htmlDeleteEnd = htmlDeleteEnd || '</span>';
var htmlInsertStart = htmlInsertStart || '<span style="' + styleInsert + '">';
var htmlInsertEnd = htmlInsertEnd || '</span>';
// minimal number of real words for a moved block (0 for always displaying block move indicators)
var blockMinLength = blockMinLength || 3;
// exclude identical sequence starts and endings from change marking
var wordDiff = wordDiff || true;
// enable recursive diff to resolve problematic sequences
var recursiveDiff = recursiveDiff || true;
// enable block move display
var showBlockMoves = showBlockMoves || true;
// global variables
var i;
var j;
var m;
var n;
// StringDiff: main program
// input: oldText, newText, strings containing the texts
// returns: html diff
function StringDiff(oldText, newText) {
var text = {};
text.newWords = [];
text.oldWords = [];
text.newToOld = [];
text.oldToNew = [];
text.message = '';
var block = {};
var outText = '';
// trap trivial changes: no change
if (oldText == newText) {
outText = newText;
outText = Escape(outText);
outText = HtmlFormat(outText);
return(outText);
}
// trap trivial changes: old text deleted
if ( (oldText == null) || (oldText.length == 0) ) {
outText = newText;
outText = Escape(outText);
outText = HtmlFormat(outText);
outText = htmlInsertStart + outText + htmlInsertEnd;
return(outText);
}
// trap trivial changes: new text deleted
if ( (newText == null) || (newText.length == 0) ) {
outText = oldText;
outText = Escape(outText);
outText = HtmlFormat(outText);
outText = htmlDeleteStart + outText + htmlDeleteEnd;
return(outText);
}
// split new and old text into words
SplitText(oldText, newText, text);
// calculate diff information
TextDiff(text);
//detect block borders and moved blocks
DetectBlocks(text, block);
// process diff data into formatted html text
outText = DiffToHtml(text, block);
return(outText);
}
// SplitText: split new and old text into words
// input: oldText, newText, strings containing the texts
// changes: text.newWords and text.oldWords, arrays containing the texts in arrays of words
function SplitText(oldText, newText, text) {
// convert strange spaces
oldText = oldText.replace(/[\t\v\u00A0\u2028\u2029]+/g, ' ');
newText = newText.replace(/[\t\v\u00A0\u2028\u2029]+/g, ' ');
// split old text into words
// / | | | | | | | | | | | | | | /
var pattern = /[\w]+|\[\[|\]\]|\{\{|\}\}|\n+| +|&\w+;|'''|''|=+|\{\||\|\}|\|\-|./g;
var result;
do {
result = pattern.exec(oldText);
if (result != null) {
text.oldWords.push(result[0]);
}
} while (result != null);
// split new text into words
do {
result = pattern.exec(newText);
if (result != null) {
text.newWords.push(result[0]);
}
} while (result != null);
return;
}
// TextDiff: calculate diff information
// input: text.newWords and text.oldWords, arrays containing the texts in arrays of words
// optionally for recursive calls: newStart, newEnd, oldStart, oldEnd, recursionLevel
// changes: text.newToOld and text.oldToNew, containing the line numbers in the other version
function TextDiff(text, newStart, newEnd, oldStart, oldEnd, recursionLevel) {
symbol = new Object();
symbol.newCtr = [];
symbol.oldCtr = [];
symbol.toNew = [];
symbol.toOld = [];
// set defaults
newStart = newStart || 0;
newEnd = newEnd || text.newWords.length;
oldStart = oldStart || 0;
oldEnd = oldEnd || text.oldWords.length;
recursionLevel = recursionLevel || 0;
// limit recursion depth
if (recursionLevel > 10) {
return;
}
// pass 1: parse new text into symbol table s
var word;
for (i = newStart; i < newEnd; i ++) {
word = text.newWords[i];
// add new entry to symbol table
if ( symbol[word] == null) {
symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
}
// increment symbol table word counter for new text
symbol[word].newCtr ++;
// add last word number in new text
symbol[word].toNew = i;
}
// pass 2: parse old text into symbol table
for (j = oldStart; j < oldEnd; j ++) {
word = text.oldWords[j];
// add new entry to symbol table
if ( symbol[word] == null) {
symbol[word] = { newCtr: 0, oldCtr: 0, toNew: null, toOld: null };
}
// increment symbol table word counter for old text
symbol[word].oldCtr ++;
// add last word number in old text
symbol[word].toOld = j;
}
// pass 3: connect unique words
for (i in symbol) {
// find words in the symbol table that occur only once in both versions
if ( (symbol[i].newCtr == 1) && (symbol[i].oldCtr == 1) ) {
var toNew = symbol[i].toNew;
var toOld = symbol[i].toOld;
// do not use spaces as unique markers
if ( ! /\s/.test( text.newWords[toNew] ) ) {
// connect from new to old and from old to new
text.newToOld[toNew] = toOld;
text.oldToNew[toOld] = toNew;
}
}
}
// pass 4: connect adjacent identical words downwards
for (i = newStart; i < newEnd - 1; i ++) {
// find already connected pairs
if (text.newToOld[i] != null) {
j = text.newToOld[i];
// check if the following words are not yet connected
if ( (text.newToOld[i + 1] == null) && (text.oldToNew[j + 1] == null) ) {
// if the following words are the same connect them
if ( text.newWords[i + 1] == text.oldWords[j + 1] ) {
text.newToOld[i + 1] = j + 1;
text.oldToNew[j + 1] = i + 1;
}
}
}
}
// pass 5: connect adjacent identical words upwards
for (i = newEnd - 1; i > newStart; i --) {
// find already connected pairs
if (text.newToOld[i] != null) {
j = text.newToOld[i];
// check if the preceeding words are not yet connected
if ( (text.newToOld[i - 1] == null) && (text.oldToNew[j - 1] == null) ) {
// if the preceeding words are the same connect them
if ( text.newWords[i - 1] == text.oldWords[j - 1] ) {
text.newToOld[i - 1] = j - 1;
text.oldToNew[j - 1] = i - 1;
}
}
}
}
// recursively diff still unresolved regions downwards
if (recursiveDiff) {
i = newStart;
j = oldStart;
while (i < newEnd) {
if (text.newToOld[i - 1] != null) {
j = text.newToOld[i - 1] + 1;
}
// check for the start of an unresolved sequence
if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) {
// determine the ends of the sequences
var iStart = i;
var iEnd = i;
while ( (text.newToOld[iEnd] == null) && (iEnd < newEnd) ) {
iEnd ++;
}
var iLength = iEnd - iStart;
var jStart = j;
var jEnd = j;
while ( (text.oldToNew[jEnd] == null) && (jEnd < oldEnd) ) {
jEnd ++;
}
var jLength = jEnd - jStart;
// recursively diff the unresolved sequence
if ( (iLength > 0) && (jLength > 0) ) {
if ( (iLength > 1) || (jLength > 1) ) {
if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) {
TextDiff(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
}
}
}
i = iEnd;
}
else {
i ++;
}
}
}
// recursively diff still unresolved regions upwards
if (recursiveDiff) {
i = newEnd - 1;
j = oldEnd - 1;
while (i >= newStart) {
if (text.newToOld[i + 1] != null) {
j = text.newToOld[i + 1] - 1;
}
// check for the start of an unresolved sequence
if ( (text.newToOld[i] == null) && (text.oldToNew[j] == null) ) {
// determine the ends of the sequences
var iStart = i;
var iEnd = i + 1;
while ( (text.newToOld[iStart - 1] == null) && (iStart >= newStart) ) {
iStart --;
}
var iLength = iEnd - iStart;
var jStart = j;
var jEnd = j + 1;
while ( (text.oldToNew[jStart - 1] == null) && (jStart >= oldStart) ) {
jStart --;
}
var jLength = jEnd - jStart;
// recursively diff the unresolved sequence
if ( (iLength > 0) && (jLength > 0) ) {
if ( (iLength > 1) || (jLength > 1) ) {
if ( (iStart != newStart) || (iEnd != newEnd) || (jStart != oldStart) || (jEnd != oldEnd) ) {
TextDiff(text, iStart, iEnd, jStart, jEnd, recursionLevel + 1);
}
}
}
i = iStart - 1;
}
else {
i --;
}
}
}
return;
}
// DiffToHtml: process diff data into formatted html text
// input: text.newWords and text.oldWords, arrays containing the texts in arrays of words
// text.newToOld and text.oldToNew, containing the line numbers in the other version
// block data structure
// returns: outText, a html string
function DiffToHtml(text, block) {
var outText = text.message;
var blockNumber = 0;
i = 0;
j = 0;
do {
var movedIndex = [];
var movedBlock = [];
var movedLeft = [];
var blockText = '';
var identText = '';
var delText = '';
var insText = '';
var identStart = '';
var identEnd = '';
// detect block boundary
if ( (text.newToOld[i] != j) || (blockNumber == 0 ) ) {
if ( ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) && ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) ) {
// block moved right
var moved = block.newRight[blockNumber];
if (moved > 0) {
var index = block.newRightIndex[blockNumber];
movedIndex.push(index);
movedBlock.push(moved);
movedLeft.push(false);
}
// block moved left
moved = block.newLeft[blockNumber];
if (moved > 0) {
var index = block.newLeftIndex[blockNumber];
movedIndex.push(index);
movedBlock.push(moved);
movedLeft.push(true);
}
// check if a block starts here
moved = block.newBlock[blockNumber];
if (moved > 0) {
// mark block as inserted text
if (block.newWords[blockNumber] < blockMinLength) {
identStart = htmlInsertStart;
identEnd = htmlInsertEnd;
}
// mark block by color
else {
if (moved > styleBlock.length) {
moved = styleBlock.length;
}
identStart = HtmlCustomize(htmlBlockStart, moved - 1);
identEnd = htmlBlockEnd;
}
}
if (i >= text.newWords.length) {
i ++;
}
else {
j = text.newToOld[i];
blockNumber ++;
}
}
}
// get the correct order if moved to the left as well as to the right from here
if (movedIndex.length == 2) {
if (movedIndex[0] > movedIndex[1]) {
movedIndex.reverse();
movedBlock.reverse();
movedLeft.reverse();
}
}
// handle left and right block moves from this position
for (m = 0; m < movedIndex.length; m ++) {
// insert the block as deleted text
if (block.newWords[ movedIndex[m] ] < blockMinLength) {
var movedStart = block.newStart[ movedIndex[m] ];
var movedLength = block.newLength[ movedIndex[m] ];
var str = '';
for (n = movedStart; n < movedStart + movedLength; n ++) {
str += text.newWords[n];
}
str = str.replace(/\n/g, '¶<br>');
blockText += htmlDeleteStart + Escape(str) + htmlDeleteEnd;
}
// add a placeholder / move direction indicator
else {
if (movedBlock[m] > styleBlock.length) {
movedBlock[m] = styleBlock.length;
}
if (movedLeft[m]) {
blockText += HtmlCustomize(htmlMovedLeft, movedBlock[m] - 1);
}
else {
blockText += HtmlCustomize(htmlMovedRight, movedBlock[m] - 1);
}
}
}
// collect consecutive identical text
while ( (i < text.newWords.length) && (j < text.oldWords.length) ) {
if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) {
break;
}
if (text.newToOld[i] != j) {
break;
}
identText += text.newWords[i];
i ++;
j ++;
}
// collect consecutive deletions
while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) {
delText += text.oldWords[j];
j ++;
}
// collect consecutive inserts
while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) {
insText += text.newWords[i];
i ++;
}
// remove leading and trailing similarities betweein delText and ins from highlighting
var preText = '';
var postText = '';
if (wordDiff) {
if ( (delText != '') && (insText != '') ) {
// remove leading similarities
while ( delText.charAt(0) == insText.charAt(0) && (delText != '') && (insText != '') ) {
preText = preText + delText.charAt(0);
delText = delText.substr(1);
insText = insText.substr(1);
}
// remove trailing similarities
while ( delText.charAt(delText.length - 1) == insText.charAt(insText.length - 1) && (delText != '') && (insText != '') ) {
postText = delText.charAt(delText.length - 1) + postText;
delText = delText.substr(0, delText.length - 1);
insText = insText.substr(0, insText.length - 1);
}
}
}
// output the identical text, deletions and inserts
if (blockText != '') {
outText += blockText;
}
if (identText != '') {
outText += identStart + Escape(identText) + identEnd;
}
outText += preText;
if (delText != '') {
delText = htmlDeleteStart + Escape(delText) + htmlDeleteEnd;
delText = delText.replace(/\n/g, '¶<br>');
outText += delText;
}
if (insText != '') {
insText = htmlInsertStart + Escape(insText) + htmlInsertEnd;
insText = insText.replace(/\n/g, '¶<br>');
outText += insText;
}
outText += postText;
} while (i <= text.newWords.length);
outText += '\n';
outText = HtmlFormat(outText);
return(outText);
}
// Escape: replaces html-sensitive characters in output text with character entities
function Escape(text) {
text = text.replace(/&/g, "&");
text = text.replace(/</g, "<");
text = text.replace(/>/g, ">");
text = text.replace(/"/g, """); //"
return(text);
}
// HtmlCustomize: customize indicator html: replace {number} with the block number, {block} with the block style
function HtmlCustomize(text, block) {
text = text.replace(/\{number\}/, block);
text = text.replace(/\{block\}/, styleBlock[block]);
return(text);
}
// HtmlFormat: replaces newlines and multiple spaces in text with html code
function HtmlFormat(text) {
text = text.replace(/ /g, ' ');
text = text.replace(/\n/g, '<br>');
return(text);
}
// DetectBlocks: detect block borders and moved blocks
// input: text object, block object
function DetectBlocks(text, block) {
block.oldStart = [];
block.oldToNew = [];
block.oldLength = [];
block.oldWords = [];
block.newStart = [];
block.newLength = [];
block.newWords = [];
block.newNumber = [];
block.newBlock = [];
block.newLeft = [];
block.newRight = [];
block.newLeftIndex = [];
block.newRightIndex = [];
var blockNumber = 0;
var wordCounter = 0;
var realWordCounter = 0;
// get old text block order
if (showBlockMoves) {
j = 0;
i = 0;
do {
// detect block boundaries on old text
if ( (text.oldToNew[j] != i) || (blockNumber == 0 ) ) {
if ( ( (text.oldToNew[j] != null) || (j >= text.oldWords.length) ) && ( (text.newToOld[i] != null) || (i >= text.newWords.length) ) ) {
if (blockNumber > 0) {
block.oldLength[blockNumber - 1] = wordCounter;
block.oldWords[blockNumber - 1] = realWordCounter;
wordCounter = 0;
realWordCounter = 0;
}
if (j >= text.oldWords.length) {
j ++;
}
else {
i = text.oldToNew[j];
block.oldStart[blockNumber] = j;
block.oldToNew[blockNumber] = text.oldToNew[j];
blockNumber ++;
}
}
}
// jump over identical pairs
while ( (i < text.newWords.length) && (j < text.oldWords.length) ) {
if ( (text.newToOld[i] == null) || (text.oldToNew[j] == null) ) {
break;
}
if (text.oldToNew[j] != i) {
break;
}
i ++;
j ++;
wordCounter ++;
if ( /\w/.test( text.newWords[i] ) ) {
realWordCounter ++;
}
}
// jump over consecutive deletions
while ( (text.oldToNew[j] == null) && (j < text.oldWords.length) ) {
j ++;
}
// jump over consecutive inserts
while ( (text.newToOld[i] == null) && (i < text.newWords.length) ) {
i ++;
}
} while (j <= text.oldWords.length);
// get the block order in the new text
var lastMin;
var currMinIndex;
lastMin = null;
// sort the data by increasing start numbers into new text block info
for (i = 0; i < blockNumber; i ++) {
currMin = null;
for (j = 0; j < blockNumber; j ++) {
curr = block.oldToNew[j];
if ( (curr > lastMin) || (lastMin == null) ) {
if ( (curr < currMin) || (currMin == null) ) {
currMin = curr;
currMinIndex = j;
}
}
}
block.newStart[i] = block.oldToNew[currMinIndex];
block.newLength[i] = block.oldLength[currMinIndex];
block.newWords[i] = block.oldWords[currMinIndex];
block.newNumber[i] = currMinIndex;
lastMin = currMin;
}
// detect not moved blocks
for (i = 0; i < blockNumber; i ++) {
if (block.newBlock[i] == null) {
if (block.newNumber[i] == i) {
block.newBlock[i] = 0;
}
}
}
// detect switches of neighbouring blocks
for (i = 0; i < blockNumber - 1; i ++) {
if ( (block.newBlock[i] == null) && (block.newBlock[i + 1] == null) ) {
if (block.newNumber[i] - block.newNumber[i + 1] == 1) {
if ( (block.newNumber[i + 1] - block.newNumber[i + 2] != 1) || (i + 2 >= blockNumber) ) {
// the shorter one is declared the moved one
if (block.newLength[i] < block.newLength[i + 1]) {
block.newBlock[i] = 1;
block.newBlock[i + 1] = 0;
}
else {
block.newBlock[i] = 0;
block.newBlock[i + 1] = 1;
}
}
}
}
}
// mark all others as moved and number the moved blocks
j = 1;
for (i = 0; i < blockNumber; i ++) {
if ( (block.newBlock[i] == null) || (block.newBlock[i] == 1) ) {
block.newBlock[i] = j++;
}
}
// check if a block has been moved from this block border
for (i = 0; i < blockNumber; i ++) {
for (j = 0; j < blockNumber; j ++) {
if (block.newNumber[j] == i) {
if (block.newBlock[j] > 0) {
// block moved right
if (block.newNumber[j] < j) {
block.newRight[i] = block.newBlock[j];
block.newRightIndex[i] = j;
}
// block moved left
else {
block.newLeft[i + 1] = block.newBlock[j];
block.newLeftIndex[i + 1] = j;
}
}
}
}
}
}
return;
}
