diff --git a/editions/tw5.com/tiddlers/releasenotes/5.4.0/#9397.tid b/editions/tw5.com/tiddlers/releasenotes/5.4.0/#9397.tid new file mode 100644 index 000000000..768c442a1 --- /dev/null +++ b/editions/tw5.com/tiddlers/releasenotes/5.4.0/#9397.tid @@ -0,0 +1,17 @@ +title: $:/changenotes/5.4.0/#9397 +description: Fix critical freelinks bugs: first character loss and false positive matches in v5.4.0 +release: 5.4.0 +tags: $:/tags/ChangeNote +change-type: bugfix +change-category: plugin +github-links: https://github.com/TiddlyWiki/TiddlyWiki5/pull/9084 https://github.com/TiddlyWiki/TiddlyWiki5/pull/9397 +github-contributors: s793016 + +This note addresses two major bugs introduced in the Freelinks plugin with the v5.4.0 release: + +Fixes: +* First Character Loss: The first character of a matched word would incorrectly disappear (e.g., "The" became "he"). This was fixed by correctly timing the filtering of the current tiddler's title during match validation, ensuring proper substring handling. +* False Positive Matches: Unrelated words (like "it is" or "Choose") would incorrectly link to a tiddler title. This was resolved by fixing wrong output merging in the Aho-Corasick failure-link handling, eliminating spurious matches from intermediate nodes, and adding cycle detection. + +Impact: +* Significantly improved correctness and reliability of automatic linking for all users, especially in multilingual and large wikis. diff --git a/plugins/tiddlywiki/freelinks/aho-corasick.js b/plugins/tiddlywiki/freelinks/aho-corasick.js index bcbcf9b44..4f66256a9 100644 --- a/plugins/tiddlywiki/freelinks/aho-corasick.js +++ b/plugins/tiddlywiki/freelinks/aho-corasick.js @@ -3,8 +3,7 @@ title: $:/core/modules/utils/aho-corasick.js type: application/javascript module-type: utils -Optimized Aho-Corasick string matching algorithm implementation with enhanced performance -and error handling for TiddlyWiki freelinking functionality. +Optimized Aho-Corasick string matching algorithm implementation with enhanced performance and error handling for TiddlyWiki freelinking functionality. Useage: @@ -39,7 +38,7 @@ Notes Word Boundary: Enabling useWordBoundary ensures more precise matches, ideal for link detection scenarios. Compatibility: Ensure compatibility with other TiddlyWiki modules (e.g., wikiparser.js) when processing WikiText. Debugging: Use getStats() to inspect the trie structure's size and ensure it does not overload browser memory. - + \*/ "use strict"; @@ -111,14 +110,9 @@ AhoCorasick.prototype.buildFailureLinks = function() { var failureLink = (fail && fail[char]) ? fail[char] : root; this.failure[child] = failureLink; - var failureOutput = this.failure[child]; - if(failureOutput && failureOutput.$) { - if(!child.$) { - child.$ = []; - } - child.$.push.apply(child.$, failureOutput.$); - } - + // Do not merge outputs from failure links during build + // Instead, collect matches dynamically by traversing failure links during search + queue.push(child); } } @@ -143,6 +137,7 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) { var char = text[i]; var transitionCount = 0; + // Follow failure links to find a valid transition while(node && !node[char] && node !== this.trie && transitionCount < this.maxFailureDepth) { node = this.failure[node] || this.trie; transitionCount++; @@ -157,9 +152,19 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) { } } + // Traverse the current node and its failure link chain to gather all patterns var currentNode = node; var collectCount = 0; + var visitedNodes = new Set(); + while(currentNode && collectCount < 10) { + // Prevent infinite loops + if(visitedNodes.has(currentNode)) { + break; + } + visitedNodes.add(currentNode); + + // Only collect outputs from the current node (not merged ones) if(currentNode.$) { var outputs = currentNode.$; for(var j = 0; j < outputs.length && matches.length < maxMatches; j++) { @@ -167,6 +172,11 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) { var matchStart = i - output.length + 1; var matchEnd = i + 1; + var matchedText = text.substring(matchStart, matchEnd); + if(matchedText !== output.pattern) { + continue; + } + if(useWordBoundary && !this.isWordBoundaryMatch(text, matchStart, matchEnd)) { continue; } @@ -179,6 +189,7 @@ AhoCorasick.prototype.search = function(text, useWordBoundary) { }); } } + currentNode = this.failure[currentNode]; if(currentNode === this.trie) break; collectCount++; diff --git a/plugins/tiddlywiki/freelinks/text.js b/plugins/tiddlywiki/freelinks/text.js index 5af9fffc0..e6e1ab430 100755 --- a/plugins/tiddlywiki/freelinks/text.js +++ b/plugins/tiddlywiki/freelinks/text.js @@ -84,7 +84,8 @@ TextNodeWidget.prototype.execute = function() { if(this.tiddlerTitleInfo.titles.length > 0) { var newParseTree = this.processTextWithMatches(text, currentTiddlerTitle, ignoreCase, useWordBoundary); - if(newParseTree.length > 1 || newParseTree[0].type !== "plain-text") { + if(newParseTree && newParseTree.length > 0 && + (newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) { childParseTree = newParseTree; } } @@ -94,6 +95,10 @@ TextNodeWidget.prototype.execute = function() { }; TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerTitle, ignoreCase, useWordBoundary) { + if(!text || text.length === 0) { + return [{type: "plain-text", text: text}]; + } + var searchText = ignoreCase ? text.toLowerCase() : text; var matches; @@ -108,8 +113,10 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT } matches.sort(function(a, b) { - var posDiff = a.index - b.index; - return posDiff !== 0 ? posDiff : b.length - a.length; + if(a.index !== b.index) { + return a.index - b.index; + } + return b.length - a.length; }); var processedPositions = new FastPositionSet(); @@ -120,6 +127,23 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT var matchStart = match.index; var matchEnd = matchStart + match.length; + if(matchStart < 0 || matchEnd > text.length) { + continue; + } + + var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex]; + + var titleToCompare = ignoreCase ? + (currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") : + currentTiddlerTitle; + var matchedTitleToCompare = ignoreCase ? + (matchedTitle ? matchedTitle.toLowerCase() : "") : + matchedTitle; + + if(titleToCompare && matchedTitleToCompare === titleToCompare) { + continue; + } + var hasOverlap = false; for(var pos = matchStart; pos < matchEnd && !hasOverlap; pos++) { if(processedPositions.has(pos)) { @@ -148,39 +172,36 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT var matchEnd = matchStart + match.length; if(matchStart > currentPos) { + var beforeText = text.substring(currentPos, matchStart); newParseTree.push({ type: "plain-text", - text: text.slice(currentPos, matchStart) + text: beforeText }); } var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex]; + var matchedText = text.substring(matchStart, matchEnd); - if(matchedTitle === currentTiddlerTitle) { - newParseTree.push({ + newParseTree.push({ + type: "link", + attributes: { + to: {type: "string", value: matchedTitle}, + "class": {type: "string", value: "tc-freelink"} + }, + children: [{ type: "plain-text", - text: text.slice(matchStart, matchEnd) - }); - } else { - newParseTree.push({ - type: "link", - attributes: { - to: {type: "string", value: matchedTitle}, - "class": {type: "string", value: "tc-freelink"} - }, - children: [{ - type: "plain-text", - text: text.slice(matchStart, matchEnd) - }] - }); - } + text: matchedText + }] + }); + currentPos = matchEnd; } if(currentPos < text.length) { + var remainingText = text.substring(currentPos); newParseTree.push({ type: "plain-text", - text: text.slice(currentPos) + text: remainingText }); } @@ -203,7 +224,6 @@ function computeTiddlerTitleInfo(self, ignoreCase) { var validTitles = []; var ac = new AhoCorasick(); - // Process titles in a single pass to avoid duplication for(var i = 0; i < titles.length; i++) { var title = titles[i]; if(title && title.length > 0 && title.substring(0,3) !== "$:/") { @@ -214,17 +234,16 @@ function computeTiddlerTitleInfo(self, ignoreCase) { } } - // Sort by length (descending) then alphabetically - // Longer titles are prioritized to avoid partial matches (e.g., "JavaScript" before "Java") var sortedTitles = validTitles.sort(function(a,b) { var lenDiff = b.length - a.length; - return lenDiff !== 0 ? lenDiff : (a < b ? -1 : a > b ? 1 : 0); + if(lenDiff !== 0) return lenDiff; + return a < b ? -1 : a > b ? 1 : 0; }); - // Build Aho-Corasick automaton for(var i = 0; i < sortedTitles.length; i++) { var title = sortedTitles[i]; - ac.addPattern(ignoreCase ? title.toLowerCase() : title, i); + var pattern = ignoreCase ? title.toLowerCase() : title; + ac.addPattern(pattern, i); } try {