Fix Freelinks Aho-Corasick: failure links, cache invalidation, longest-match, and Unicode safety (#9676)

* Update aho-corasick.js

 fix transition logic; ensure complete outputs (via failure-output merge); clean up stats/build scoping; clarify CJK boundary behavior.

* Update text.js

implement global longest-match priority with overlap suppression; fix refresh invalidation to ignore $:/state and drafts; handle deletions precisely to avoid rebuilding on draft deletion; add defensive check for cached automaton presence.

* Update text.js

remove comment

* Update aho-corasick.js

remove comment

* Create #9672.tid

* Create #2026-0222.tid

* Delete editions/tw5.com/tiddlers/releasenotes/5.4.0/#2026-0222.tid

* Update text.js

remove \"

* Update and rename #9672.tid to #9676.tid

change to the correct PR number

* Update #9397.tid

update the existing release note with the new PR link instead of creating a new release note.

* Delete editions/tw5.com/tiddlers/releasenotes/5.4.0/#9676.tid

update the existing release note with the new PR link instead of creating a new release note.

* Rename #9397.tid to #9676.tid

update the existing release note with the new PR link instead of creating a new release note.

* Update and rename #9676.tid to #9397.tid

add link

* Rename #9397.tid to #9676.tid

* Update tiddlywiki.info

add plugin for test build

* Update tiddlywiki.info

revert the change; ready to be merged.
This commit is contained in:
s793016
2026-02-25 19:07:32 +08:00
committed by GitHub
parent 91e7a62c13
commit b0d99f3bd3
5 changed files with 293 additions and 354 deletions

View File

@@ -1,9 +1,14 @@
/*\
title: $:/core/modules/widgets/text.js
type: application/javascript
module-type: widget
An optimized override of the core text widget that automatically linkifies the text, with support for non-Latin languages like Chinese, prioritizing longer titles, skipping processed matches, excluding the current tiddler title from linking, and handling large title sets with enhanced Aho-Corasick algorithm.
Optimized override of the core text widget that automatically linkifies text.
- Supports non-Latin languages like Chinese.
- Global longest-match priority, then removes overlaps.
- Excludes current tiddler title from linking.
- Uses Aho-Corasick for performance.
\*/
@@ -18,28 +23,6 @@ var Widget = require("$:/core/modules/widgets/widget.js").widget,
ElementWidget = require("$:/core/modules/widgets/element.js").element,
AhoCorasick = require("$:/core/modules/utils/aho-corasick.js").AhoCorasick;
// Matches every character that has a special meaning in a regular expression.
var ESCAPE_REGEX = /[\\^$*+?.()|[\]{}]/g;
/*
Backslash-escape all regular-expression metacharacters in `str`.
Returns the escaped string, or null when calling `str.replace` throws
(for example when `str` is null or undefined).
*/
function escapeRegExp(str) {
	var escaped = null;
	try {
		escaped = str.replace(ESCAPE_REGEX, "\\$&");
	} catch(err) {
		// Best effort: anything that cannot be escaped is reported as null
		escaped = null;
	}
	return escaped;
}
/*
Thin wrapper around a native Set, used to record positions.
The underlying Set is exposed as the `set` property.
*/
function FastPositionSet() {
	this.set = new Set();
}
// Report whether `pos` has been recorded
FastPositionSet.prototype.has = function(pos) {
	var recorded = this.set;
	return recorded.has(pos);
};
// Record `pos`; returns nothing
FastPositionSet.prototype.add = function(pos) {
	var recorded = this.set;
	recorded.add(pos);
};
/*
Widget constructor: hands the parse tree node and options straight to initialise().
*/
function TextNodeWidget(parseTreeNode,options) {
	this.initialise(parseTreeNode,options);
}
@@ -54,138 +37,121 @@ TextNodeWidget.prototype.render = function(parent,nextSibling) {
};
/*
Compute the child parse tree for this text node.
The default is a single plain-text node holding the "text" attribute (falling back to
the parse tree node's own text). When freelinking applies, that node is replaced by the
tree returned from processTextWithMatches().
NOTE(review): this listing appears to interleave the pre- and post-change lines of a diff,
so several statements occur twice with only formatting differences - confirm against the
actual file before editing.
*/
TextNodeWidget.prototype.execute = function() {
var self = this,
ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
var self = this;
var ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
var childParseTree = [{
type: "plain-text",
text: this.getAttribute("text",this.parseTreeNode.text || "")
}];
var text = childParseTree[0].text;
// Empty or single-character text is rendered as plain text without any matching
if(!text || text.length < 2) {
this.makeChildWidgets(childParseTree);
return;
}
// Freelinking runs only when tv-wikilinks is not "no", tv-freelinks is "yes",
// and isWithinButtonOrLink() returns false
if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
!this.isWithinButtonOrLink()) {
if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
!this.isWithinButtonOrLink()) {
var currentTiddlerTitle = this.getVariable("currentTiddler") || "";
var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER, "no") === "yes";
var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER,"no") === "yes";
// Title info is cached under a different key for case-sensitive and case-insensitive matching
var cacheKey = "tiddler-title-info-" + (ignoreCase ? "insensitive" : "sensitive");
this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey, function() {
return computeTiddlerTitleInfo(self, ignoreCase);
this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey,function() {
return computeTiddlerTitleInfo(self,ignoreCase);
});
if(this.tiddlerTitleInfo.titles.length > 0) {
var newParseTree = this.processTextWithMatches(text, currentTiddlerTitle, ignoreCase, useWordBoundary);
if(newParseTree && newParseTree.length > 0 &&
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
if(this.tiddlerTitleInfo && this.tiddlerTitleInfo.titles && this.tiddlerTitleInfo.titles.length > 0 && this.tiddlerTitleInfo.ac) {
var newParseTree = this.processTextWithMatches(text,currentTiddlerTitle,ignoreCase,useWordBoundary);
// Adopt the new tree only when it is something other than a single plain-text node
if(newParseTree && newParseTree.length > 0 &&
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
childParseTree = newParseTree;
}
}
}
this.makeChildWidgets(childParseTree);
};
TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerTitle, ignoreCase, useWordBoundary) {
TextNodeWidget.prototype.processTextWithMatches = function(text,currentTiddlerTitle,ignoreCase,useWordBoundary) {
if(!text || text.length === 0) {
return [{type: "plain-text", text: text}];
}
var searchText = ignoreCase ? text.toLowerCase() : text;
var matches;
try {
matches = this.tiddlerTitleInfo.ac.search(searchText, useWordBoundary);
matches = this.tiddlerTitleInfo.ac.search(text, useWordBoundary, ignoreCase);
} catch(e) {
return [{type: "plain-text", text: text}];
}
if(!matches || matches.length === 0) {
return [{type: "plain-text", text: text}];
}
matches.sort(function(a, b) {
if(a.index !== b.index) {
return a.index - b.index;
}
return b.length - a.length;
var titleToCompare = ignoreCase ?
(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
currentTiddlerTitle;
matches.sort(function(a,b) {
if(b.length !== a.length) return b.length - a.length;
return a.index - b.index;
});
var processedPositions = new FastPositionSet();
var occupied = new Uint8Array(text.length);
var validMatches = [];
for(var i = 0; i < matches.length; i++) {
var match = matches[i];
var matchStart = match.index;
var matchEnd = matchStart + match.length;
if(matchStart < 0 || matchEnd > text.length) {
continue;
var m = matches[i];
var start = m.index;
var end = start + m.length;
if(start < 0 || end > text.length) continue;
var matchedTitle = this.tiddlerTitleInfo.titles[m.titleIndex];
if(!matchedTitle) continue;
var matchedTitleToCompare = ignoreCase ? matchedTitle.toLowerCase() : matchedTitle;
if(titleToCompare && matchedTitleToCompare === titleToCompare) continue;
var overlapping = false;
for(var j = start; j < end; j++) {
if(occupied[j]) { overlapping = true; break; }
}
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
var titleToCompare = ignoreCase ?
(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
currentTiddlerTitle;
var matchedTitleToCompare = ignoreCase ?
(matchedTitle ? matchedTitle.toLowerCase() : "") :
matchedTitle;
if(titleToCompare && matchedTitleToCompare === titleToCompare) {
continue;
}
var hasOverlap = false;
for(var pos = matchStart; pos < matchEnd && !hasOverlap; pos++) {
if(processedPositions.has(pos)) {
hasOverlap = true;
}
}
if(!hasOverlap) {
for(var pos = matchStart; pos < matchEnd; pos++) {
processedPositions.add(pos);
}
validMatches.push(match);
if(overlapping) continue;
validMatches.push(m);
for(var k = start; k < end; k++) {
occupied[k] = 1;
}
}
if(validMatches.length === 0) {
return [{type: "plain-text", text: text}];
}
validMatches.sort(function(a,b){ return a.index - b.index; });
var newParseTree = [];
var currentPos = 0;
for(var i = 0; i < validMatches.length; i++) {
var match = validMatches[i];
var matchStart = match.index;
var matchEnd = matchStart + match.length;
if(matchStart > currentPos) {
var beforeText = text.substring(currentPos, matchStart);
newParseTree.push({
type: "plain-text",
text: beforeText
});
var curPos = 0;
for(var x = 0; x < validMatches.length; x++) {
var mm = validMatches[x];
var s = mm.index;
var e = s + mm.length;
if(s > curPos) {
newParseTree.push({ type: "plain-text", text: text.substring(curPos,s) });
}
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
var matchedText = text.substring(matchStart, matchEnd);
var toTitle = this.tiddlerTitleInfo.titles[mm.titleIndex];
var matchedText = text.substring(s,e);
newParseTree.push({
type: "link",
attributes: {
to: {type: "string", value: matchedTitle},
to: {type: "string", value: toTitle},
"class": {type: "string", value: "tc-freelink"}
},
children: [{
@@ -193,80 +159,63 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
text: matchedText
}]
});
currentPos = matchEnd;
curPos = e;
}
if(currentPos < text.length) {
var remainingText = text.substring(currentPos);
newParseTree.push({
type: "plain-text",
text: remainingText
});
if(curPos < text.length) {
newParseTree.push({ type: "plain-text", text: text.substring(curPos) });
}
return newParseTree;
};
/*
Build the title list and Aho-Corasick automaton used for freelink matching.
self: widget whose wiki is queried
ignoreCase: when truthy, patterns are added to the automaton in lower case
Titles come from the filter stored in TITLE_TARGET_FILTER when that tiddler has text,
otherwise from wiki.allTitles().
Returns {titles, ac}. Both are empty/fresh when there are no titles or when
buildFailureLinks() throws.
NOTE(review): this listing appears to interleave the pre- and post-change lines of a diff,
so several statements occur twice - confirm against the actual file before editing.
*/
function computeTiddlerTitleInfo(self, ignoreCase) {
function computeTiddlerTitleInfo(self,ignoreCase) {
var targetFilterText = self.wiki.getTiddlerText(TITLE_TARGET_FILTER),
titles = !!targetFilterText ?
self.wiki.filterTiddlers(targetFilterText,$tw.rootWidget) :
titles = targetFilterText ?
self.wiki.filterTiddlers(targetFilterText,$tw.rootWidget) :
self.wiki.allTitles();
if(!titles || titles.length === 0) {
return {
titles: [],
ac: new AhoCorasick()
};
return { titles: [], ac: new AhoCorasick() };
}
var validTitles = [];
var ac = new AhoCorasick();
// Keep only non-empty titles that do not start with "$:/"
for(var i = 0; i < titles.length; i++) {
var title = titles[i];
if(title && title.length > 0 && title.substring(0,3) !== "$:/") {
var escapedTitle = escapeRegExp(title);
if(escapedTitle) {
validTitles.push(title);
}
var t = titles[i];
if(t && t.length > 0 && t.substring(0,3) !== "$:/") {
validTitles.push(t);
}
}
// Longest titles first; equal lengths are ordered by ascending string comparison
var sortedTitles = validTitles.sort(function(a,b) {
var lenDiff = b.length - a.length;
if(lenDiff !== 0) return lenDiff;
validTitles.sort(function(a,b) {
var d = b.length - a.length;
if(d !== 0) return d;
return a < b ? -1 : a > b ? 1 : 0;
});
// Each pattern is registered together with the index of its title in the sorted list
for(var i = 0; i < sortedTitles.length; i++) {
var title = sortedTitles[i];
var ac = new AhoCorasick();
for(var j = 0; j < validTitles.length; j++) {
var title = validTitles[j];
var pattern = ignoreCase ? title.toLowerCase() : title;
ac.addPattern(pattern, i);
ac.addPattern(pattern,j);
}
// If the automaton cannot be finalised, return empty results instead of throwing
try {
ac.buildFailureLinks();
} catch(e) {
return {
titles: [],
ac: new AhoCorasick()
};
return { titles: [], ac: new AhoCorasick() };
}
return {
titles: sortedTitles,
ac: ac
};
return { titles: validTitles, ac: ac };
}
TextNodeWidget.prototype.isWithinButtonOrLink = function() {
var widget = this.parentWidget;
while(widget) {
if(widget instanceof ButtonWidget ||
widget instanceof LinkWidget ||
((widget instanceof ElementWidget) && widget.parseTreeNode.tag === "a")) {
if(widget instanceof ButtonWidget ||
widget instanceof LinkWidget ||
((widget instanceof ElementWidget) && widget.parseTreeNode.tag === "a")) {
return true;
}
widget = widget.parentWidget;
@@ -275,35 +224,56 @@ TextNodeWidget.prototype.isWithinButtonOrLink = function() {
};
/*
Decide whether this widget must re-render in response to changed tiddlers.
changedTiddlers: hashmap of changed tiddler titles to change descriptors
Returns true after calling refreshSelf() when the "text" attribute changed, the set of
linkable titles is considered changed, or WORD_BOUNDARY_TIDDLER changed; otherwise the
result of refreshChildren(), or false when no changedTiddlers were supplied.
NOTE(review): this listing appears to interleave the pre- and post-change lines of a diff,
so some statements occur twice and the braces do not balance as shown - confirm against
the actual file before editing.
*/
TextNodeWidget.prototype.refresh = function(changedTiddlers) {
var self = this,
changedAttributes = this.computeAttributes(),
titlesHaveChanged = false;
var self = this;
var changedAttributes = this.computeAttributes();
var titlesHaveChanged = false;
if(changedTiddlers) {
$tw.utils.each(changedTiddlers,function(change,title) {
if(change.isDeleted) {
// Once a change has been detected the remaining entries need no inspection
if(titlesHaveChanged) return;
// The two freelinks configuration tiddlers always count as a change
if(title === WORD_BOUNDARY_TIDDLER || title === TITLE_TARGET_FILTER) {
titlesHaveChanged = true;
return;
}
// Any other title starting with "$:/" is ignored
if(title.substring(0,3) === "$:/") {
return;
}
// A deletion only matters when the deleted title is in the cached title list
if(change && change.isDeleted) {
if(self.tiddlerTitleInfo && self.tiddlerTitleInfo.titles && self.tiddlerTitleInfo.titles.indexOf(title) !== -1) {
titlesHaveChanged = true;
}
return;
}
// Tiddlers carrying a draft.of field are ignored
var tiddler = self.wiki.getTiddler(title);
if(tiddler && tiddler.hasField("draft.of")) {
return;
}
// A title missing from the cached list (or no cached list at all) counts as a change
if(!self.tiddlerTitleInfo || !self.tiddlerTitleInfo.titles || self.tiddlerTitleInfo.titles.indexOf(title) === -1) {
titlesHaveChanged = true;
} else {
titlesHaveChanged = titlesHaveChanged ||
!self.tiddlerTitleInfo ||
self.tiddlerTitleInfo.titles.indexOf(title) === -1;
}
});
}
if(changedAttributes.text || titlesHaveChanged ||
(changedTiddlers && changedTiddlers[WORD_BOUNDARY_TIDDLER])) {
var wordBoundaryChanged = !!(changedTiddlers && changedTiddlers[WORD_BOUNDARY_TIDDLER]);
if(changedAttributes.text || titlesHaveChanged || wordBoundaryChanged) {
if(titlesHaveChanged) {
var ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
var cacheKey = "tiddler-title-info-" + (ignoreCase ? "insensitive" : "sensitive");
// NOTE(review): the title info is stored with wiki.getGlobalCache(); confirm that
// wiki.clearCache() actually invalidates entries created that way
self.wiki.clearCache(cacheKey);
self.wiki.clearCache("tiddler-title-info-insensitive");
self.wiki.clearCache("tiddler-title-info-sensitive");
}
this.refreshSelf();
return true;
} else {
}
if(changedTiddlers) {
return this.refreshChildren(changedTiddlers);
}
return false;
};
exports.text = TextNodeWidget;