mirror of
https://github.com/TiddlyWiki/TiddlyWiki5.git
synced 2026-05-03 23:37:25 +00:00
Fix Freelinks Aho-Corasick: failure links, cache invalidation, longest-match, and Unicode safety (#9676)
* Update aho-corasick.js: fix transition logic; ensure complete outputs (via failure-output merge); clean up stats/build scoping; clarify CJK boundary behavior. * Update text.js: implement global longest-match priority with overlap suppression; fix refresh invalidation to ignore $:/state and drafts; handle deletions precisely to avoid rebuilding on draft deletion; add defensive check for cached automaton presence. * Update text.js: remove comment * Update aho-corasick.js: remove comment * Create #9672.tid * Create #2026-0222.tid * Delete editions/tw5.com/tiddlers/releasenotes/5.4.0/#2026-0222.tid * Update text.js: remove \" * Update and rename #9672.tid to #9676.tid: change to the right number * Update #9397.tid: update the existing release note with the new PR link instead of creating a new release note. * Delete editions/tw5.com/tiddlers/releasenotes/5.4.0/#9676.tid: update the existing release note with the new PR link instead of creating a new release note. * Rename #9397.tid to #9676.tid: update the existing release note with the new PR link instead of creating a new release note. * Update and rename #9676.tid to #9397.tid: add link * Rename #9397.tid to #9676.tid * Update tiddlywiki.info: add plugin for test build * Update tiddlywiki.info: revert change; ready to be merged.
This commit is contained in:
@@ -1,9 +1,14 @@
|
||||
/*\
|
||||
|
||||
title: $:/core/modules/widgets/text.js
|
||||
type: application/javascript
|
||||
module-type: widget
|
||||
|
||||
An optimized override of the core text widget that automatically linkifies the text, with support for non-Latin languages like Chinese, prioritizing longer titles, skipping processed matches, excluding the current tiddler title from linking, and handling large title sets with enhanced Aho-Corasick algorithm.
|
||||
Optimized override of the core text widget that automatically linkifies text.
|
||||
- Supports non-Latin languages like Chinese.
|
||||
- Global longest-match priority, then removes overlaps.
|
||||
- Excludes current tiddler title from linking.
|
||||
- Uses Aho-Corasick for performance.
|
||||
|
||||
\*/
|
||||
|
||||
@@ -18,28 +23,6 @@ var Widget = require("$:/core/modules/widgets/widget.js").widget,
|
||||
ElementWidget = require("$:/core/modules/widgets/element.js").element,
|
||||
AhoCorasick = require("$:/core/modules/utils/aho-corasick.js").AhoCorasick;
|
||||
|
||||
var ESCAPE_REGEX = /[\\^$*+?.()|[\]{}]/g;
|
||||
|
||||
function escapeRegExp(str) {
|
||||
try {
|
||||
return str.replace(ESCAPE_REGEX, "\\$&");
|
||||
} catch(e) {
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
||||
function FastPositionSet() {
|
||||
this.set = new Set();
|
||||
}
|
||||
|
||||
FastPositionSet.prototype.add = function(pos) {
|
||||
this.set.add(pos);
|
||||
};
|
||||
|
||||
FastPositionSet.prototype.has = function(pos) {
|
||||
return this.set.has(pos);
|
||||
};
|
||||
|
||||
var TextNodeWidget = function(parseTreeNode,options) {
|
||||
this.initialise(parseTreeNode,options);
|
||||
};
|
||||
@@ -54,138 +37,121 @@ TextNodeWidget.prototype.render = function(parent,nextSibling) {
|
||||
};
|
||||
|
||||
TextNodeWidget.prototype.execute = function() {
|
||||
var self = this,
|
||||
ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
|
||||
|
||||
var self = this;
|
||||
var ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
|
||||
|
||||
var childParseTree = [{
|
||||
type: "plain-text",
|
||||
text: this.getAttribute("text",this.parseTreeNode.text || "")
|
||||
}];
|
||||
|
||||
|
||||
var text = childParseTree[0].text;
|
||||
|
||||
if(!text || text.length < 2) {
|
||||
this.makeChildWidgets(childParseTree);
|
||||
return;
|
||||
}
|
||||
|
||||
if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
|
||||
this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
|
||||
!this.isWithinButtonOrLink()) {
|
||||
|
||||
|
||||
if(this.getVariable("tv-wikilinks",{defaultValue:"yes"}) !== "no" &&
|
||||
this.getVariable("tv-freelinks",{defaultValue:"no"}) === "yes" &&
|
||||
!this.isWithinButtonOrLink()) {
|
||||
|
||||
var currentTiddlerTitle = this.getVariable("currentTiddler") || "";
|
||||
var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER, "no") === "yes";
|
||||
|
||||
var useWordBoundary = self.wiki.getTiddlerText(WORD_BOUNDARY_TIDDLER,"no") === "yes";
|
||||
|
||||
var cacheKey = "tiddler-title-info-" + (ignoreCase ? "insensitive" : "sensitive");
|
||||
|
||||
this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey, function() {
|
||||
return computeTiddlerTitleInfo(self, ignoreCase);
|
||||
this.tiddlerTitleInfo = this.wiki.getGlobalCache(cacheKey,function() {
|
||||
return computeTiddlerTitleInfo(self,ignoreCase);
|
||||
});
|
||||
|
||||
if(this.tiddlerTitleInfo.titles.length > 0) {
|
||||
var newParseTree = this.processTextWithMatches(text, currentTiddlerTitle, ignoreCase, useWordBoundary);
|
||||
if(newParseTree && newParseTree.length > 0 &&
|
||||
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
|
||||
|
||||
if(this.tiddlerTitleInfo && this.tiddlerTitleInfo.titles && this.tiddlerTitleInfo.titles.length > 0 && this.tiddlerTitleInfo.ac) {
|
||||
var newParseTree = this.processTextWithMatches(text,currentTiddlerTitle,ignoreCase,useWordBoundary);
|
||||
if(newParseTree && newParseTree.length > 0 &&
|
||||
(newParseTree.length > 1 || newParseTree[0].type !== "plain-text")) {
|
||||
childParseTree = newParseTree;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
this.makeChildWidgets(childParseTree);
|
||||
};
|
||||
|
||||
TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerTitle, ignoreCase, useWordBoundary) {
|
||||
TextNodeWidget.prototype.processTextWithMatches = function(text,currentTiddlerTitle,ignoreCase,useWordBoundary) {
|
||||
if(!text || text.length === 0) {
|
||||
return [{type: "plain-text", text: text}];
|
||||
}
|
||||
|
||||
var searchText = ignoreCase ? text.toLowerCase() : text;
|
||||
|
||||
var matches;
|
||||
|
||||
try {
|
||||
matches = this.tiddlerTitleInfo.ac.search(searchText, useWordBoundary);
|
||||
matches = this.tiddlerTitleInfo.ac.search(text, useWordBoundary, ignoreCase);
|
||||
} catch(e) {
|
||||
return [{type: "plain-text", text: text}];
|
||||
}
|
||||
|
||||
|
||||
if(!matches || matches.length === 0) {
|
||||
return [{type: "plain-text", text: text}];
|
||||
}
|
||||
|
||||
matches.sort(function(a, b) {
|
||||
if(a.index !== b.index) {
|
||||
return a.index - b.index;
|
||||
}
|
||||
return b.length - a.length;
|
||||
|
||||
var titleToCompare = ignoreCase ?
|
||||
(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
|
||||
currentTiddlerTitle;
|
||||
|
||||
matches.sort(function(a,b) {
|
||||
if(b.length !== a.length) return b.length - a.length;
|
||||
return a.index - b.index;
|
||||
});
|
||||
|
||||
var processedPositions = new FastPositionSet();
|
||||
|
||||
var occupied = new Uint8Array(text.length);
|
||||
var validMatches = [];
|
||||
|
||||
|
||||
for(var i = 0; i < matches.length; i++) {
|
||||
var match = matches[i];
|
||||
var matchStart = match.index;
|
||||
var matchEnd = matchStart + match.length;
|
||||
|
||||
if(matchStart < 0 || matchEnd > text.length) {
|
||||
continue;
|
||||
var m = matches[i];
|
||||
var start = m.index;
|
||||
var end = start + m.length;
|
||||
if(start < 0 || end > text.length) continue;
|
||||
|
||||
var matchedTitle = this.tiddlerTitleInfo.titles[m.titleIndex];
|
||||
if(!matchedTitle) continue;
|
||||
|
||||
var matchedTitleToCompare = ignoreCase ? matchedTitle.toLowerCase() : matchedTitle;
|
||||
if(titleToCompare && matchedTitleToCompare === titleToCompare) continue;
|
||||
|
||||
var overlapping = false;
|
||||
for(var j = start; j < end; j++) {
|
||||
if(occupied[j]) { overlapping = true; break; }
|
||||
}
|
||||
|
||||
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
|
||||
|
||||
var titleToCompare = ignoreCase ?
|
||||
(currentTiddlerTitle ? currentTiddlerTitle.toLowerCase() : "") :
|
||||
currentTiddlerTitle;
|
||||
var matchedTitleToCompare = ignoreCase ?
|
||||
(matchedTitle ? matchedTitle.toLowerCase() : "") :
|
||||
matchedTitle;
|
||||
|
||||
if(titleToCompare && matchedTitleToCompare === titleToCompare) {
|
||||
continue;
|
||||
}
|
||||
|
||||
var hasOverlap = false;
|
||||
for(var pos = matchStart; pos < matchEnd && !hasOverlap; pos++) {
|
||||
if(processedPositions.has(pos)) {
|
||||
hasOverlap = true;
|
||||
}
|
||||
}
|
||||
|
||||
if(!hasOverlap) {
|
||||
for(var pos = matchStart; pos < matchEnd; pos++) {
|
||||
processedPositions.add(pos);
|
||||
}
|
||||
validMatches.push(match);
|
||||
if(overlapping) continue;
|
||||
|
||||
validMatches.push(m);
|
||||
for(var k = start; k < end; k++) {
|
||||
occupied[k] = 1;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if(validMatches.length === 0) {
|
||||
return [{type: "plain-text", text: text}];
|
||||
}
|
||||
|
||||
|
||||
validMatches.sort(function(a,b){ return a.index - b.index; });
|
||||
|
||||
var newParseTree = [];
|
||||
var currentPos = 0;
|
||||
|
||||
for(var i = 0; i < validMatches.length; i++) {
|
||||
var match = validMatches[i];
|
||||
var matchStart = match.index;
|
||||
var matchEnd = matchStart + match.length;
|
||||
|
||||
if(matchStart > currentPos) {
|
||||
var beforeText = text.substring(currentPos, matchStart);
|
||||
newParseTree.push({
|
||||
type: "plain-text",
|
||||
text: beforeText
|
||||
});
|
||||
var curPos = 0;
|
||||
|
||||
for(var x = 0; x < validMatches.length; x++) {
|
||||
var mm = validMatches[x];
|
||||
var s = mm.index;
|
||||
var e = s + mm.length;
|
||||
|
||||
if(s > curPos) {
|
||||
newParseTree.push({ type: "plain-text", text: text.substring(curPos,s) });
|
||||
}
|
||||
|
||||
var matchedTitle = this.tiddlerTitleInfo.titles[match.titleIndex];
|
||||
var matchedText = text.substring(matchStart, matchEnd);
|
||||
|
||||
|
||||
var toTitle = this.tiddlerTitleInfo.titles[mm.titleIndex];
|
||||
var matchedText = text.substring(s,e);
|
||||
|
||||
newParseTree.push({
|
||||
type: "link",
|
||||
attributes: {
|
||||
to: {type: "string", value: matchedTitle},
|
||||
to: {type: "string", value: toTitle},
|
||||
"class": {type: "string", value: "tc-freelink"}
|
||||
},
|
||||
children: [{
|
||||
@@ -193,80 +159,63 @@ TextNodeWidget.prototype.processTextWithMatches = function(text, currentTiddlerT
|
||||
text: matchedText
|
||||
}]
|
||||
});
|
||||
|
||||
currentPos = matchEnd;
|
||||
|
||||
curPos = e;
|
||||
}
|
||||
|
||||
if(currentPos < text.length) {
|
||||
var remainingText = text.substring(currentPos);
|
||||
newParseTree.push({
|
||||
type: "plain-text",
|
||||
text: remainingText
|
||||
});
|
||||
|
||||
if(curPos < text.length) {
|
||||
newParseTree.push({ type: "plain-text", text: text.substring(curPos) });
|
||||
}
|
||||
|
||||
|
||||
return newParseTree;
|
||||
};
|
||||
|
||||
function computeTiddlerTitleInfo(self, ignoreCase) {
|
||||
function computeTiddlerTitleInfo(self,ignoreCase) {
|
||||
var targetFilterText = self.wiki.getTiddlerText(TITLE_TARGET_FILTER),
|
||||
titles = !!targetFilterText ?
|
||||
self.wiki.filterTiddlers(targetFilterText,$tw.rootWidget) :
|
||||
titles = targetFilterText ?
|
||||
self.wiki.filterTiddlers(targetFilterText,$tw.rootWidget) :
|
||||
self.wiki.allTitles();
|
||||
|
||||
|
||||
if(!titles || titles.length === 0) {
|
||||
return {
|
||||
titles: [],
|
||||
ac: new AhoCorasick()
|
||||
};
|
||||
return { titles: [], ac: new AhoCorasick() };
|
||||
}
|
||||
|
||||
|
||||
var validTitles = [];
|
||||
var ac = new AhoCorasick();
|
||||
|
||||
for(var i = 0; i < titles.length; i++) {
|
||||
var title = titles[i];
|
||||
if(title && title.length > 0 && title.substring(0,3) !== "$:/") {
|
||||
var escapedTitle = escapeRegExp(title);
|
||||
if(escapedTitle) {
|
||||
validTitles.push(title);
|
||||
}
|
||||
var t = titles[i];
|
||||
if(t && t.length > 0 && t.substring(0,3) !== "$:/") {
|
||||
validTitles.push(t);
|
||||
}
|
||||
}
|
||||
|
||||
var sortedTitles = validTitles.sort(function(a,b) {
|
||||
var lenDiff = b.length - a.length;
|
||||
if(lenDiff !== 0) return lenDiff;
|
||||
|
||||
validTitles.sort(function(a,b) {
|
||||
var d = b.length - a.length;
|
||||
if(d !== 0) return d;
|
||||
return a < b ? -1 : a > b ? 1 : 0;
|
||||
});
|
||||
|
||||
for(var i = 0; i < sortedTitles.length; i++) {
|
||||
var title = sortedTitles[i];
|
||||
|
||||
var ac = new AhoCorasick();
|
||||
for(var j = 0; j < validTitles.length; j++) {
|
||||
var title = validTitles[j];
|
||||
var pattern = ignoreCase ? title.toLowerCase() : title;
|
||||
ac.addPattern(pattern, i);
|
||||
ac.addPattern(pattern,j);
|
||||
}
|
||||
|
||||
|
||||
try {
|
||||
ac.buildFailureLinks();
|
||||
} catch(e) {
|
||||
return {
|
||||
titles: [],
|
||||
ac: new AhoCorasick()
|
||||
};
|
||||
return { titles: [], ac: new AhoCorasick() };
|
||||
}
|
||||
|
||||
return {
|
||||
titles: sortedTitles,
|
||||
ac: ac
|
||||
};
|
||||
|
||||
return { titles: validTitles, ac: ac };
|
||||
}
|
||||
|
||||
TextNodeWidget.prototype.isWithinButtonOrLink = function() {
|
||||
var widget = this.parentWidget;
|
||||
while(widget) {
|
||||
if(widget instanceof ButtonWidget ||
|
||||
widget instanceof LinkWidget ||
|
||||
((widget instanceof ElementWidget) && widget.parseTreeNode.tag === "a")) {
|
||||
if(widget instanceof ButtonWidget ||
|
||||
widget instanceof LinkWidget ||
|
||||
((widget instanceof ElementWidget) && widget.parseTreeNode.tag === "a")) {
|
||||
return true;
|
||||
}
|
||||
widget = widget.parentWidget;
|
||||
@@ -275,35 +224,56 @@ TextNodeWidget.prototype.isWithinButtonOrLink = function() {
|
||||
};
|
||||
|
||||
TextNodeWidget.prototype.refresh = function(changedTiddlers) {
|
||||
var self = this,
|
||||
changedAttributes = this.computeAttributes(),
|
||||
titlesHaveChanged = false;
|
||||
|
||||
var self = this;
|
||||
var changedAttributes = this.computeAttributes();
|
||||
var titlesHaveChanged = false;
|
||||
|
||||
if(changedTiddlers) {
|
||||
$tw.utils.each(changedTiddlers,function(change,title) {
|
||||
if(change.isDeleted) {
|
||||
if(titlesHaveChanged) return;
|
||||
|
||||
if(title === WORD_BOUNDARY_TIDDLER || title === TITLE_TARGET_FILTER) {
|
||||
titlesHaveChanged = true;
|
||||
return;
|
||||
}
|
||||
|
||||
if(title.substring(0,3) === "$:/") {
|
||||
return;
|
||||
}
|
||||
|
||||
if(change && change.isDeleted) {
|
||||
if(self.tiddlerTitleInfo && self.tiddlerTitleInfo.titles && self.tiddlerTitleInfo.titles.indexOf(title) !== -1) {
|
||||
titlesHaveChanged = true;
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
var tiddler = self.wiki.getTiddler(title);
|
||||
if(tiddler && tiddler.hasField("draft.of")) {
|
||||
return;
|
||||
}
|
||||
|
||||
if(!self.tiddlerTitleInfo || !self.tiddlerTitleInfo.titles || self.tiddlerTitleInfo.titles.indexOf(title) === -1) {
|
||||
titlesHaveChanged = true;
|
||||
} else {
|
||||
titlesHaveChanged = titlesHaveChanged ||
|
||||
!self.tiddlerTitleInfo ||
|
||||
self.tiddlerTitleInfo.titles.indexOf(title) === -1;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
if(changedAttributes.text || titlesHaveChanged ||
|
||||
(changedTiddlers && changedTiddlers[WORD_BOUNDARY_TIDDLER])) {
|
||||
|
||||
var wordBoundaryChanged = !!(changedTiddlers && changedTiddlers[WORD_BOUNDARY_TIDDLER]);
|
||||
|
||||
if(changedAttributes.text || titlesHaveChanged || wordBoundaryChanged) {
|
||||
if(titlesHaveChanged) {
|
||||
var ignoreCase = self.getVariable("tv-freelinks-ignore-case",{defaultValue:"no"}).trim() === "yes";
|
||||
var cacheKey = "tiddler-title-info-" + (ignoreCase ? "insensitive" : "sensitive");
|
||||
self.wiki.clearCache(cacheKey);
|
||||
self.wiki.clearCache("tiddler-title-info-insensitive");
|
||||
self.wiki.clearCache("tiddler-title-info-sensitive");
|
||||
}
|
||||
|
||||
this.refreshSelf();
|
||||
return true;
|
||||
} else {
|
||||
}
|
||||
|
||||
if(changedTiddlers) {
|
||||
return this.refreshChildren(changedTiddlers);
|
||||
}
|
||||
return false;
|
||||
};
|
||||
|
||||
exports.text = TextNodeWidget;
|
||||
|
||||
Reference in New Issue
Block a user