<?xml version="1.0"?>
<feed xmlns="http://www.w3.org/2005/Atom" xml:lang="zh">
	<id>https://arolstar52-zhtest.hf.space/index.php?action=history&amp;feed=atom&amp;title=Module%3AConversion_rule_extractor%2FMatcher</id>
	<title>Module:Conversion rule extractor/Matcher - 版本历史</title>
	<link rel="self" type="application/atom+xml" href="https://arolstar52-zhtest.hf.space/index.php?action=history&amp;feed=atom&amp;title=Module%3AConversion_rule_extractor%2FMatcher"/>
	<link rel="alternate" type="text/html" href="https://arolstar52-zhtest.hf.space/index.php?title=Module:Conversion_rule_extractor/Matcher&amp;action=history"/>
	<updated>2026-06-30T21:14:03Z</updated>
	<subtitle>在这个wiki上该页的修订历史</subtitle>
	<generator>MediaWiki 1.43.8</generator>
	<entry>
		<id>https://arolstar52-zhtest.hf.space/index.php?title=Module:Conversion_rule_extractor/Matcher&amp;diff=4632629&amp;oldid=prev</id>
		<title>imported&gt;PexEric：​←建立内容为“-- Module:Conversion_rule_extractor/Matcher -- 子模块：负责匹配规则与目标页面内容  local Matcher = {}  -- 工具函数：从规则字符串中提取需要匹配的源文本 -- 例如：&#039;zh-cn:图尔库;zh-tw:土庫;&#039; -&gt; {&quot;图尔库&quot;, &quot;土庫&quot;} -- 例如：&#039;巨集=&gt;zh-cn:宏;&#039; -&gt; {&quot;巨集&quot;} local function extractRuleSources(ruleString)     local sources = {}     local sourceSet = {} -- 用于去重      -- 移除外层包裹（…”的新页面</title>
		<link rel="alternate" type="text/html" href="https://arolstar52-zhtest.hf.space/index.php?title=Module:Conversion_rule_extractor/Matcher&amp;diff=4632629&amp;oldid=prev"/>
		<updated>2025-05-03T17:05:46Z</updated>

		<summary type="html">&lt;p&gt;&lt;a href=&quot;/index.php?title=WP:AES&amp;amp;action=edit&amp;amp;redlink=1&quot; class=&quot;new&quot; title=&quot;WP:AES（页面不存在）&quot;&gt;←&lt;/a&gt;建立内容为“-- Module:Conversion_rule_extractor/Matcher -- 子模块：负责匹配规则与目标页面内容  local Matcher = {}  -- 工具函数：从规则字符串中提取需要匹配的源文本 -- 例如：&amp;#039;zh-cn:图尔库;zh-tw:土庫;&amp;#039; -&amp;gt; {&amp;quot;图尔库&amp;quot;, &amp;quot;土庫&amp;quot;} -- 例如：&amp;#039;巨集=&amp;gt;zh-cn:宏;&amp;#039; -&amp;gt; {&amp;quot;巨集&amp;quot;} local function extractRuleSources(ruleString)     local sources = {}     local sourceSet = {} -- 用于去重      -- 移除外层包裹（…”的新页面&lt;/p&gt;
&lt;p&gt;&lt;b&gt;新页面&lt;/b&gt;&lt;/p&gt;&lt;div&gt;-- Module:Conversion_rule_extractor/Matcher&lt;br /&gt;
-- Submodule: matches conversion rules against the content of a target page.&lt;br /&gt;
&lt;br /&gt;
-- Export table; the public functions defined below are attached to it.&lt;br /&gt;
local Matcher = {}&lt;br /&gt;
&lt;br /&gt;
-- Helper: collect the source texts that a rule string should be matched by.&lt;br /&gt;
-- Example: &amp;#039;zh-cn:图尔库;zh-tw:土庫;&amp;#039; -&amp;gt; {&amp;quot;图尔库&amp;quot;, &amp;quot;土庫&amp;quot;}&lt;br /&gt;
-- Example: &amp;#039;巨集=&amp;gt;zh-cn:宏;&amp;#039; -&amp;gt; {&amp;quot;巨集&amp;quot;}&lt;br /&gt;
-- Example: &amp;#039;-{zh-cn:宏;zh-tw:巨集}-&amp;#039; -&amp;gt; {&amp;quot;宏&amp;quot;, &amp;quot;巨集&amp;quot;} (wrapper without flags)&lt;br /&gt;
-- Input: ruleString - one conversion rule, optionally still wrapped in -{ ... }-&lt;br /&gt;
-- Output: de-duplicated list of source texts in order of first appearance;&lt;br /&gt;
--         an empty list when ruleString is not a string&lt;br /&gt;
local function extractRuleSources(ruleString)&lt;br /&gt;
    local sources = {}&lt;br /&gt;
    local sourceSet = {} -- sources already emitted, used for de-duplication&lt;br /&gt;
&lt;br /&gt;
    -- A non-string entry has nothing to extract; return the empty list instead&lt;br /&gt;
    -- of raising on the method call below.&lt;br /&gt;
    if type(ruleString) ~= &amp;#039;string&amp;#039; then&lt;br /&gt;
        return sources&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    -- Strip the outer -{ ... }- wrapper if it is still present. The leading&lt;br /&gt;
    -- flags section (everything up to the first pipe) is optional, so unwrap&lt;br /&gt;
    -- first and drop the flags only when a pipe actually exists.&lt;br /&gt;
    local inner = ruleString:match(&amp;#039;^%-{(.*)}%-$&amp;#039;)&lt;br /&gt;
    if inner then&lt;br /&gt;
        ruleString = inner:match(&amp;#039;^[^|]*|(.*)$&amp;#039;) or inner&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    for part in mw.text.gsplit(ruleString, &amp;#039;;&amp;#039;) do&lt;br /&gt;
        part = mw.text.trim(part)&lt;br /&gt;
        if part ~= &amp;#039;&amp;#039; then&lt;br /&gt;
            -- Try the rule shapes from most to least specific. A later pattern&lt;br /&gt;
            -- is evaluated only when every earlier one failed; an empty capture&lt;br /&gt;
            -- still counts as a hit and is discarded further down.&lt;br /&gt;
            local source = part:match(&amp;#039;^([^=]-)=&amp;gt;&amp;#039;) -- one-way rule A=&amp;gt;B: source is A&lt;br /&gt;
            if not source then&lt;br /&gt;
                source = part:match(&amp;#039;^%w+%-%w+:(.+)&amp;#039;) -- two-way rule lang-code:Text&lt;br /&gt;
            end&lt;br /&gt;
            if not source then&lt;br /&gt;
                -- Non-standard Text:Variant form: the text before the first colon&lt;br /&gt;
                -- is assumed to be the source. The outer parentheses keep only&lt;br /&gt;
                -- the first of the two captures.&lt;br /&gt;
                source = (part:match(&amp;#039;^([^:]+):(.+)&amp;#039;))&lt;br /&gt;
            end&lt;br /&gt;
            if not source and not part:find(&amp;#039;=&amp;#039;) and not part:find(&amp;#039;:&amp;#039;) then&lt;br /&gt;
                -- Bare text with neither = nor : is not a valid rule but may&lt;br /&gt;
                -- occur; fall back to the whole part as the source.&lt;br /&gt;
                source = part&lt;br /&gt;
            end&lt;br /&gt;
&lt;br /&gt;
            if source then&lt;br /&gt;
                source = mw.text.trim(source)&lt;br /&gt;
                if source ~= &amp;#039;&amp;#039; and not sourceSet[source] then&lt;br /&gt;
                    sources[#sources + 1] = source&lt;br /&gt;
                    sourceSet[source] = true&lt;br /&gt;
                end&lt;br /&gt;
            end&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
    return sources&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
-- Build the trie used for matching (adapted from Module:NoteTA-lite).&lt;br /&gt;
-- Input: rulesList - a list of rule strings&lt;br /&gt;
-- Output: a trie keyed by single characters; the node reached at the end of a&lt;br /&gt;
--         source text carries an indices field listing the positions in&lt;br /&gt;
--         rulesList of every rule that has that source&lt;br /&gt;
function Matcher.buildRuleTrie(rulesList)&lt;br /&gt;
    -- Phase 1: group rule indices by source text, e.g. { [&amp;quot;source&amp;quot;] = {idx1, idx2} }&lt;br /&gt;
    local indicesBySource = {}&lt;br /&gt;
    for ruleIndex, rule in ipairs(rulesList) do&lt;br /&gt;
        for _, src in ipairs(extractRuleSources(rule)) do&lt;br /&gt;
            local bucket = indicesBySource[src]&lt;br /&gt;
            if not bucket then&lt;br /&gt;
                bucket = {}&lt;br /&gt;
                indicesBySource[src] = bucket&lt;br /&gt;
            end&lt;br /&gt;
            bucket[#bucket + 1] = ruleIndex&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    -- Phase 2: thread every source through the trie one character at a time.&lt;br /&gt;
    -- mw.ustring is used so that a multi-byte UTF-8 character is one step.&lt;br /&gt;
    local root = {}&lt;br /&gt;
    for src, bucket in pairs(indicesBySource) do&lt;br /&gt;
        local node = root&lt;br /&gt;
        local charCount = mw.ustring.len(src)&lt;br /&gt;
        for pos = 1, charCount do&lt;br /&gt;
            local ch = mw.ustring.sub(src, pos, pos)&lt;br /&gt;
            local child = node[ch]&lt;br /&gt;
            if not child then&lt;br /&gt;
                child = {}&lt;br /&gt;
                node[ch] = child&lt;br /&gt;
            end&lt;br /&gt;
            node = child&lt;br /&gt;
        end&lt;br /&gt;
        -- Mark the end of this source with the rules it belongs to.&lt;br /&gt;
        node.indices = bucket&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    return root&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Find the rules whose source text occurs in a text, using the trie&lt;br /&gt;
-- (adapted from Module:NoteTA-lite).&lt;br /&gt;
-- Input: text - the text content to search&lt;br /&gt;
-- Input: trie - the trie built by Matcher.buildRuleTrie&lt;br /&gt;
-- Output: matchedIndices - a set (table) whose keys are the matched rule&lt;br /&gt;
--         indices and whose values are true; empty when text is nil or empty&lt;br /&gt;
function Matcher.matchTextWithTrie(text, trie)&lt;br /&gt;
    local matchedIndices = {}&lt;br /&gt;
    if not text or text == &amp;#039;&amp;#039; then return matchedIndices end&lt;br /&gt;
&lt;br /&gt;
    -- Split the text into UTF-8 characters once, up front, so the nested scan&lt;br /&gt;
    -- below is a plain table lookup per position instead of one&lt;br /&gt;
    -- mw.ustring.sub call per visited position. For well-formed UTF-8 the&lt;br /&gt;
    -- pattern yields the same one-character strings that the trie is keyed by.&lt;br /&gt;
    local chars = {}&lt;br /&gt;
    local len = 0&lt;br /&gt;
    for ch in string.gmatch(text, &amp;#039;[%z\1-\127\194-\244][\128-\191]*&amp;#039;) do&lt;br /&gt;
        len = len + 1&lt;br /&gt;
        chars[len] = ch&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    for i = 1, len do&lt;br /&gt;
        -- Walk the trie from the root for a candidate match starting at i.&lt;br /&gt;
        local currentNode = trie&lt;br /&gt;
        for j = i, len do&lt;br /&gt;
            currentNode = currentNode[chars[j]]&lt;br /&gt;
            if not currentNode then&lt;br /&gt;
                break -- no source continues with this character&lt;br /&gt;
            end&lt;br /&gt;
            -- An indices field marks the end of a complete source text.&lt;br /&gt;
            local indices = currentNode.indices&lt;br /&gt;
            if indices then&lt;br /&gt;
                for _, index in ipairs(indices) do&lt;br /&gt;
                    matchedIndices[index] = true&lt;br /&gt;
                end&lt;br /&gt;
                -- Keep walking: a longer source may share this prefix.&lt;br /&gt;
            end&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
    return matchedIndices&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Main entry point: keep only the rules that can be matched in the target content.&lt;br /&gt;
-- Input: rulesList - a list of rule strings&lt;br /&gt;
-- Input: targetPageTitleOrText - a page title string, literal text, or an&lt;br /&gt;
--        object that provides getContent (such as a title object)&lt;br /&gt;
-- Output: a list holding the matching rule strings, in their original order&lt;br /&gt;
function Matcher.filterRules(rulesList, targetPageTitleOrText)&lt;br /&gt;
    local kept = {}&lt;br /&gt;
    if not rulesList or #rulesList == 0 then&lt;br /&gt;
        return kept&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    -- Work out which text the rules are matched against.&lt;br /&gt;
    local haystack&lt;br /&gt;
    if type(targetPageTitleOrText) ~= &amp;#039;string&amp;#039; then&lt;br /&gt;
        -- Not a string: only usable when it can hand over content itself.&lt;br /&gt;
        if not (targetPageTitleOrText and targetPageTitleOrText.getContent) then&lt;br /&gt;
            return kept -- nothing to read content from&lt;br /&gt;
        end&lt;br /&gt;
        haystack = targetPageTitleOrText:getContent()&lt;br /&gt;
    else&lt;br /&gt;
        -- A string naming an existing page is read as that page; any other&lt;br /&gt;
        -- string is treated as the text itself.&lt;br /&gt;
        local page = mw.title.new(targetPageTitleOrText)&lt;br /&gt;
        if page and page.exists then&lt;br /&gt;
            haystack = page:getContent()&lt;br /&gt;
        else&lt;br /&gt;
            haystack = targetPageTitleOrText&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    if not haystack or haystack == &amp;#039;&amp;#039; then&lt;br /&gt;
        return kept -- no content, so no rule can match&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    local ruleTrie = Matcher.buildRuleTrie(rulesList)&lt;br /&gt;
    local hits = Matcher.matchTextWithTrie(haystack, ruleTrie)&lt;br /&gt;
&lt;br /&gt;
    -- Walk the rules in order so the result keeps the input ordering.&lt;br /&gt;
    for position, rule in ipairs(rulesList) do&lt;br /&gt;
        if hits[position] then&lt;br /&gt;
            kept[#kept + 1] = rule&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    return kept&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
-- Title-only variant: keep the rules that match the text of the page title itself.&lt;br /&gt;
-- Input: rulesList - a list of rule strings&lt;br /&gt;
-- Input: pageTitle - the value handed to mw.title.new to obtain the title&lt;br /&gt;
-- Output: a list holding the matching rule strings, in their original order;&lt;br /&gt;
--         empty when no title object or no title text is available&lt;br /&gt;
function Matcher.filterRulesAgainstTitleText(rulesList, pageTitle)&lt;br /&gt;
    local kept = {}&lt;br /&gt;
    if not rulesList or #rulesList == 0 then&lt;br /&gt;
        return kept&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    -- The text field is the title without its namespace prefix.&lt;br /&gt;
    local page = mw.title.new(pageTitle)&lt;br /&gt;
    local titleOnly = page and page.text&lt;br /&gt;
    if not titleOnly or titleOnly == &amp;#039;&amp;#039; then&lt;br /&gt;
        return kept -- invalid title or empty title text&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    local ruleTrie = Matcher.buildRuleTrie(rulesList)&lt;br /&gt;
    local hits = Matcher.matchTextWithTrie(titleOnly, ruleTrie)&lt;br /&gt;
&lt;br /&gt;
    for position, rule in ipairs(rulesList) do&lt;br /&gt;
        if hits[position] then&lt;br /&gt;
            kept[#kept + 1] = rule&lt;br /&gt;
        end&lt;br /&gt;
    end&lt;br /&gt;
&lt;br /&gt;
    return kept&lt;br /&gt;
end&lt;br /&gt;
&lt;br /&gt;
&lt;br /&gt;
return Matcher&lt;/div&gt;</summary>
		<author><name>imported&gt;PexEric</name></author>
	</entry>
</feed>