一、引言
前缀树又叫字典树,可以快速查找字符串或字符串前缀出现的次数,方便进行前缀匹配、词频统计
二、字典树模型
现有一个字典树,里面有money、mother、salary、salary、say五个单词
其中根节点位置还没有字符,相当于空串,字符作为路径进行搜索,节点里有pass、end,
pass代表从根节点走到当前节点的次数(或者从根到当前节点的路径字符串前缀出现的次数)
end代表从根节点开始,当前节点结束的字符串出现次数
三、代码实现
方式一、构建实际字典树
class Trie {class TrieNode {int pass;int end;TrieNode[] path;TrieNode() {this.pass = 0;this.end = 0;this.path = new TrieNode[26];} TrieNode(int pass, int end) {this.pass = pass;this.end = end;this.path = new TrieNode[26];} }private TrieNode root;public Trie() {this.root = new TrieNode();}// 添加单词public void insert(String word) {TrieNode cur = root;// 插入字符串时经过该节点,记录经过次数cur.pass++;int len = word.length();for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';// 找到字符路径对应的节点,如果不存在则创建if(cur.path[p] == null) {cur.path[p] = new TrieNode();}cur = cur.path[p];// 记录经过次数cur.pass++;}// 标记结束cur.end++;}// 统计单词出现次数public int countWordsEqualTo(String word) {// 找到对应的passint len = word.length();TrieNode cur = root;for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';if(cur.path[p] == null) {return 0;}cur = cur.path[p];}return cur.end;}// 统计前缀出现次数public int countWordsStartingWith(String prefix) {// 找到对应的passint len = prefix.length();TrieNode cur = root;for(int i = 0; i < len; i++) {int p = prefix.charAt(i) - 'a';if(cur.path[p] == null) {return 0;}cur = cur.path[p];}return cur.pass;}// 删除单词public void erase(String word) {// 获取单词出现次数,如果出现次数为0则不必删除if(countWordsEqualTo(word) <= 0) {return;}int len = word.length();TrieNode cur = root;for(int i = 0; i < len; i++) {cur.pass--;int p = word.charAt(i) - 'a';cur = cur.path[p];}cur.pass--;cur.end--;}
}
方式二、用二维数组模拟字典树
class Trie {// 节点最大数量private static int NODE_MAX_SIZE = 20001;// 创建节点的路径信息,第0行不使用private static int[][] path = new int[NODE_MAX_SIZE][26];// 目标节点的passprivate static int[] pass = new int[NODE_MAX_SIZE];// 目标节点的endprivate static int[] end = new int[NODE_MAX_SIZE];// 计数器,代表节点的个数,默认path[1]为根节点private static int cnt = 1;public Trie() {for(int i = 1; i <= cnt; i++) {Arrays.fill(path[i], 0);pass[i] = 0;end[i] = 0;}cnt = 1;}public void insert(String word) {int cur = 1;pass[cur]++;int len = word.length();for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';// 找到目标节点int target = path[cur][p];if(target == 0 || pass[target] == 0) {// 创建节点target = ++cnt;path[cur][p] = target;}// 记录passpass[target]++;cur = target;}// 记录endend[cur]++;}public int countWordsEqualTo(String word) {int cur = 1;int len = word.length();for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';// 找到目标节点int target = path[cur][p];if(target == 0 || pass[target] == 0) {return 0;}cur = target;}// 记录endreturn end[cur];}public int countWordsStartingWith(String prefix) {int cur = 1;int len = prefix.length();for(int i = 0; i < len; i++) {int p = prefix.charAt(i) - 'a';// 找到目标节点int target = path[cur][p];if(target == 0 || pass[target] == 0) {return 0;}cur = target;}// 记录endreturn pass[cur];}public void erase(String word) {if(countWordsEqualTo(word) <= 0) {return;}int cur = 1;int len = word.length();for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';// 找到目标节点int target = path[cur][p];if(--pass[target] == 0) {path[cur][p] = 0;}cur = target;}end[cur]--;}
}
四、刷题
牛客字典序题目:字典树的实现
参考代码:
import java.util.Scanner;
import java.io.*;// 注意类名必须为 Main, 不要有任何 package xxx 信息
public class Main {private static class Trie1 {class TrieNode {int pass;int end;TrieNode[] path;TrieNode() {this.pass = 0;this.end = 0;this.path = new TrieNode[26];}TrieNode(int pass, int end) {this.pass = pass;this.end = end;this.path = new TrieNode[26];}}TrieNode root;public Trie1() {this.root = new TrieNode();}public void insert(String word) {TrieNode cur = root;cur.pass++;int len = word.length();for (int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';if (cur.path[p] == null) {cur.path[p] = new TrieNode();}cur = cur.path[p];cur.pass++;}// 标记结束cur.end++;}public int countWordsEqualTo(String word) {// 找到对应的passint len = word.length();TrieNode cur = root;for (int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';if (cur.path[p] == null) {return 0;}cur = cur.path[p];}return cur.end;}public int countWordsStartingWith(String prefix) {// 找到对应的passint len = prefix.length();TrieNode cur = root;for (int i = 0; i < len; i++) {int p = prefix.charAt(i) - 'a';if (cur.path[p] == null) {return 0;}cur = cur.path[p];}return cur.pass;}public void erase(String word) {// 获取单词出现次数,如果出现次数为0则不必删除if (countWordsEqualTo(word) <= 0) {return;}int len = word.length();TrieNode cur = root;for (int i = 0; i < len; i++) {cur.pass--;int p = word.charAt(i) - 'a';cur = cur.path[p];}cur.pass--;cur.end--;}}public static void main(String[] args)throws Exception {StreamTokenizer input = new StreamTokenizer(new BufferedReader(new InputStreamReader(System.in)));PrintWriter output = new PrintWriter(new BufferedWriter(new OutputStreamWriter(System.out)));Trie1 trie = new Trie1();input.nextToken();int count = (int)input.nval;while (count-- > 0) {input.nextToken();int op = (int)input.nval;input.nextToken();String s = input.sval;if(op == 1) {trie.insert(s);} else if(op == 2) {trie.erase(s);} else if(op == 3) {boolean exists = trie.countWordsEqualTo(s) > 0;output.println(exists ? "YES" : "NO");} else {int prefixCount = trie.countWordsStartingWith(s);output.println(prefixCount);}}output.flush();output.close();}
}
力扣字典序题目(plus会员):1804. 实现 Trie (前缀树) II - 力扣(LeetCode)
参考代码:
class Trie {class TrieNode {int pass;int end;TrieNode[] path;TrieNode() {this.pass = 0;this.end = 0;this.path = new TrieNode[26];} TrieNode(int pass, int end) {this.pass = pass;this.end = end;this.path = new TrieNode[26];} }private TrieNode root;public Trie() {this.root = new TrieNode();}public void insert(String word) {TrieNode cur = root;// 插入字符串时经过该节点,记录经过次数cur.pass++;int len = word.length();for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';// 找到字符路径对应的节点,如果不存在则创建if(cur.path[p] == null) {cur.path[p] = new TrieNode();}cur = cur.path[p];// 记录经过次数cur.pass++;}// 标记结束cur.end++;}public int countWordsEqualTo(String word) {// 找到对应的passint len = word.length();TrieNode cur = root;for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';if(cur.path[p] == null) {return 0;}cur = cur.path[p];}return cur.end;}public int countWordsStartingWith(String prefix) {// 找到对应的passint len = prefix.length();TrieNode cur = root;for(int i = 0; i < len; i++) {int p = prefix.charAt(i) - 'a';if(cur.path[p] == null) {return 0;}cur = cur.path[p];}return cur.pass;}public void erase(String word) {// 获取单词出现次数,如果出现次数为0则不必删除if(countWordsEqualTo(word) <= 0) {return;}int len = word.length();TrieNode cur = root;for(int i = 0; i < len; i++) {int p = word.charAt(i) - 'a';TrieNode target = cur.path[p];// 如果移除之后pass=0说明单词不存在了,可以直接删除了if(--cur.pass == 0) {cur.path[p] = null;return;}cur = target;}cur.pass--;cur.end--;}
}