字典树(前缀树)-数据结构

skyyemperor ·2020-03-28 ·451 次阅读

前缀树

前缀树

字典树又名前缀树，Trie树，是一种存储大量字符串的树形数据结构，相比于HashMap存储，在存储单词（和语种无关，任意语言都可以）的场景上，节省了大量的内存空间。

下图演示了一个保存了8个单词的字典树的结构，8个单词分别是："A", "to", "tea", "ted", "ten", "i", "in", "inn"。

怎么理解这颗树呢？你从根节点走到叶子节点，尝试走一下所有的路径。你会发现，每条从根节点到叶子节点的路径都构成了单词（有的不需要走到叶子节点也是单词，比如 "i" 和 "in"）

1)单个字符串中,字符从前到后的加到一棵多叉树上 
2)字符放在路上,节点上有专属的数据项(常见的是pass和end值) 
3)所有样本都这样添加,如果没有路就新建,如有路就复用 
4)沿途节点的pass值增加1,每个字符串结束时来到的节点end值增加1 可以完成前缀相关的查询

使用数组实现

public class Trie {
    private Node root;

    static class Node {
        public int pass;//经过该节点的次数
        public int end;//以该节点为结尾的次数
        public Node[] nexts;

        public Node() {
            pass = 0;
            end = 0;
            nexts = new Node[26];
        }
    }

    public Trie() {
        root = new Node();
    }

    //增加word
    public void insert(String word) {
        if (word == null) {
            return;
        }
        char[] str = word.toCharArray();
        Node node = root;
        node.pass++;
        int path = 0;
        for (int i = 0; i < str.length; i++) { // 从左往右遍历字符
            path = str[i] - 'a'; // 由字符，对应成走向哪条路
            if (node.nexts[path] == null) {
                node.nexts[path] = new Node();
            }
            node = node.nexts[path];
            node.pass++;
        }
        node.end++;
    }

    //删除word
    public void delete(String word) {
        if (search(word) != 0) {
            char[] chs = word.toCharArray();
            Node node = root;
            node.pass--;
            int path = 0;
            for (int i = 0; i < chs.length; i++) {
                path = chs[i] - 'a';
                if (--node.nexts[path].pass == 0) {
                    node.nexts[path] = null;
                    return;
                }
                node = node.nexts[path];
            }
            node.end--;
        }
    }

    // word这个单词之前加入过几次
    public int search(String word) {
        if (word == null) {
            return 0;
        }
        char[] chs = word.toCharArray();
        Node node = root;
        int index = 0;
        for (int i = 0; i < chs.length; i++) {
            index = chs[i] - 'a';
            if (node.nexts[index] == null) {
                return 0;
            }
            node = node.nexts[index];
        }
        return node.end;
    }

    // 所有加入的字符串中，有几个是以pre这个字符串作为前缀的
    public int prefixNumber(String pre) {
        if (pre == null) {
            return 0;
        }
        char[] chs = pre.toCharArray();
        Node node = root;
        int index = 0;
        for (int i = 0; i < chs.length; i++) {
            index = chs[i] - 'a';
            if (node.nexts[index] == null) {
                return 0;
            }
            node = node.nexts[index];
        }
        return node.pass;
    }
}

使用HashMap实现

class Trie {
    Node root;

    static class Node {
        public int pass;
        public int end;
        public String str;
        public HashMap<Integer, Node> children;

        public Node() {
            pass = 0;
            end = 0;
            str = null;
            children = new HashMap<>();
        }
    }

    public Trie() {
        root = new Node();
    }

    public void insert(String word) {
        if (word == null) return;

        Node node = root;
        node.pass++;
        for (char c : word.toCharArray()) {
            int idx = (int) c;
            if (!node.children.containsKey(idx)) node.children.put(idx, new Node());
            node = node.children.get(idx);
            node.pass++;
        }
        node.str = word;
        node.end++;
    }

    public void delete(String word) {
        if (search(word) != 0) {
            Node node = root;
            node.pass--;
            for (char c : word.toCharArray()) {
                int idx = (int) c;
                if (--node.children.get(idx).pass == 0) {
                    node.children.remove(idx);
                    return;
                }
                node = node.children.get(idx);
            }
            node.end--;
        }
    }

    // word这个单词之前加入过几次
    public int search(String word) {
        if (word == null) return 0;

        Node node = root;
        for (char c : word.toCharArray()) {
            int idx = (int) c;
            if (!node.children.containsKey(idx)) return 0;
            node = node.children.get(idx);
        }
        return node.end;
    }

    // 所有加入的字符串中，有几个是以pre这个字符串作为前缀的
    public int prefixNumber(String word) {
        if (word == null) return 0;

        Node node = root;
        for (char c : word.toCharArray()) {
            int idx = (int) c;
            if (!node.children.containsKey(idx)) return 0;
            node = node.children.get(idx);
        }
        return node.pass;
    }
}

字符串匹配(可能包含 . )

/**
 * 如果数据结构中存在字符串与word匹配，则返回true;否则，返回false。
 * word中可能包含一些 '.' ，每个 . 都可以表示任何一个字母。
 */
public boolean search(String word) {
    return search(root, word, 0);
}

public boolean search(Node node, String word, int p) {
    if (p == word.length()) return node.end > 0;
    int c = word.charAt(p);
    int idx = (int) c;

    if (c == '.') {
        for (Node child : node.children.values())
            if (search(child, word, p + 1)) return true;
        return false;
    } else {
        if (!node.children.containsKey(idx)) return false;
        return search(node.children.get(idx), word, p + 1);
    }
}

魔法字典

/**
 * 判定能否只将字符串中一个字母换成另一个字母，使得所形成的新字符串能够与字典中的任一字符串匹配。
 */
public boolean searchMagic(String word) {
    int cnt = search(word);
    if (cnt > 0) delete(word); //先删除，防止匹配到原字符串
    boolean ans = searchMagic(root, word, 0, 1);
    if (cnt > 0) insert(word);
    return ans;
}

private boolean searchMagic(Node node, String word, int p, int k) {
    if (word.length() == p) return node.end > 0;
    char c = word.charAt(p);
    int idx = (int) c;

    //该节点匹配到该字符，继续向下匹配
    if (node.children.containsKey(idx)) {
        //这里返回true才return，防止匹配到但是end不为0
        if (searchMagic(node.children.get(idx), word, p + 1, k)) return true;
    }
    
    //没有匹配到则修改字符
    if (k > 0) {
        for (Node child : node.children.values())
            if (searchMagic(child, word, p + 1, --k)) return true;
        return false;
    }
    return false;
}

数组中的异或

数组中两个数的最大异或值

可以利用Trie树求出某一个num与数组中另一个有最大异或值的num。

之后重复n次，求出最大的异或值

static class Trie {
    private Node root;

    static class Node {
        public int pass;//经过该节点的次数
        public int end;//以该节点为结尾的次数
        public Node[] nexts;
        public int val;

        public Node() {
            nexts = new Node[26];
        }
    }

    public Trie() {
        root = new Node();
    }

    //增加word
    public void insert(int word) {
        int[] chs = new int[32];
        for (int i = 31; i >= 0; i--)
            chs[31 - i] = word >> i & 1;
        Node node = root;
        node.pass++;
        int path = 0;
        for (int i = 0; i < chs.length; i++) { // 从左往右遍历字符
            path = chs[i]; // 由字符，对应成走向哪条路
            if (node.nexts[path] == null) {
                node.nexts[path] = new Node();
            }
            node = node.nexts[path];
            node.pass++;
        }
        node.end++;
        node.val = word;
    }

    // word这个单词之前加入过几次
    public int search(int word) {
        int[] chs = new int[32];
        for (int i = 31; i >= 0; i--)
            chs[31 - i] = word >> i & 1;
        Node node = root;
        for (int i = 0; i < chs.length; i++) {
            int idx = chs[i];
            int opposite = idx == 0 ? 1 : 0;
            if (node.nexts[opposite] != null) {
                node = node.nexts[opposite];
            } else {
                node = node.nexts[idx];
            }
        }
        return word ^ node.val;
    }
}

public int findMaximumXOR(int[] nums) {
    Trie trie = new Trie();
    for (int num : nums) trie.insert(num);
    int res = 0;
    for (int num : nums) res = Math.max(res, trie.search(num));
    return res;
}

统计异或值在范围内的数对有多少

题目相当于[0,high+1) - [0,low)

要求 num异或小于一个值(x)的数对的数量，可以通过num^x得出一个偏向值。一层一层地遍历，若x的数位上为0，不做操作；若x的数位为1，则将相反的数位的数目相加，因为数位为0的话，这种情况是不会有数对比这个小的。

static class Trie {
    private Node root;

    static class Node {
        public int pass;//经过该节点的次数
        public int end;//以该节点为结尾的次数
        public Node[] nexts;
        public int val;

        public Node() {
            nexts = new Node[26];
        }
    }

    public Trie() {
        root = new Node();
    }

    //增加word
    public void insert(int word) {
        int[] str = new int[32];
        for (int i = 14; i >= 0; i--)
            str[14 - i] = word >> i & 1;
        Node node = root;
        node.pass++;
        int path = 0;
        for (int i = 0; i < str.length; i++) { // 从左往右遍历字符
            path = str[i]; // 由字符，对应成走向哪条路
            if (node.nexts[path] == null) {
                node.nexts[path] = new Node();
            }
            node = node.nexts[path];
            node.pass++;
        }
        node.end++;
        node.val = word;
    }

    // word这个单词之前加入过几次
    public int search(int word, int high) {
        Node node = root;
        int res = 0;
        for (int i = 14; i >= 0; i--) {
            int idx = word >> i & 1;
            int hi = high >> i & 1;
            int like = idx ^ hi;
            if (hi == 1) {
                int opposite = like == 1 ? 0 : 1;
                res += node.nexts[opposite] == null ? 0 : node.nexts[opposite].pass;
            }
            if (node.nexts[like] != null)
                node = node.nexts[like];
            else
                break;
        }
        return res;
    }
}

public int countPairs(int[] nums, int low, int high) {
    Trie trie = new Trie();
    for (int num : nums) trie.insert(num);
    int res = 0;
    for (int num : nums) {
        res += trie.search(num, high + 1) - trie.search(num, low);
    }
    return res / 2;
}

java利用HttpClient进行Get和Post请求

缓存机制

skyyemperor

skyyemperor

hhhhh

查看评论 - 0 条评论

Comments | 0 条评论

nice