Skip to content

17.07. Baby Names

Description

Each year, the government releases a list of the 10000 most common baby names and their frequencies (the number of babies with that name). The only problem with this is that some names have multiple spellings. For example,"John" and ''Jon" are essentially the same name but would be listed separately in the list. Given two lists, one of names/frequencies and the other of pairs of equivalent names, write an algorithm to print a new list of the true frequency of each name. Note that if John and Jon are synonyms, and Jon and Johnny are synonyms, then John and Johnny are synonyms. (It is both transitive and symmetric.) In the final list, choose the name that are lexicographically smallest as the "real" name.

Example:


Input: names = ["John(15)","Jon(12)","Chris(13)","Kris(4)","Christopher(19)"], synonyms = ["(Jon,John)","(John,Johnny)","(Chris,Kris)","(Chris,Christopher)"]

Output: ["John(27)","Chris(36)"]

Note:

  • names.length <= 100000

Solutions

Solution 1: Hash Table + DFS

For each pair of synonyms, we establish bidirectional edges between the two names and store them in the adjacency list $g$. Then, we traverse all names, store them in the set $s$, and store their frequencies in the hash table $cnt$.

Next, we traverse each name in the set $s$. If the name has not been visited, we perform a depth-first search to find all names in the connected component where the name is located. We use the name with the smallest lexicographic order as the real name, and the sum of their frequencies is the frequency of the real name. Then, we store this name and its frequency in the answer array.

After traversing all names, the answer array is what we seek.

The time complexity is $O(n + m)$, and the space complexity is $O(n + m)$. Where $n$ and $m$ are the lengths of the name array and the synonym array, respectively.

 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
class Solution:
    def trulyMostPopular(self, names: List[str], synonyms: List[str]) -> List[str]:
        def dfs(a):
            vis.add(a)
            mi, x = a, cnt[a]
            for b in g[a]:
                if b not in vis:
                    t, y = dfs(b)
                    if mi > t:
                        mi = t
                    x += y
            return mi, x

        g = defaultdict(list)
        for e in synonyms:
            a, b = e[1:-1].split(',')
            g[a].append(b)
            g[b].append(a)
        s = set()
        cnt = defaultdict(int)
        for x in names:
            name, freq = x[:-1].split("(")
            s.add(name)
            cnt[name] = int(freq)
        vis = set()
        ans = []
        for name in s:
            if name not in vis:
                name, freq = dfs(name)
                ans.append(f"{name}({freq})")
        return ans
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
class Solution {
    private Map<String, List<String>> g = new HashMap<>();
    private Map<String, Integer> cnt = new HashMap<>();
    private Set<String> vis = new HashSet<>();
    private int freq;

    public String[] trulyMostPopular(String[] names, String[] synonyms) {
        for (String pairs : synonyms) {
            String[] pair = pairs.substring(1, pairs.length() - 1).split(",");
            String a = pair[0], b = pair[1];
            g.computeIfAbsent(a, k -> new ArrayList<>()).add(b);
            g.computeIfAbsent(b, k -> new ArrayList<>()).add(a);
        }
        Set<String> s = new HashSet<>();
        for (String x : names) {
            int i = x.indexOf('(');
            String name = x.substring(0, i);
            s.add(name);
            cnt.put(name, Integer.parseInt(x.substring(i + 1, x.length() - 1)));
        }
        List<String> res = new ArrayList<>();
        for (String name : s) {
            if (!vis.contains(name)) {
                freq = 0;
                name = dfs(name);
                res.add(name + "(" + freq + ")");
            }
        }
        return res.toArray(new String[0]);
    }

    private String dfs(String a) {
        String mi = a;
        vis.add(a);
        freq += cnt.getOrDefault(a, 0);
        for (String b : g.getOrDefault(a, new ArrayList<>())) {
            if (!vis.contains(b)) {
                String t = dfs(b);
                if (t.compareTo(mi) < 0) {
                    mi = t;
                }
            }
        }
        return mi;
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
class Solution {
public:
    vector<string> trulyMostPopular(vector<string>& names, vector<string>& synonyms) {
        unordered_map<string, vector<string>> g;
        unordered_map<string, int> cnt;
        for (auto& e : synonyms) {
            int i = e.find(',');
            string a = e.substr(1, i - 1);
            string b = e.substr(i + 1, e.size() - i - 2);
            g[a].emplace_back(b);
            g[b].emplace_back(a);
        }
        unordered_set<string> s;
        for (auto& e : names) {
            int i = e.find('(');
            string name = e.substr(0, i);
            s.insert(name);
            cnt[name] += stoi(e.substr(i + 1, e.size() - i - 2));
        }
        unordered_set<string> vis;
        int freq = 0;

        function<string(string)> dfs = [&](string a) -> string {
            string res = a;
            vis.insert(a);
            freq += cnt[a];
            for (auto& b : g[a]) {
                if (!vis.count(b)) {
                    string t = dfs(b);
                    if (t < res) {
                        res = move(t);
                    }
                }
            }
            return move(res);
        };

        vector<string> ans;
        for (auto& name : s) {
            if (!vis.count(name)) {
                freq = 0;
                string x = dfs(name);
                ans.emplace_back(x + "(" + to_string(freq) + ")");
            }
        }
        return ans;
    }
};
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
func trulyMostPopular(names []string, synonyms []string) (ans []string) {
    g := map[string][]string{}
    for _, s := range synonyms {
        i := strings.Index(s, ",")
        a, b := s[1:i], s[i+1:len(s)-1]
        g[a] = append(g[a], b)
        g[b] = append(g[b], a)
    }
    s := map[string]struct{}{}
    cnt := map[string]int{}
    for _, e := range names {
        i := strings.Index(e, "(")
        name, num := e[:i], e[i+1:len(e)-1]
        x, _ := strconv.Atoi(num)
        cnt[name] += x
        s[name] = struct{}{}
    }
    freq := 0
    vis := map[string]struct{}{}
    var dfs func(string) string
    dfs = func(a string) string {
        vis[a] = struct{}{}
        freq += cnt[a]
        res := a
        for _, b := range g[a] {
            if _, ok := vis[b]; !ok {
                t := dfs(b)
                if t < res {
                    res = t
                }
            }
        }
        return res
    }
    for name := range s {
        if _, ok := vis[name]; !ok {
            freq = 0
            root := dfs(name)
            ans = append(ans, root+"("+strconv.Itoa(freq)+")")
        }
    }
    return
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
function trulyMostPopular(names: string[], synonyms: string[]): string[] {
    const map = new Map<string, string>();
    for (const synonym of synonyms) {
        const [k1, k2] = [...synonym]
            .slice(1, synonym.length - 1)
            .join('')
            .split(',');
        const [v1, v2] = [map.get(k1) ?? k1, map.get(k2) ?? k2];
        const min = v1 < v2 ? v1 : v2;
        const max = v1 < v2 ? v2 : v1;
        map.set(k1, min);
        map.set(k2, min);
        for (const [k, v] of map.entries()) {
            if (v === max) {
                map.set(k, min);
            }
        }
    }

    const keyCount = new Map<string, number>();
    for (const name of names) {
        const num = name.match(/\d+/)[0];
        const k = name.split('(')[0];
        const key = map.get(k) ?? k;
        keyCount.set(key, (keyCount.get(key) ?? 0) + Number(num));
    }
    return [...keyCount.entries()].map(([k, v]) => `${k}(${v})`);
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
class Solution {
    private var graph = [String: [String]]()
    private var count = [String: Int]()
    private var visited = Set<String>()
    private var freq: Int = 0

    func trulyMostPopular(_ names: [String], _ synonyms: [String]) -> [String] {
        for pair in synonyms {
            let cleanPair = pair.dropFirst().dropLast()
            let parts = cleanPair.split(separator: ",").map(String.init)
            let a = parts[0], b = parts[1]
            graph[a, default: []].append(b)
            graph[b, default: []].append(a)
        }

        var namesSet = Set<String>()
        for name in names {
            let index = name.firstIndex(of: "(")!
            let realName = String(name[..<index])
            namesSet.insert(realName)
            let num = Int(name[name.index(after: index)..<name.index(before: name.endIndex)])!
            count[realName] = num
        }

        var result = [String]()
        for name in namesSet {
            if !visited.contains(name) {
                freq = 0
                let representative = dfs(name)
                result.append("\(representative)(\(freq))")
            }
        }

        return result
    }

    private func dfs(_ name: String) -> String {
        var minName = name
        visited.insert(name)
        freq += count[name, default: 0]
        for neighbor in graph[name, default: []] {
            if !visited.contains(neighbor) {
                let temp = dfs(neighbor)
                if temp < minName {
                    minName = temp
                }
            }
        }
        return minName
    }
}

Comments