跳转至

819. 最常见的单词

题目描述

给你一个字符串 paragraph 和一个表示禁用词的字符串数组 banned ,返回出现频率最高的非禁用词。题目数据 保证 至少存在一个非禁用词,且答案 唯一

paragraph 中的单词 不区分大小写 ,答案应以 小写 形式返回。

 

示例 1:

输入:paragraph = "Bob hit a ball, the hit BALL flew far after it was hit.", banned = ["hit"]
输出:"ball"
解释:
"hit" 出现了 3 次,但它是禁用词。
"ball" 出现了两次(没有其他单词出现这么多次),因此它是段落中出现频率最高的非禁用词。
请注意,段落中的单词不区分大小写,
标点符号会被忽略(即使它们紧挨着单词,如 "ball,"),
并且尽管 "hit" 出现的次数更多,但它不能作为答案,因为它是禁用词。

示例 2:

输入:paragraph = "a.", banned = []
输出:"a"

 

提示:

  • 1 <= paragraph.length <= 1000
  • paragraph 由英文字母、空格 ' '、和以下符号组成:"!?',;."
  • 0 <= banned.length <= 100
  • 1 <= banned[i].length <= 10
  • banned[i] 仅由小写英文字母组成

解法

方法一:正则匹配/双指针 + 哈希表

正则匹配(或双指针)找出所有单词,用哈希表统计每个单词出现的频率,找到出现未在 banned 中出现且频率最大的单词。

1
2
3
4
5
class Solution:
    def mostCommonWord(self, paragraph: str, banned: List[str]) -> str:
        s = set(banned)
        p = Counter(re.findall('[a-z]+', paragraph.lower()))
        return next(word for word, _ in p.most_common() if word not in s)
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
import java.util.regex.Matcher;
import java.util.regex.Pattern;

class Solution {
    private static Pattern pattern = Pattern.compile("[a-z]+");

    public String mostCommonWord(String paragraph, String[] banned) {
        Set<String> bannedWords = new HashSet<>();
        for (String word : banned) {
            bannedWords.add(word);
        }
        Map<String, Integer> counter = new HashMap<>();
        Matcher matcher = pattern.matcher(paragraph.toLowerCase());
        while (matcher.find()) {
            String word = matcher.group();
            if (bannedWords.contains(word)) {
                continue;
            }
            counter.put(word, counter.getOrDefault(word, 0) + 1);
        }
        int max = Integer.MIN_VALUE;
        String ans = null;
        for (Map.Entry<String, Integer> entry : counter.entrySet()) {
            if (entry.getValue() > max) {
                max = entry.getValue();
                ans = entry.getKey();
            }
        }
        return ans;
    }
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
class Solution {
public:
    string mostCommonWord(string paragraph, vector<string>& banned) {
        unordered_set<string> s(banned.begin(), banned.end());
        unordered_map<string, int> counter;
        string ans;
        for (int i = 0, mx = 0, n = paragraph.size(); i < n;) {
            if (!isalpha(paragraph[i]) && (++i > 0)) continue;
            int j = i;
            string word;
            while (j < n && isalpha(paragraph[j])) {
                word.push_back(tolower(paragraph[j]));
                ++j;
            }
            i = j + 1;
            if (s.count(word)) continue;
            ++counter[word];
            if (counter[word] > mx) {
                ans = word;
                mx = counter[word];
            }
        }
        return ans;
    }
};
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
func mostCommonWord(paragraph string, banned []string) string {
    s := make(map[string]bool)
    for _, w := range banned {
        s[w] = true
    }
    counter := make(map[string]int)
    var ans string
    for i, mx, n := 0, 0, len(paragraph); i < n; {
        if !unicode.IsLetter(rune(paragraph[i])) {
            i++
            continue
        }
        j := i
        var word []byte
        for j < n && unicode.IsLetter(rune(paragraph[j])) {
            word = append(word, byte(unicode.ToLower(rune(paragraph[j]))))
            j++
        }
        i = j + 1
        t := string(word)
        if s[t] {
            continue
        }
        counter[t]++
        if counter[t] > mx {
            ans = t
            mx = counter[t]
        }
    }
    return ans
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
function mostCommonWord(paragraph: string, banned: string[]): string {
    const s = paragraph.toLocaleLowerCase();
    const map = new Map<string, number>();
    const set = new Set<string>(banned);
    for (const word of s.split(/[^A-z]/)) {
        if (word === '' || set.has(word)) {
            continue;
        }
        map.set(word, (map.get(word) ?? 0) + 1);
    }
    return [...map.entries()].reduce((r, v) => (v[1] > r[1] ? v : r), ['', 0])[0];
}
 1
 2
 3
 4
 5
 6
 7
 8
 9
10
11
12
13
14
15
16
17
18
19
use std::collections::{ HashMap, HashSet };
impl Solution {
    pub fn most_common_word(mut paragraph: String, banned: Vec<String>) -> String {
        paragraph.make_ascii_lowercase();
        let banned: HashSet<&str> = banned.iter().map(String::as_str).collect();
        let mut map = HashMap::new();
        for word in paragraph.split(|c| !matches!(c, 'a'..='z')) {
            if word.is_empty() || banned.contains(word) {
                continue;
            }
            let val = map.get(&word).unwrap_or(&0) + 1;
            map.insert(word, val);
        }
        map.into_iter()
            .max_by_key(|&(_, v)| v)
            .unwrap()
            .0.to_string()
    }
}

评论