数组
哈希表
字符串
计数
题目描述
给你一个字符串 paragraph
和一个表示禁用词的字符串数组 banned
,返回出现频率最高的非禁用词。题目数据 保证 至少存在一个非禁用词,且答案 唯一 。
paragraph
中的单词 不区分大小写 ,答案应以 小写 形式返回。
示例 1:
输入: paragraph = "Bob hit a ball, the hit BALL flew far after it was hit.", banned = ["hit"]
输出: "ball"
解释:
"hit" 出现了 3 次,但它是禁用词。
"ball" 出现了两次(没有其他单词出现这么多次),因此它是段落中出现频率最高的非禁用词。
请注意,段落中的单词不区分大小写,
标点符号会被忽略(即使它们紧挨着单词,如 "ball,"),
并且尽管 "hit" 出现的次数更多,但它不能作为答案,因为它是禁用词。
示例 2:
输入: paragraph = "a.", banned = []
输出: "a"
提示:
1 <= paragraph.length <= 1000
paragraph
由英文字母、空格 ' '
、和以下符号组成:"!?',;."
0 <= banned.length <= 100
1 <= banned[i].length <= 10
banned[i]
仅由小写英文字母组成
解法
方法一:正则匹配/双指针 + 哈希表
正则匹配(或双指针)找出所有单词,用哈希表统计每个单词出现的频率,找到出现未在 banned 中出现且频率最大的单词。
Python3 Java C++ Go TypeScript Rust
class Solution :
def mostCommonWord ( self , paragraph : str , banned : List [ str ]) -> str :
s = set ( banned )
p = Counter ( re . findall ( '[a-z]+' , paragraph . lower ()))
return next ( word for word , _ in p . most_common () if word not in s )
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 import java.util.regex.Matcher ;
import java.util.regex.Pattern ;
class Solution {
private static Pattern pattern = Pattern . compile ( "[a-z]+" );
public String mostCommonWord ( String paragraph , String [] banned ) {
Set < String > bannedWords = new HashSet <> ();
for ( String word : banned ) {
bannedWords . add ( word );
}
Map < String , Integer > counter = new HashMap <> ();
Matcher matcher = pattern . matcher ( paragraph . toLowerCase ());
while ( matcher . find ()) {
String word = matcher . group ();
if ( bannedWords . contains ( word )) {
continue ;
}
counter . put ( word , counter . getOrDefault ( word , 0 ) + 1 );
}
int max = Integer . MIN_VALUE ;
String ans = null ;
for ( Map . Entry < String , Integer > entry : counter . entrySet ()) {
if ( entry . getValue () > max ) {
max = entry . getValue ();
ans = entry . getKey ();
}
}
return ans ;
}
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25 class Solution {
public :
string mostCommonWord ( string paragraph , vector < string >& banned ) {
unordered_set < string > s ( banned . begin (), banned . end ());
unordered_map < string , int > counter ;
string ans ;
for ( int i = 0 , mx = 0 , n = paragraph . size (); i < n ;) {
if ( ! isalpha ( paragraph [ i ]) && ( ++ i > 0 )) continue ;
int j = i ;
string word ;
while ( j < n && isalpha ( paragraph [ j ])) {
word . push_back ( tolower ( paragraph [ j ]));
++ j ;
}
i = j + 1 ;
if ( s . count ( word )) continue ;
++ counter [ word ];
if ( counter [ word ] > mx ) {
ans = word ;
mx = counter [ word ];
}
}
return ans ;
}
};
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31 func mostCommonWord ( paragraph string , banned [] string ) string {
s := make ( map [ string ] bool )
for _ , w := range banned {
s [ w ] = true
}
counter := make ( map [ string ] int )
var ans string
for i , mx , n := 0 , 0 , len ( paragraph ); i < n ; {
if ! unicode . IsLetter ( rune ( paragraph [ i ])) {
i ++
continue
}
j := i
var word [] byte
for j < n && unicode . IsLetter ( rune ( paragraph [ j ])) {
word = append ( word , byte ( unicode . ToLower ( rune ( paragraph [ j ]))))
j ++
}
i = j + 1
t := string ( word )
if s [ t ] {
continue
}
counter [ t ] ++
if counter [ t ] > mx {
ans = t
mx = counter [ t ]
}
}
return ans
}
1
2
3
4
5
6
7
8
9
10
11
12 function mostCommonWord ( paragraph : string , banned : string []) : string {
const s = paragraph . toLocaleLowerCase ();
const map = new Map < string , number > ();
const set = new Set < string > ( banned );
for ( const word of s . split ( /[^A-z]/ )) {
if ( word === '' || set . has ( word )) {
continue ;
}
map . set ( word , ( map . get ( word ) ?? 0 ) + 1 );
}
return [... map . entries ()]. reduce (( r , v ) => ( v [ 1 ] > r [ 1 ] ? v : r ), [ '' , 0 ])[ 0 ];
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20 use std :: collections ::{ HashMap , HashSet };
impl Solution {
pub fn most_common_word ( mut paragraph : String , banned : Vec < String > ) -> String {
paragraph . make_ascii_lowercase ();
let banned : HashSet <& str > = banned . iter (). map ( String :: as_str ). collect ();
let mut map = HashMap :: new ();
for word in paragraph . split ( | c | ! matches! ( c , 'a' ..= 'z' )) {
if word . is_empty () || banned . contains ( word ) {
continue ;
}
let val = map . get ( & word ). unwrap_or ( & 0 ) + 1 ;
map . insert ( word , val );
}
map . into_iter ()
. max_by_key ( |& ( _ , v ) | v )
. unwrap ()
. 0
. to_string ()
}
}