2019-11-02 22:27:54 +00:00
|
|
|
from bisect import bisect_left
|
|
|
|
import string
|
|
|
|
import sys
|
|
|
|
import os
|
|
|
|
|
|
|
|
class WhiteList:
    """Sorted word list supporting exact-match and prefix queries.

    Entries are kept in ``self.words`` as lower-cased UTF-8 ``bytes``, sorted
    ascending so that lookups can use binary search. ``self.numWords`` holds
    the number of entries stored at construction time.
    """

    def __init__(self, wordlist):
        """Build the whitelist from an iterable of lines.

        Args:
            wordlist: Iterable of ``bytes`` or ``str`` lines (for example an
                open file). Carriage returns and line feeds are stripped from
                both ends of each line; lines that are empty afterwards are
                skipped so that the empty string never counts as a word.
        """
        entries = (self._normalizeEntry(line) for line in wordlist)
        self.words = sorted(entry for entry in entries if entry)
        self.numWords = len(self.words)

    @staticmethod
    def _normalizeEntry(line):
        """Return one word-list line as lower-cased UTF-8 bytes."""
        if isinstance(line, str):
            return line.strip('\n\r').lower().encode('utf-8')
        raw = bytes(line).strip(b'\n\r')
        try:
            # Lower-case through str so entries are folded exactly like the
            # queries in cleanText(); bytes.lower() only touches ASCII letters.
            return raw.decode('utf-8').lower().encode('utf-8')
        except UnicodeDecodeError:
            # Not valid UTF-8: keep the entry, folding ASCII letters only.
            return raw.lower()

    def cleanText(self, text):
        """Normalize a query so it can be compared with stored entries.

        Args:
            text: ``str``, ``bytes`` or ``bytearray``. Byte input is decoded
                as UTF-8.

        Returns:
            The text with leading/trailing ``.,?!`` removed, lower-cased and
            encoded as UTF-8 ``bytes``.

        Raises:
            UnicodeDecodeError: If byte input is not valid UTF-8.
        """
        if isinstance(text, (bytes, bytearray)):
            text = text.decode('utf-8')
        return text.strip('.,?!').lower().encode('utf-8')

    def _prefixRange(self, text):
        """Return ``(start, stop)`` bounding the entries that start with text."""
        prefix = self.cleanText(text)
        start = bisect_left(self.words, prefix)
        stop = start
        total = len(self.words)
        # The list is sorted, so all entries sharing the prefix are contiguous
        # and begin at the insertion point.
        while stop < total and self.words[stop].startswith(prefix):
            stop += 1
        return start, stop

    def isWord(self, text):
        """Return True if the normalized text is an entry of the list."""
        key = self.cleanText(text)
        pos = bisect_left(self.words, key)
        return pos < len(self.words) and self.words[pos] == key

    def isPrefix(self, text):
        """Return True if at least one entry starts with the normalized text."""
        prefix = self.cleanText(text)
        pos = bisect_left(self.words, prefix)
        # If any entry has this prefix, the one at the insertion point does.
        return pos < len(self.words) and self.words[pos].startswith(prefix)

    def prefixCount(self, text):
        """Return how many entries start with the normalized text."""
        start, stop = self._prefixRange(text)
        return stop - start

    def prefixList(self, text):
        """Return the sorted list of entries (bytes) starting with the text."""
        start, stop = self._prefixRange(text)
        return self.words[start:stop]
|