2021-03-24
阅读量:
11133
NLTK 英文分词报错:调用 word_tokenize 时提示 LookupError: Resource punkt not found
from nltk.tokenize import word_tokenize
string=df['title'][0]
string
#输出结果为
'Economy Gains 227,000 Jobs in January Under President Trump - Breitbart'
word_tokenize(string)
报错提示如下(LookupError:找不到 punkt 分词模型资源):
---------------------------------------------------------------------------
LookupError                               Traceback (most recent call last)
<ipython-input-38-543e2cb9f883> in <module>
----> 1 word_tokenize(string)

C:\ProgramData\Anaconda3\lib\site-packages\nltk\tokenize\__init__.py in word_tokenize(text, language, preserve_line)
    127     :type preserve_line: bool
    128     """
--> 129     sentences = [text] if preserve_line else sent_tokenize(text, language)
    130     return [
    131         token for sent in sentences for token in _treebank_word_tokenizer.tokenize(sent)

C:\ProgramData\Anaconda3\lib\site-packages\nltk\tokenize\__init__.py in sent_tokenize(text, language)
    104     :param language: the model name in the Punkt corpus
    105     """
--> 106     tokenizer = load("tokenizers/punkt/{0}.pickle".format(language))
    107     return tokenizer.tokenize(text)
    108 

C:\ProgramData\Anaconda3\lib\site-packages\nltk\data.py in load(resource_url, format, cache, verbose, logic_parser, fstruct_reader, encoding)
    750 
    751     # Load the resource.
--> 752     opened_resource = _open(resource_url)
    753 
    754     if format == "raw":

C:\ProgramData\Anaconda3\lib\site-packages\nltk\data.py in _open(resource_url)
    875 
    876     if protocol is None or protocol.lower() == "nltk":
--> 877         return find(path_, path + [""]).open()
    878     elif protocol.lower() == "file":
    879         # urllib might not use mode='rb', so handle this one ourselves:

C:\ProgramData\Anaconda3\lib\site-packages\nltk\data.py in find(resource_name, paths)
    583     sep = "*" * 70
    584     resource_not_found = "\n%s\n%s\n%s\n" % (sep, msg, sep)
--> 585     raise LookupError(resource_not_found)
    586 
    587 

LookupError: 
**********************************************************************
  Resource punkt not found.
  Please use the NLTK Downloader to obtain the resource:

  >>> import nltk
  >>> nltk.download('punkt')

  For more information see: https://www.nltk.org/data.html

  Attempted to load tokenizers/punkt/english.pickle

  Searched in:
    - 'C:\\Users\\chen/nltk_data'
    - 'C:\\ProgramData\\Anaconda3\\nltk_data'
    - 'C:\\ProgramData\\Anaconda3\\share\\nltk_data'
    - 'C:\\ProgramData\\Anaconda3\\lib\\nltk_data'
    - 'C:\\Users\\chen\\AppData\\Roaming\\nltk_data'
    - 'C:\\nltk_data'
    - 'D:\\nltk_data'
    - 'E:\\nltk_data'
    - ''
**********************************************************************






评论(8)

推荐帖子
0条评论
0条评论
0条评论