>>> from pattern.en import parse
>>>
>>> s = 'The mobile web is more important than mobile apps.'
>>> s = parse(s, relations=True, lemmata=True)
>>> print s
'The/DT/B-NP/O/NP-SBJ-1/the mobile/JJ/I-NP/O/NP-SBJ-1/mobile' ...
from spacy.en import English

# Load the English pipeline (tokenizer, tagger, parser).
nlp = English()
doc = nlp(u'A whole document.\nNo preprocessing require. Robust to arbitrary formating.')
# Iterating a Doc yields individual tokens, not sentences;
# sentence spans are exposed via the `doc.sents` generator.
for sent in doc.sents:
    for token in sent:
        if token.is_alpha:
            # orth_: token text, tag_: fine-grained POS, head.lemma_: lemma of the syntactic head.
            print(token.orth_, token.tag_, token.head.lemma_)
Choi et al. (2015) 发现 spaCy 是可用的最快的依赖解析器。它在单个线程上每秒可处理超过 13,000 个句子。在《华尔街日报》语料的标准评估中,它的得分为 92.7%,比 CoreNLP 的任何一个模型都要准确 1% 以上。
import os
from nltk.parse.corenlp import CoreNLPServer
# Launch a Stanford CoreNLP server as a background Java process so that
# NLTK's CoreNLP parser clients can connect to it over HTTP.
# The server needs to know the location of the following files:
# - stanford-corenlp-X.X.X.jar
# - stanford-corenlp-X.X.X-models.jar
# Directory holding the unpacked CoreNLP distribution (relative to the CWD).
STANFORD = os.path.join("models", "stanford-corenlp-full-2018-02-27")
# Create the server
server = CoreNLPServer(
   os.path.join(STANFORD, "stanford-corenlp-3.9.1.jar"),
   os.path.join(STANFORD, "stanford-corenlp-3.9.1-models.jar"),
)
# Start the server in the background
# NOTE(review): the process keeps running until server.stop() is called —
# remember to stop it when done to free the port and the JVM.
server.start()