"""Snapshot tests for the ``en_core_web_sm`` spaCy pipeline.

A fixed English paragraph is parsed once and the resulting entities,
noun chunks and verb lemmas are compared against pinned expectations.
"""
import pytest
import spacy

en_text = (
    "When Sebastian Thrun started working on self-driving cars at "
    "Google in 2007, few people outside of the company took him "
    "seriously. “I can tell you very senior CEOs of major American "
    "car companies would shake my hand and turn away because I wasn’t "
    "worth talking to,” said Thrun, in an interview with Recode earlier "
    "this week.")


# Module scope: the tests below only read from the pipeline and the parsed
# document, so loading and parsing once per module avoids repeating the
# same work for every test function.
@pytest.fixture(scope="module")
def en_core_web_sm():
    """Return the pipeline produced by ``spacy.load("en_core_web_sm")``."""
    return spacy.load("en_core_web_sm")


@pytest.fixture(scope="module")
def doc_en_core_web_sm(en_core_web_sm):
    """Return the result of running the pipeline over ``en_text``."""
    return en_core_web_sm(en_text)


def test_entities(doc_en_core_web_sm):
    """Compare ``(text, label_)`` of every item in ``doc.ents``."""
    entities = [(ent.text, ent.label_) for ent in doc_en_core_web_sm.ents]

    assert entities == [
        ('Sebastian Thrun', 'PERSON'),
        ('Google', 'ORG'),
        ('2007', 'DATE'),
        ('American', 'NORP'),
        # NOTE(review): 'GPE' for the surname looks like a model
        # misclassification that was pinned as-is -- confirm before changing.
        ('Thrun', 'GPE'),
        ('Recode', 'ORG'),
        ('earlier this week', 'DATE'),
    ]


def test_nouns(doc_en_core_web_sm):
    """Compare the text of every item in ``doc.noun_chunks``, in order."""
    noun_chunks = [chunk.text for chunk in doc_en_core_web_sm.noun_chunks]

    assert noun_chunks == [
        'Sebastian Thrun',
        'self-driving cars',
        'Google',
        'few people',
        'the company',
        'him',
        'I',
        'you',
        'very senior CEOs',
        'major American car companies',
        'my hand',
        'I',
        'Thrun',
        'an interview',
        'Recode',
    ]


def test_verbs(doc_en_core_web_sm):
    """Compare ``lemma_`` of every token whose ``pos_`` is ``"VERB"``."""
    verb_lemmas = [
        token.lemma_
        for token in doc_en_core_web_sm
        if token.pos_ == "VERB"
    ]

    assert verb_lemmas == [
        'start',
        'work',
        'drive',
        'take',
        'tell',
        'shake',
        'turn',
        'talk',
        'say',
    ]