Simple sentence parser and dataset tester program

This commit is contained in:
Henry Dowd
2025-10-08 14:55:08 +01:00
parent ed7046a8c0
commit 188a8e5852
11 changed files with 92 additions and 0 deletions

0
tools/__init__.py Normal file
View File

Binary file not shown.

Binary file not shown.

35
tools/parser.py Normal file
View File

@@ -0,0 +1,35 @@
import spacy
# Load the spaCy English pipeline "en_core_web_sm" once, at import time;
# parse_sentence() calls this shared object to analyse its input.
nlp = spacy.load("en_core_web_sm")
# Parse a single sentence
def parse_sentence(sentence):
    """Analyse *sentence* with the module-level pipeline and return the result.

    As a side effect, prints a header line followed by one aligned row per
    token showing its text, dependency label, head text and POS tag.
    """
    analysed = nlp(sentence)
    print("Token-by-token analysis:")
    for tok in analysed:
        governor = tok.head
        print(f"Text: {tok.text:<12} Dep: {tok.dep_:<10} Head: {governor.text:<10} POS: {tok.pos_:<8}")
    return analysed
def extract_dependency_relationships(doc):
    """Collect dependency records for graph representation.

    Iterates over the tokens of *doc* and, for every token that is not
    punctuation, produces a dict with the token's text, lemma, dependency
    label and POS tag, plus the text and lemma of its head token.

    Returns:
        A list of those dicts, in the same order as the tokens in *doc*.
    """
    return [
        {
            'word': tok.text,
            'lemma': tok.lemma_,
            'dep_type': tok.dep_,
            'head': tok.head.text,
            'head_lemma': tok.head.lemma_,
            'pos': tok.pos_,
        }
        for tok in doc
        if not tok.is_punct  # punctuation tokens are left out of the graph
    ]