simple sentence parser and dataset tester program
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
venv
|
||||
.vscode
|
||||
0
__init__.py
Normal file
0
__init__.py
Normal file
BIN
__pycache__/parser.cpython-313.pyc
Normal file
BIN
__pycache__/parser.cpython-313.pyc
Normal file
Binary file not shown.
14
main.py
Normal file
14
main.py
Normal file
@@ -0,0 +1,14 @@
|
||||
from tools.parser import parse_sentence, extract_dependency_relationships
|
||||
|
||||
def main():
    """Parse one demo sentence and print its dependency relationships."""
    text = "The quick brown fox jumps over the lazy dog."
    doc = parse_sentence(text)

    print("\nDependency relationships:")
    # Each relation links a word to its syntactic head via a dependency label.
    for rel in extract_dependency_relationships(doc):
        print(f"{rel['word']} --{rel['dep_type']}--> {rel['head']}")


if __name__ == "__main__":
    main()
|
||||
0
testing/__init__.py
Normal file
0
testing/__init__.py
Normal file
BIN
testing/__pycache__/datasets.cpython-313.pyc
Normal file
BIN
testing/__pycache__/datasets.cpython-313.pyc
Normal file
Binary file not shown.
41
testing/dataset_testing.py
Normal file
41
testing/dataset_testing.py
Normal file
@@ -0,0 +1,41 @@
|
||||
import spacy
|
||||
from tools import parser
|
||||
|
||||
# Load spaCy and dataset
nlp = spacy.load("en_core_web_sm")
# NOTE(review): tools/parser.py defines no `load_dataset`, so this attribute
# lookup will raise AttributeError at import time. This looks like it was
# meant to be HuggingFace `from datasets import load_dataset` — confirm and
# fix the import rather than routing through the parser module.
dataset = parser.load_dataset("glue", "mrpc")
|
||||
|
||||
def process_sentence_pair(sentence1, sentence2):
    """Parse two sentences with spaCy and collect their dependency graphs.

    Returns a dict holding the raw sentences, the extracted dependency
    lists, and the parsed Doc objects for both inputs.
    """
    doc_a = nlp(sentence1)
    doc_b = nlp(sentence2)

    # Keys are fixed so downstream printing code can address each side.
    return {
        'sentence1': sentence1,
        'sentence2': sentence2,
        'dependencies1': parser.extract_dependency_relationships(doc_a),
        'dependencies2': parser.extract_dependency_relationships(doc_b),
        'doc1': doc_a,
        'doc2': doc_b,
    }
|
||||
|
||||
# Walk the first few MRPC training pairs and show their parses.
print("Processing MRPC examples...")
train_split = dataset['train']
for idx in range(3):  # only the first 3 examples
    example = train_split[idx]
    pair = process_sentence_pair(example['sentence1'], example['sentence2'])

    print(f"\nExample {idx+1}:")
    print(f"Sentence 1: {pair['sentence1']}")
    print(f"Sentence 2: {pair['sentence2']}")
    print(f"Label: {example['label']} (1=paraphrase, 0=not paraphrase)")

    print(f"\nDependencies for Sentence 1:")
    # Truncate to the first 5 relations to keep the output readable.
    for rel in pair['dependencies1'][:5]:
        print(f"  {rel['word']} --{rel['dep_type']}--> {rel['head']}")
|
||||
0
tools/__init__.py
Normal file
0
tools/__init__.py
Normal file
BIN
tools/__pycache__/__init__.cpython-313.pyc
Normal file
BIN
tools/__pycache__/__init__.cpython-313.pyc
Normal file
Binary file not shown.
BIN
tools/__pycache__/parser.cpython-313.pyc
Normal file
BIN
tools/__pycache__/parser.cpython-313.pyc
Normal file
Binary file not shown.
35
tools/parser.py
Normal file
35
tools/parser.py
Normal file
@@ -0,0 +1,35 @@
|
||||
import spacy
|
||||
|
||||
# English model
# Loaded once at import time; both functions below share this pipeline.
nlp = spacy.load("en_core_web_sm")
|
||||
|
||||
# Parse a single sentence
|
||||
def parse_sentence(sentence):
    """Run *sentence* through spaCy, print a per-token summary, return the Doc."""
    doc = nlp(sentence)

    # One fixed-width row per token: surface form, dependency label,
    # syntactic head, and part of speech.
    row = "Text: {0.text:<12} Dep: {0.dep_:<10} Head: {0.head.text:<10} POS: {0.pos_:<8}"
    print("Token-by-token analysis:")
    for tok in doc:
        print(row.format(tok))

    return doc
|
||||
|
||||
def extract_dependency_relationships(doc):
    """Extract dependency relationships for graph representation.

    Skips punctuation tokens; every remaining token yields one record
    linking it (word/lemma/POS) to its syntactic head via a dependency
    label, suitable for building an edge list.
    """
    return [
        {
            'word': tok.text,
            'lemma': tok.lemma_,
            'dep_type': tok.dep_,
            'head': tok.head.text,
            'head_lemma': tok.head.lemma_,
            'pos': tok.pos_,
        }
        for tok in doc
        if not tok.is_punct
    ]
|
||||
Reference in New Issue
Block a user