Removed old files and added MSRParaphrase parser
This commit is contained in:
6
basic-testing/parse_file.py
Normal file
6
basic-testing/parse_file.py
Normal file
@@ -0,0 +1,6 @@
|
||||
import pandas as pd
|
||||
|
||||
def load_msr_data(file_path):
    """Load the MSR Paraphrase Corpus from a TSV file.

    Args:
        file_path: Path of the tab-separated corpus file to read.

    Returns:
        A pandas DataFrame parsed from ``file_path``; the first line of the
        file is used as the column header.
    """
    # quoting=3 is csv.QUOTE_NONE: quote characters that appear inside the
    # sentences are kept as literal text rather than treated as field quoting.
    return pd.read_csv(file_path, sep="\t", quoting=3)
|
||||
29
basic-testing/parse_tree.py
Normal file
29
basic-testing/parse_tree.py
Normal file
@@ -0,0 +1,29 @@
|
||||
import spacy
|
||||
from spacy import displacy
|
||||
|
||||
# Load the small English spaCy pipeline once at import time; extract_parse_tree reuses it.
|
||||
nlp = spacy.load("en_core_web_sm")
|
||||
|
||||
def extract_parse_tree(text):
    """Parse *text* with the module-level pipeline and print its dependencies.

    After a short header, one row is printed per token: the token text, its
    dependency label, the text of its head token, and the texts of its
    children.

    Returns the parsed document produced by ``nlp(text)``.
    """
    parsed = nlp(text)

    # Header: the raw sentence, a title, and a 50-character rule.
    header = (f"Sentence: {text}", "\nDependency Parse Tree:", "-" * 50)
    for line in header:
        print(line)

    for tok in parsed:
        children = [kid.text for kid in tok.children]
        print(f"{tok.text:<12} {tok.dep_:<12} {tok.head.text:<12} {children}")

    return parsed
|
||||
|
||||
# Demo: run the parser over a few sample sentences.
test_sentences = [
    "The cat sat on the mat.",
    "A quick brown fox jumps over the lazy dog.",
    "She gave him the book yesterday.",
]

for sentence in test_sentences:
    doc = extract_parse_tree(sentence)
    # Visual divider between consecutive parses.
    print(f"\n{'=' * 60}\n")
|
||||
Reference in New Issue
Block a user