Preprocessing text for each analysis type
This commit is contained in:
@@ -1,5 +1,37 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": "Import Cell\n",
|
||||
"id": "dd72d1539056a64"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-11-21T17:01:35.974978Z",
|
||||
"start_time": "2025-11-21T17:01:34.412508Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import token\n",
|
||||
"import spacy\n",
|
||||
"from spacy import displacy\n",
|
||||
"from IPython.display import display, HTML\n",
|
||||
"\n",
|
||||
"nlp = spacy.load(\"en_core_web_md\") # Medium size model\n",
|
||||
"\n",
|
||||
"test_sentences = [\n",
|
||||
" \"The cat sat on the mat.\",\n",
|
||||
" \"On the mat, the cat was sitting.\",\n",
|
||||
" \"A completely different sentence about something else.\"\n",
|
||||
"]"
|
||||
],
|
||||
"id": "12579bf734bb1a92",
|
||||
"outputs": [],
|
||||
"execution_count": 21
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
@@ -15,16 +47,6 @@
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import token\n",
|
||||
"import spacy\n",
|
||||
"\n",
|
||||
"nlp = spacy.load(\"en_core_web_md\") # Can swap for large model if required\n",
|
||||
"\n",
|
||||
"test_sentences = [\n",
|
||||
" \"The cat sat on the mat.\",\n",
|
||||
" \"On the mat, the cat was sitting.\",\n",
|
||||
" \"A completely different sentence about something else.\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for sent in test_sentences:\n",
|
||||
" doc = nlp(sent)\n",
|
||||
@@ -55,28 +77,19 @@
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-11-20T19:53:11.868566Z",
|
||||
"start_time": "2025-11-20T19:53:11.861295Z"
|
||||
"end_time": "2025-11-21T16:32:45.216663Z",
|
||||
"start_time": "2025-11-21T16:32:42.601290Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"import token\n",
|
||||
"\n",
|
||||
"nlp = spacy.load(\"en_core_web_md\")\n",
|
||||
"\n",
|
||||
"test_sentences = [\n",
|
||||
" \"The cat sat on the mat.\",\n",
|
||||
" \"On the mat, the cat was sitting.\",\n",
|
||||
" \"A completely different sentence about something else.\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"class TextPreprocessor:\n",
|
||||
" def __init__(self):\n",
|
||||
" self.nlp = spacy.load(\"en_core_web_md\")\n",
|
||||
"\n",
|
||||
" def direct_detection(self, text):\n",
|
||||
" @staticmethod\n",
|
||||
" def direct_detection(text):\n",
|
||||
" \"\"\"For direct copy detection\"\"\"\n",
|
||||
" #Keep punctuation\n",
|
||||
" return text.lower().strip()\n",
|
||||
@@ -84,36 +97,78 @@
|
||||
" def semantic_analysis(self, text):\n",
|
||||
" \"\"\"Semantic Similarity\"\"\"\n",
|
||||
" doc = self.nlp(text)\n",
|
||||
" tokens = []\n",
|
||||
" processed_tokens = []\n",
|
||||
" # Remove stopwords, punctuation\n",
|
||||
" for token in doc:\n",
|
||||
" if (not token.is_punct and not token.is_space and token.is_alpha and token.is_stop and len(token.lemma_) > 1): #Remove single char tokens\n",
|
||||
" tokens.append(token.lemma_.lower())\n",
|
||||
" return \" \".join(tokens)\n",
|
||||
"\n",
|
||||
" if not token.is_punct and not token.is_space and token.is_alpha and not token.is_stop:\n",
|
||||
" processed_tokens.append(token.lemma_.lower())\n",
|
||||
" return \" \".join(processed_tokens)\n",
|
||||
"\n",
|
||||
"    def syntactic_analysis(self, text):\n",
|
||||
"        \"\"\"Normalise text for syntactic comparison.\n",
|
||||
"\n",
|
||||
"        Whitespace tokens are dropped, punctuation tokens are kept verbatim and\n",
|
||||
"        every other token (stop word or content word) becomes its lower-cased lemma.\n",
|
||||
"\n",
|
||||
"        Returns the resulting pieces joined by single spaces.\n",
|
||||
"        \"\"\"\n",
|
||||
"        pieces = [\n",
|
||||
"            tok.text if tok.is_punct else tok.lemma_.lower()\n",
|
||||
"            for tok in self.nlp(text)\n",
|
||||
"            if not tok.is_space\n",
|
||||
"        ]\n",
|
||||
"        return \" \".join(pieces)\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"preprocessor = TextPreprocessor()\n",
|
||||
"\n",
|
||||
"processed_direct = []\n",
|
||||
"processed_semantic = []\n",
|
||||
"processed_syntactic = []\n",
|
||||
"\n",
|
||||
"for sentence in test_sentences:\n",
|
||||
"    # Run every sentence through all three pipelines; appending keeps the three\n",
|
||||
"    # result lists index-aligned with test_sentences. (Indexing the empty list with\n",
|
||||
"    # the builtin `iter` raised TypeError and put semantic output in the direct list.)\n",
|
||||
"    processed_direct.append(preprocessor.direct_detection(sentence))\n",
|
||||
"    processed_semantic.append(preprocessor.semantic_analysis(sentence))\n",
|
||||
"    processed_syntactic.append(preprocessor.syntactic_analysis(sentence))\n",
|
||||
"#print(preprocessor.syntactic_analysis(\"A completely different sentence about something else.\"))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"for sent in test_sentences:\n",
|
||||
" print(f\"Original Sentence: {sent}\")\n",
|
||||
" print(\"---\")\n",
|
||||
" print(f\"Preprocessed Sentence: {preprocess_semantic(sent)}\")\n",
|
||||
" print(\"--- Semantic Analysis ---\")\n",
|
||||
" print(f\"Preprocessed Sentence: {preprocessor.semantic_analysis(sent)}\")\n",
|
||||
" print(\"--- Syntactic Analysis ---\")\n",
|
||||
" print(f\"Preprocessed Sentence: {preprocessor.syntactic_analysis(sent)}\")\n",
|
||||
" print(\"-\" * 50)"
|
||||
],
|
||||
"id": "5e488a878a5cfccb",
|
||||
"outputs": [
|
||||
{
|
||||
"ename": "IndentationError",
|
||||
"evalue": "expected an indented block after 'if' statement on line 26 (400725648.py, line 31)",
|
||||
"output_type": "error",
|
||||
"traceback": [
|
||||
" \u001B[36mCell\u001B[39m\u001B[36m \u001B[39m\u001B[32mIn[19]\u001B[39m\u001B[32m, line 31\u001B[39m\n\u001B[31m \u001B[39m\u001B[31mfor sent in test_sentences:\u001B[39m\n ^\n\u001B[31mIndentationError\u001B[39m\u001B[31m:\u001B[39m expected an indented block after 'if' statement on line 26\n"
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Original Sentence: The cat sat on the mat.\n",
|
||||
"--- Semantic Analysis ---\n",
|
||||
"Preprocessed Sentence: cat sit mat\n",
|
||||
"--- Syntactic Analysis ---\n",
|
||||
"Preprocessed Sentence: the cat sit on the mat .\n",
|
||||
"--------------------------------------------------\n",
|
||||
"Original Sentence: On the mat, the cat was sitting.\n",
|
||||
"--- Semantic Analysis ---\n",
|
||||
"Preprocessed Sentence: mat cat sit\n",
|
||||
"--- Syntactic Analysis ---\n",
|
||||
"Preprocessed Sentence: on the mat , the cat be sit .\n",
|
||||
"--------------------------------------------------\n",
|
||||
"Original Sentence: A completely different sentence about something else.\n",
|
||||
"--- Semantic Analysis ---\n",
|
||||
"Preprocessed Sentence: completely different sentence\n",
|
||||
"--- Syntactic Analysis ---\n",
|
||||
"Preprocessed Sentence: a completely different sentence about something else .\n",
|
||||
"--------------------------------------------------\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"execution_count": 19
|
||||
"execution_count": 17
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
@@ -124,9 +179,6 @@
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"\n",
|
||||
"nlp = spacy.load(\"en_core_web_md\")\n",
|
||||
"\n",
|
||||
"def extract_parse_tree(text):\n",
|
||||
" doc = nlp(text)\n",
|
||||
@@ -140,12 +192,6 @@
|
||||
"\n",
|
||||
" return doc\n",
|
||||
"\n",
|
||||
"test_sentences = [\n",
|
||||
" \"The cat sat on the mat.\",\n",
|
||||
" \"On the mat, the cat was sitting.\",\n",
|
||||
" \"A completely different sentence about something else.\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"for sentence in test_sentences:\n",
|
||||
" doc = extract_parse_tree(sentence)\n",
|
||||
" print(\"\\n\" + \"=\"*60 + \"\\n\")"
|
||||
@@ -206,34 +252,36 @@
|
||||
],
|
||||
"execution_count": 15
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
"cell_type": "markdown",
|
||||
"source": "***USE NetworkX***",
|
||||
"id": "5b5c8742d7c4c4c5"
|
||||
},
|
||||
{
|
||||
"metadata": {
|
||||
"ExecuteTime": {
|
||||
"end_time": "2025-11-20T15:46:08.461059Z",
|
||||
"start_time": "2025-11-20T15:45:47.529073Z"
|
||||
"end_time": "2025-11-21T18:20:09.575176Z",
|
||||
"start_time": "2025-11-21T18:20:09.465504Z"
|
||||
}
|
||||
},
|
||||
"cell_type": "code",
|
||||
"source": [
|
||||
"import spacy\n",
|
||||
"from spacy import displacy\n",
|
||||
"from IPython.display import display, HTML\n",
|
||||
"\n",
|
||||
"nlp = spacy.load(\"en_core_web_md\")\n",
|
||||
"\n",
|
||||
"test_sentences = [\n",
|
||||
" \"The cat sat on the mat.\",\n",
|
||||
" \"On the mat, the cat was sitting.\",\n",
|
||||
" \"A completely different sentence about something else.\"\n",
|
||||
"]\n",
|
||||
"\n",
|
||||
"def visualize_parse_tree(text, distance=100):\n",
|
||||
"    \"\"\"Display the dependency parse of `text` inline in the notebook.\n",
|
||||
"\n",
|
||||
"    The text is parsed with the notebook-level `nlp` pipeline; displaCy returns the\n",
|
||||
"    \"dep\" markup as a string (jupyter=False), which is then shown via IPython's HTML.\n",
|
||||
"\n",
|
||||
"    Args:\n",
|
||||
"        text: Sentence to parse and draw.\n",
|
||||
"        distance: Spacing between words handed to displaCy; defaults to 100,\n",
|
||||
"            the value that used to be hard-coded.\n",
|
||||
"    \"\"\"\n",
|
||||
"    doc = nlp(text)\n",
|
||||
"    markup = displacy.render(doc, style=\"dep\", jupyter=False, options={\"distance\": distance})\n",
|
||||
"    display(HTML(markup))\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"\n",
|
||||
"# Show the parse of the normalised text next to the parse of the raw sentence.\n",
|
||||
"for sentence in test_sentences:\n",
|
||||
"    print(f\"Sentence: {sentence}\")\n",
|
||||
"    print(\"---\")\n",
|
||||
"    processed_sentence = preprocessor.syntactic_analysis(sentence)\n",
|
||||
"    print(f\"Processed Sentence: {processed_sentence}\")\n",
|
||||
"    visualize_parse_tree(processed_sentence)  # lemmatised, lower-cased form\n",
|
||||
"    visualize_parse_tree(sentence)  # original wording"
|
||||
],
|
||||
"id": "e413238c1af12f62",
|
||||
@@ -242,7 +290,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Sentence: The cat sat on the mat.\n"
|
||||
"Sentence: The cat sat on the mat.\n",
|
||||
"---\n",
|
||||
"Processed Sentence: the cat sit on the mat .\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -251,7 +301,92 @@
|
||||
"<IPython.core.display.HTML object>"
|
||||
],
|
||||
"text/html": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"30acd41e2fa8417a92c8204d90501047-0\" class=\"displacy\" width=\"650\" height=\"237.0\" direction=\"ltr\" style=\"max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"cbdcc1a7b2c3434da049b33ce22ba9dc-0\" class=\"displacy\" width=\"650\" height=\"237.0\" direction=\"ltr\" style=\"max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">the</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">DET</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"150\">cat</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"150\">NOUN</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"250\">sit</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"250\">VERB</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"350\">on</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"350\">ADP</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"450\">the</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"450\">DET</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"550\">mat .</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"550\">NOUN</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-0\" stroke-width=\"2px\" d=\"M70,102.0 C70,52.0 145.0,52.0 145.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M70,104.0 L62,92.0 78,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-1\" stroke-width=\"2px\" d=\"M170,102.0 C170,52.0 245.0,52.0 245.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M170,104.0 L162,92.0 178,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-2\" stroke-width=\"2px\" d=\"M270,102.0 C270,52.0 345.0,52.0 345.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M345.0,104.0 L353.0,92.0 337.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-3\" stroke-width=\"2px\" d=\"M470,102.0 C470,52.0 545.0,52.0 545.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M470,104.0 L462,92.0 478,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-4\" stroke-width=\"2px\" d=\"M370,102.0 C370,2.0 550.0,2.0 550.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-cbdcc1a7b2c3434da049b33ce22ba9dc-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M550.0,104.0 L558.0,92.0 542.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
],
|
||||
"text/html": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"03663f6b8c874937b96eb1c0bf766ed5-0\" class=\"displacy\" width=\"650\" height=\"237.0\" direction=\"ltr\" style=\"max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">The</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">DET</tspan>\n",
|
||||
@@ -283,41 +418,41 @@
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-30acd41e2fa8417a92c8204d90501047-0-0\" stroke-width=\"2px\" d=\"M70,102.0 C70,52.0 145.0,52.0 145.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-03663f6b8c874937b96eb1c0bf766ed5-0-0\" stroke-width=\"2px\" d=\"M70,102.0 C70,52.0 145.0,52.0 145.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-30acd41e2fa8417a92c8204d90501047-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-03663f6b8c874937b96eb1c0bf766ed5-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M70,104.0 L62,92.0 78,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-30acd41e2fa8417a92c8204d90501047-0-1\" stroke-width=\"2px\" d=\"M170,102.0 C170,52.0 245.0,52.0 245.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-03663f6b8c874937b96eb1c0bf766ed5-0-1\" stroke-width=\"2px\" d=\"M170,102.0 C170,52.0 245.0,52.0 245.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-30acd41e2fa8417a92c8204d90501047-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-03663f6b8c874937b96eb1c0bf766ed5-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M170,104.0 L162,92.0 178,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-30acd41e2fa8417a92c8204d90501047-0-2\" stroke-width=\"2px\" d=\"M270,102.0 C270,52.0 345.0,52.0 345.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-03663f6b8c874937b96eb1c0bf766ed5-0-2\" stroke-width=\"2px\" d=\"M270,102.0 C270,52.0 345.0,52.0 345.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-30acd41e2fa8417a92c8204d90501047-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-03663f6b8c874937b96eb1c0bf766ed5-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M345.0,104.0 L353.0,92.0 337.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-30acd41e2fa8417a92c8204d90501047-0-3\" stroke-width=\"2px\" d=\"M470,102.0 C470,52.0 545.0,52.0 545.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-03663f6b8c874937b96eb1c0bf766ed5-0-3\" stroke-width=\"2px\" d=\"M470,102.0 C470,52.0 545.0,52.0 545.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-30acd41e2fa8417a92c8204d90501047-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-03663f6b8c874937b96eb1c0bf766ed5-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M470,104.0 L462,92.0 478,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-30acd41e2fa8417a92c8204d90501047-0-4\" stroke-width=\"2px\" d=\"M370,102.0 C370,2.0 550.0,2.0 550.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-03663f6b8c874937b96eb1c0bf766ed5-0-4\" stroke-width=\"2px\" d=\"M370,102.0 C370,2.0 550.0,2.0 550.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-30acd41e2fa8417a92c8204d90501047-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-03663f6b8c874937b96eb1c0bf766ed5-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M550.0,104.0 L558.0,92.0 542.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
@@ -334,7 +469,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Sentence: On the mat, the cat was sitting.\n"
|
||||
"Sentence: On the mat, the cat was sitting.\n",
|
||||
"---\n",
|
||||
"Processed Sentence: on the mat , the cat be sit .\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -343,7 +480,105 @@
|
||||
"<IPython.core.display.HTML object>"
|
||||
],
|
||||
"text/html": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"a6d4123128854110b9b87ac31b74258f-0\" class=\"displacy\" width=\"750\" height=\"287.0\" direction=\"ltr\" style=\"max-width: none; height: 287.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"c413fa0318d04c93b9c3ffea0645caff-0\" class=\"displacy\" width=\"750\" height=\"287.0\" direction=\"ltr\" style=\"max-width: none; height: 287.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">on</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">ADP</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"150\">the</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"150\">DET</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"250\">mat ,</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"250\">NOUN</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"350\">the</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"350\">DET</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"450\">cat</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"450\">NOUN</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"550\">be</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"550\">AUX</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"650\">sit .</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"650\">VERB</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-c413fa0318d04c93b9c3ffea0645caff-0-0\" stroke-width=\"2px\" d=\"M70,152.0 C70,2.0 650.0,2.0 650.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-c413fa0318d04c93b9c3ffea0645caff-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M70,154.0 L62,142.0 78,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-c413fa0318d04c93b9c3ffea0645caff-0-1\" stroke-width=\"2px\" d=\"M170,152.0 C170,102.0 240.0,102.0 240.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-c413fa0318d04c93b9c3ffea0645caff-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M170,154.0 L162,142.0 178,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-c413fa0318d04c93b9c3ffea0645caff-0-2\" stroke-width=\"2px\" d=\"M70,152.0 C70,52.0 245.0,52.0 245.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-c413fa0318d04c93b9c3ffea0645caff-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M245.0,154.0 L253.0,142.0 237.0,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-c413fa0318d04c93b9c3ffea0645caff-0-3\" stroke-width=\"2px\" d=\"M370,152.0 C370,102.0 440.0,102.0 440.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-c413fa0318d04c93b9c3ffea0645caff-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M370,154.0 L362,142.0 378,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-c413fa0318d04c93b9c3ffea0645caff-0-4\" stroke-width=\"2px\" d=\"M470,152.0 C470,52.0 645.0,52.0 645.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-c413fa0318d04c93b9c3ffea0645caff-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M470,154.0 L462,142.0 478,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-c413fa0318d04c93b9c3ffea0645caff-0-5\" stroke-width=\"2px\" d=\"M570,152.0 C570,102.0 640.0,102.0 640.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-c413fa0318d04c93b9c3ffea0645caff-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">aux</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M570,154.0 L562,142.0 578,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
],
|
||||
"text/html": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"0a7ff36b205d4165a3261fa3265e8c53-0\" class=\"displacy\" width=\"750\" height=\"287.0\" direction=\"ltr\" style=\"max-width: none; height: 287.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"197.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">On</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">ADP</tspan>\n",
|
||||
@@ -380,49 +615,49 @@
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-a6d4123128854110b9b87ac31b74258f-0-0\" stroke-width=\"2px\" d=\"M70,152.0 C70,2.0 650.0,2.0 650.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-0a7ff36b205d4165a3261fa3265e8c53-0-0\" stroke-width=\"2px\" d=\"M70,152.0 C70,2.0 650.0,2.0 650.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-a6d4123128854110b9b87ac31b74258f-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-0a7ff36b205d4165a3261fa3265e8c53-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M70,154.0 L62,142.0 78,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-a6d4123128854110b9b87ac31b74258f-0-1\" stroke-width=\"2px\" d=\"M170,152.0 C170,102.0 240.0,102.0 240.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-0a7ff36b205d4165a3261fa3265e8c53-0-1\" stroke-width=\"2px\" d=\"M170,152.0 C170,102.0 240.0,102.0 240.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-a6d4123128854110b9b87ac31b74258f-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-0a7ff36b205d4165a3261fa3265e8c53-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M170,154.0 L162,142.0 178,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-a6d4123128854110b9b87ac31b74258f-0-2\" stroke-width=\"2px\" d=\"M70,152.0 C70,52.0 245.0,52.0 245.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-0a7ff36b205d4165a3261fa3265e8c53-0-2\" stroke-width=\"2px\" d=\"M70,152.0 C70,52.0 245.0,52.0 245.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-a6d4123128854110b9b87ac31b74258f-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-0a7ff36b205d4165a3261fa3265e8c53-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M245.0,154.0 L253.0,142.0 237.0,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-a6d4123128854110b9b87ac31b74258f-0-3\" stroke-width=\"2px\" d=\"M370,152.0 C370,102.0 440.0,102.0 440.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-0a7ff36b205d4165a3261fa3265e8c53-0-3\" stroke-width=\"2px\" d=\"M370,152.0 C370,102.0 440.0,102.0 440.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-a6d4123128854110b9b87ac31b74258f-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-0a7ff36b205d4165a3261fa3265e8c53-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M370,154.0 L362,142.0 378,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-a6d4123128854110b9b87ac31b74258f-0-4\" stroke-width=\"2px\" d=\"M470,152.0 C470,52.0 645.0,52.0 645.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-0a7ff36b205d4165a3261fa3265e8c53-0-4\" stroke-width=\"2px\" d=\"M470,152.0 C470,52.0 645.0,52.0 645.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-a6d4123128854110b9b87ac31b74258f-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-0a7ff36b205d4165a3261fa3265e8c53-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">nsubj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M470,154.0 L462,142.0 478,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-a6d4123128854110b9b87ac31b74258f-0-5\" stroke-width=\"2px\" d=\"M570,152.0 C570,102.0 640.0,102.0 640.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-0a7ff36b205d4165a3261fa3265e8c53-0-5\" stroke-width=\"2px\" d=\"M570,152.0 C570,102.0 640.0,102.0 640.0,152.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-a6d4123128854110b9b87ac31b74258f-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">aux</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-0a7ff36b205d4165a3261fa3265e8c53-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">aux</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M570,154.0 L562,142.0 578,142.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
@@ -439,7 +674,9 @@
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Sentence: A completely different sentence about something else.\n"
|
||||
"Sentence: A completely different sentence about something else.\n",
|
||||
"---\n",
|
||||
"Processed Sentence: a completely different sentence about something else .\n"
|
||||
]
|
||||
},
|
||||
{
|
||||
@@ -448,7 +685,105 @@
|
||||
"<IPython.core.display.HTML object>"
|
||||
],
|
||||
"text/html": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"af01d7eb10f84122b89ef1cd5057e52c-0\" class=\"displacy\" width=\"750\" height=\"237.0\" direction=\"ltr\" style=\"max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"358275480b4d4b099e836e8136c5b023-0\" class=\"displacy\" width=\"750\" height=\"237.0\" direction=\"ltr\" style=\"max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">a</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">DET</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"150\">completely</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"150\">ADV</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"250\">different</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"250\">ADJ</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"350\">sentence</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"350\">NOUN</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"450\">about</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"450\">ADP</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"550\">something</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"550\">PRON</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"650\">else .</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"650\">ADV</tspan>\n",
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-358275480b4d4b099e836e8136c5b023-0-0\" stroke-width=\"2px\" d=\"M70,102.0 C70,2.0 350.0,2.0 350.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-358275480b4d4b099e836e8136c5b023-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M70,104.0 L62,92.0 78,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-358275480b4d4b099e836e8136c5b023-0-1\" stroke-width=\"2px\" d=\"M170,102.0 C170,52.0 245.0,52.0 245.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-358275480b4d4b099e836e8136c5b023-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advmod</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M170,104.0 L162,92.0 178,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-358275480b4d4b099e836e8136c5b023-0-2\" stroke-width=\"2px\" d=\"M270,102.0 C270,52.0 345.0,52.0 345.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-358275480b4d4b099e836e8136c5b023-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M270,104.0 L262,92.0 278,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-358275480b4d4b099e836e8136c5b023-0-3\" stroke-width=\"2px\" d=\"M370,102.0 C370,52.0 445.0,52.0 445.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-358275480b4d4b099e836e8136c5b023-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M445.0,104.0 L453.0,92.0 437.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-358275480b4d4b099e836e8136c5b023-0-4\" stroke-width=\"2px\" d=\"M470,102.0 C470,52.0 545.0,52.0 545.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-358275480b4d4b099e836e8136c5b023-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M545.0,104.0 L553.0,92.0 537.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-358275480b4d4b099e836e8136c5b023-0-5\" stroke-width=\"2px\" d=\"M570,102.0 C570,52.0 645.0,52.0 645.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-358275480b4d4b099e836e8136c5b023-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advmod</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M645.0,104.0 L653.0,92.0 637.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"</svg>"
|
||||
]
|
||||
},
|
||||
"metadata": {},
|
||||
"output_type": "display_data",
|
||||
"jetTransient": {
|
||||
"display_id": null
|
||||
}
|
||||
},
|
||||
{
|
||||
"data": {
|
||||
"text/plain": [
|
||||
"<IPython.core.display.HTML object>"
|
||||
],
|
||||
"text/html": [
|
||||
"<svg xmlns=\"http://www.w3.org/2000/svg\" xmlns:xlink=\"http://www.w3.org/1999/xlink\" xml:lang=\"en\" id=\"bc8c620b992c40b5b591d785f5e540a4-0\" class=\"displacy\" width=\"750\" height=\"237.0\" direction=\"ltr\" style=\"max-width: none; height: 237.0px; color: #000000; background: #ffffff; font-family: Arial; direction: ltr\">\n",
|
||||
"<text class=\"displacy-token\" fill=\"currentColor\" text-anchor=\"middle\" y=\"147.0\">\n",
|
||||
" <tspan class=\"displacy-word\" fill=\"currentColor\" x=\"50\">A</tspan>\n",
|
||||
" <tspan class=\"displacy-tag\" dy=\"2em\" fill=\"currentColor\" x=\"50\">DET</tspan>\n",
|
||||
@@ -485,49 +820,49 @@
|
||||
"</text>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-af01d7eb10f84122b89ef1cd5057e52c-0-0\" stroke-width=\"2px\" d=\"M70,102.0 C70,2.0 350.0,2.0 350.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-bc8c620b992c40b5b591d785f5e540a4-0-0\" stroke-width=\"2px\" d=\"M70,102.0 C70,2.0 350.0,2.0 350.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-af01d7eb10f84122b89ef1cd5057e52c-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-bc8c620b992c40b5b591d785f5e540a4-0-0\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">det</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M70,104.0 L62,92.0 78,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-af01d7eb10f84122b89ef1cd5057e52c-0-1\" stroke-width=\"2px\" d=\"M170,102.0 C170,52.0 245.0,52.0 245.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-bc8c620b992c40b5b591d785f5e540a4-0-1\" stroke-width=\"2px\" d=\"M170,102.0 C170,52.0 245.0,52.0 245.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-af01d7eb10f84122b89ef1cd5057e52c-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advmod</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-bc8c620b992c40b5b591d785f5e540a4-0-1\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advmod</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M170,104.0 L162,92.0 178,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-af01d7eb10f84122b89ef1cd5057e52c-0-2\" stroke-width=\"2px\" d=\"M270,102.0 C270,52.0 345.0,52.0 345.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-bc8c620b992c40b5b591d785f5e540a4-0-2\" stroke-width=\"2px\" d=\"M270,102.0 C270,52.0 345.0,52.0 345.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-af01d7eb10f84122b89ef1cd5057e52c-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-bc8c620b992c40b5b591d785f5e540a4-0-2\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">amod</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M270,104.0 L262,92.0 278,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-af01d7eb10f84122b89ef1cd5057e52c-0-3\" stroke-width=\"2px\" d=\"M370,102.0 C370,52.0 445.0,52.0 445.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-bc8c620b992c40b5b591d785f5e540a4-0-3\" stroke-width=\"2px\" d=\"M370,102.0 C370,52.0 445.0,52.0 445.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-af01d7eb10f84122b89ef1cd5057e52c-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-bc8c620b992c40b5b591d785f5e540a4-0-3\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">prep</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M445.0,104.0 L453.0,92.0 437.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-af01d7eb10f84122b89ef1cd5057e52c-0-4\" stroke-width=\"2px\" d=\"M470,102.0 C470,52.0 545.0,52.0 545.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-bc8c620b992c40b5b591d785f5e540a4-0-4\" stroke-width=\"2px\" d=\"M470,102.0 C470,52.0 545.0,52.0 545.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-af01d7eb10f84122b89ef1cd5057e52c-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-bc8c620b992c40b5b591d785f5e540a4-0-4\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">pobj</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M545.0,104.0 L553.0,92.0 537.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
"\n",
|
||||
"<g class=\"displacy-arrow\">\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-af01d7eb10f84122b89ef1cd5057e52c-0-5\" stroke-width=\"2px\" d=\"M570,102.0 C570,52.0 645.0,52.0 645.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <path class=\"displacy-arc\" id=\"arrow-bc8c620b992c40b5b591d785f5e540a4-0-5\" stroke-width=\"2px\" d=\"M570,102.0 C570,52.0 645.0,52.0 645.0,102.0\" fill=\"none\" stroke=\"currentColor\"/>\n",
|
||||
" <text dy=\"1.25em\" style=\"font-size: 0.8em; letter-spacing: 1px\">\n",
|
||||
" <textPath xlink:href=\"#arrow-af01d7eb10f84122b89ef1cd5057e52c-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advmod</textPath>\n",
|
||||
" <textPath xlink:href=\"#arrow-bc8c620b992c40b5b591d785f5e540a4-0-5\" class=\"displacy-label\" startOffset=\"50%\" side=\"left\" fill=\"currentColor\" text-anchor=\"middle\">advmod</textPath>\n",
|
||||
" </text>\n",
|
||||
" <path class=\"displacy-arrowhead\" d=\"M645.0,104.0 L653.0,92.0 637.0,92.0\" fill=\"currentColor\"/>\n",
|
||||
"</g>\n",
|
||||
@@ -541,7 +876,7 @@
|
||||
}
|
||||
}
|
||||
],
|
||||
"execution_count": 14
|
||||
"execution_count": 32
|
||||
},
|
||||
{
|
||||
"metadata": {},
|
||||
|
||||
Reference in New Issue
Block a user