{
 "cells": [
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "import spacy\n",
    "from sklearn.linear_model import LogisticRegression\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "from notebook_functions import (\n",
    "    jaccard_similarity,\n",
    "    sentence_similarity_avg,\n",
    "    sentence_similarity_sif,\n",
    "    syntactic_similarity,\n",
    ")\n",
    "\n",
    "\n",
    "def extract_all_features(sentence_pairs):\n",
    "    \"\"\"Build one similarity feature vector per sentence pair.\n",
    "\n",
    "    Args:\n",
    "        sentence_pairs: iterable of (sent1, sent2) pairs; each pair is\n",
    "            passed unchanged to the helpers from notebook_functions.\n",
    "\n",
    "    Returns:\n",
    "        A list with one entry per pair, each entry being\n",
    "        [jaccard_similarity, sentence_similarity_avg,\n",
    "        sentence_similarity_sif, syntactic_similarity] in that order.\n",
    "        An empty input yields an empty list.\n",
    "    \"\"\"\n",
    "    features = []\n",
    "    for sent1, sent2 in sentence_pairs:\n",
    "        feature_vector = [\n",
    "            jaccard_similarity(sent1, sent2),\n",
    "            sentence_similarity_avg(sent1, sent2),\n",
    "            sentence_similarity_sif(sent1, sent2),\n",
    "            syntactic_similarity(sent1, sent2),\n",
    "        ]\n",
    "        features.append(feature_vector)\n",
    "    return features"
   ],
   "id": "1c45d83192facfc6"
  },
  {
   "metadata": {},
   "cell_type": "code",
   "outputs": [],
   "execution_count": null,
   "source": [
    "SEED = 42  # fixed seed so the train/test split is reproducible\n",
    "\n",
    "# TODO: fill with (sentence_1, sentence_2) tuples.\n",
    "labeled_pairs = []\n",
    "# TODO: one label per pair (e.g. 0 or 1), in the same order as labeled_pairs.\n",
    "y = []\n",
    "\n",
    "# Fail early with a readable message instead of an error from inside sklearn.\n",
    "assert labeled_pairs, 'labeled_pairs is empty: add sentence pairs before training'\n",
    "assert len(labeled_pairs) == len(y), 'need exactly one label per sentence pair'\n",
    "\n",
    "X = extract_all_features(labeled_pairs)\n",
    "\n",
    "X_train, X_test, y_train, y_test = train_test_split(\n",
    "    X, y, test_size=0.2, random_state=SEED\n",
    ")\n",
    "model = LogisticRegression()\n",
    "model.fit(X_train, y_train)\n"
   ],
   "id": "9665682bd5a7951e"
  }
 ],
 "metadata": {},
 "nbformat": 4,
 "nbformat_minor": 5
}