140 lines
2.7 KiB
Plaintext
140 lines
2.7 KiB
Plaintext
{
|
|
"cells": [
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 10,
|
|
"id": "a314a8ac",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([[0. , 0. ],\n",
|
|
" [0.26726124, 0.56694671]])"
|
|
]
|
|
},
|
|
"execution_count": 10,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"from sklearn.metrics.pairwise import cosine_similarity\n",
|
|
"import numpy as np\n",
|
|
"# Two small sets of 3-d row vectors; the first row of X is the zero vector.\n",
"X = [\n",
"    [0, 0, 0],\n",
"    [1, 2, 3],\n",
"]\n",
"Y = [\n",
"    [1, 0, 0],\n",
"    [1, 1, 0],\n",
"]\n",
"\n",
"# Result has one row per row of X and one column per row of Y.\n",
"# In the saved output the zero row of X scores 0 against every row of Y.\n",
"cosine_similarity(X, Y)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 11,
|
|
"id": "4b560c4f",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": [
|
|
"# Self-similarity: with no second matrix, every row of X is compared\n",
"# against every row of X.\n",
"sims = cosine_similarity(X, Y=None)"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "d8d5d17a",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 34,
|
|
"id": "a1098a5a",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"name": "stdout",
|
|
"output_type": "stream",
|
|
"text": [
|
|
"(3, 3)\n",
|
|
"(array([0, 0, 1]), array([1, 2, 2]))\n",
|
|
"(3,)\n",
|
|
"mean sim: -0.3333333333333334 std: 0.47140452079103173\n"
|
|
]
|
|
}
|
|
],
|
|
"source": [
|
|
"# Three probe rows: the zero vector, an all-ones vector, and its negation.\n",
"X = np.array([[0, 0, 0], [1, 1, 1], [-1, -1, -1]])\n",
"print(X.shape)\n",
"\n",
"# Square matrix of cosine similarities between every pair of rows of X.\n",
"sims = cosine_similarity(X)\n",
"\n",
"# Indices of the strict upper triangle (k=1 skips the diagonal), so each\n",
"# unordered pair of distinct rows is selected exactly once.\n",
"triu_idxs = np.triu_indices_from(sims, k=1)\n",
"print(triu_idxs)\n",
"\n",
"# Despite the name, these are similarities, not distances; the name is kept\n",
"# because a later cell displays dist_vals.\n",
"# NOTE(review): pairs involving the all-zero row appear to score 0 (as in the\n",
"# first cell's saved output), which pulls the mean toward 0 - confirm that\n",
"# including them is intended.\n",
"dist_vals = sims[triu_idxs]\n",
"print(dist_vals.shape)\n",
"print(\"mean sim:\", np.mean(dist_vals), \"std:\", np.std(dist_vals))"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": 27,
|
|
"id": "2dacad18",
|
|
"metadata": {},
|
|
"outputs": [
|
|
{
|
|
"data": {
|
|
"text/plain": [
|
|
"array([0.])"
|
|
]
|
|
},
|
|
"execution_count": 27,
|
|
"metadata": {},
|
|
"output_type": "execute_result"
|
|
}
|
|
],
|
|
"source": [
|
|
"dist_vals"
|
|
]
|
|
},
|
|
{
|
|
"cell_type": "code",
|
|
"execution_count": null,
|
|
"id": "76d25e07",
|
|
"metadata": {},
|
|
"outputs": [],
|
|
"source": []
|
|
}
|
|
],
|
|
"metadata": {
|
|
"kernelspec": {
|
|
"display_name": "venv",
|
|
"language": "python",
|
|
"name": "python3"
|
|
},
|
|
"language_info": {
|
|
"codemirror_mode": {
|
|
"name": "ipython",
|
|
"version": 3
|
|
},
|
|
"file_extension": ".py",
|
|
"mimetype": "text/x-python",
|
|
"name": "python",
|
|
"nbconvert_exporter": "python",
|
|
"pygments_lexer": "ipython3",
|
|
"version": "3.10.12"
|
|
}
|
|
},
|
|
"nbformat": 4,
|
|
"nbformat_minor": 5
|
|
}
|