Files
embedding-clustering/extract/test.ipynb

140 lines
2.7 KiB
Plaintext
Raw Normal View History

2025-09-02 15:01:50 +00:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "a314a8ac",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0. , 0. ],\n",
" [0.26726124, 0.56694671]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics.pairwise import cosine_similarity\n",
"import numpy as np\n",
"# X has an all-zero first row; the recorded result for that row is all zeros.\n",
"X = [\n",
"    [0, 0, 0],\n",
"    [1, 2, 3],\n",
"]\n",
"Y = [\n",
"    [1, 0, 0],\n",
"    [1, 1, 0],\n",
"]\n",
"# Entry (i, j) of the result compares row i of X with row j of Y.\n",
"cosine_similarity(X, Y=Y)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4b560c4f",
"metadata": {},
"outputs": [],
"source": [
"# No second matrix is given (Y=None), so the rows of X are compared with each other.\n",
"sims = cosine_similarity(X, Y=None)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8d5d17a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 34,
"id": "a1098a5a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(3, 3)\n",
"(array([0, 0, 1]), array([1, 2, 2]))\n",
"(3,)\n",
"mean sim: -0.3333333333333334 std: 0.47140452079103173\n"
]
}
],
"source": [
"# Alternative input kept for later experiments:\n",
"# X = np.array([[0, 0, 0], [-1, 100, -1000], [-1, -2, -4]])\n",
"\n",
"# Rows: the zero vector, a vector, and that same vector negated.\n",
"X = np.array([[0, 0, 0], [1, 1, 1], [-1, -1, -1]])\n",
"print(X.shape)\n",
"sims = cosine_similarity(X)\n",
"\n",
"# Strict upper triangle (k=1): each unordered row pair once, diagonal excluded.\n",
"triu_idxs = np.triu_indices(sims.shape[0], k=1, m=sims.shape[1])\n",
"print(triu_idxs)\n",
"# NOTE: despite the name, dist_vals holds similarity values, not distances.\n",
"dist_vals = sims[triu_idxs]\n",
"print(dist_vals.shape)\n",
"print(\"mean sim:\", dist_vals.mean(), \"std:\", dist_vals.std())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "2dacad18",
"metadata": {},
"outputs": [],
"source": [
"dist_vals"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76d25e07",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}