2 Commits

Author SHA1 Message Date
878310a551 update source code and pipeline 2025-09-04 14:39:02 +00:00
9aabd991c5 check visison extract model 2025-09-02 15:01:50 +00:00
88 changed files with 27410 additions and 0 deletions

182
.gitignore vendored
View File

@@ -1,2 +1,184 @@
*.json *.json
extracted_images/ extracted_images/
temp/
result/
# Created by https://www.toptal.com/developers/gitignore/api/python
# Edit at https://www.toptal.com/developers/gitignore?templates=python
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
.pybuilder/
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# poetry
# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
# This is especially recommended for binary packages to ensure reproducibility, and is more
# commonly ignored for libraries.
# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
#poetry.lock
# pdm
# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
#pdm.lock
# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
# in version control.
# https://pdm.fming.dev/#use-with-ide
.pdm.toml
# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# Cython debug symbols
cython_debug/
# PyCharm
# JetBrains specific template is maintained in a separate JetBrains.gitignore that can
# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
# and can be added to the global gitignore or merged into this file. For a more nuclear
# option (not recommended) you can uncomment the following to ignore the entire idea folder.
#.idea/
### Python Patch ###
# Poetry local configuration file - https://python-poetry.org/docs/configuration/#local-configuration
poetry.toml
# ruff
.ruff_cache/
# LSP config files
pyrightconfig.json
# End of https://www.toptal.com/developers/gitignore/api/python
Beta
0 / 0
used queries

View File

@@ -0,0 +1,61 @@
# pipeline
VLM model → feature extraction → clustering → data filtering → fine-tuning → LoRA export → vLLM serve → inference → evaluation
# 001 VLM model → feature extraction
```bash
cd extract
# open and run the clustering_example_qwen notebook (manual step, not a shell command)
```
# 002 clustering grid search
```bash
cd cluster
# dbscan
python auto_cluster.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json --method dbscan
# gmm
python gmm_extensive.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json
```
- Clustering results are saved to:
- cluster/dbscan_results.json
- cluster/gmm_final_results.json
# 003 filter data
```bash
cd filter
bash run_filter.sh
```
- EMBEDDINGS_PATH: path to the embedding file generated in step 001
- CLUSTERING_RESULTS_PATH: path to the file containing the clustering information generated in step 002
- OUTPUT_PATH: path to save the retained files
- selection_ratio: proportion of data to be passed into the filter
- center_ratio: proportion of center points to take
- border_ratio: proportion of boundary points to take
# 004 create labeled data from the full dataset and the filtered data
```bash
cd filter
python3 create_label_data.py
```
- dbscan_results_path: OUTPUT_PATH produced in step 003
- label_data_path: ground-truth labels restricted to the filtered subset; same format as the full dataset, used for fine-tuning the VLM model
# 005 visualize and check data
```bash
cd check_filter
bash run.sh
```

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

1
check_filter/run.sh Normal file
View File

@@ -0,0 +1 @@
# Launch the Streamlit viewer defined in visual_data.py on port 8501.
streamlit run visual_data.py --server.port 8501

View File

@@ -0,0 +1,579 @@
"""
Streamlit app để trực quan hóa embedding 3 chiều (PCA / UMAP / t-SNE) + phân cụm.
Chạy:
streamlit run visual_data.py --server.port 8501
Yêu cầu cài đặt (một lần):
pip install streamlit plotly scikit-learn umap-learn numpy pandas
Tính năng:
- Load file JSON lớn chứa các object {"filepath": ..., "embedding": [...]} hoặc định dạng JSON lines.
- Tùy chọn sample n phần tử (random) để tăng tốc.
- Chọn thuật toán giảm chiều: PCA, UMAP, t-SNE.
- Tham số điều chỉnh: n_neighbors, min_dist (UMAP); perplexity (t-SNE); n_components=3.
- KMeans clustering (tuỳ chọn) để tô màu điểm; hoặc tô màu theo regex/substring trong tên file.
- Lọc theo từ khóa trong đường dẫn.
- Tải xuống toạ độ 3D + nhãn cluster.
File embedding quan sát được có thể không phải JSON array chuẩn; script sẽ thử:
1. Parse như JSON array.
2. Parse như JSON lines (mỗi dòng 1 object).
3. Parse thủ công bằng cách tìm pattern {"filepath": ... , "embedding": [ ... ]}.
Lưu ý: hiện tại toàn bộ file được đọc vào RAM; chưa hỗ trợ đọc streaming.
"""
from __future__ import annotations
import json
import os
import re
import math
import random
from dataclasses import dataclass
from typing import Iterable, List, Optional, Tuple
import numpy as np
import pandas as pd
import streamlit as st
import colorsys
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
try:
import umap # type: ignore
except ImportError: # pragma: no cover
umap = None # handled later
@dataclass
class EmbeddingRecord:
    """One embedding vector paired with the path string it was stored under."""

    # Path string identifying the source item of this embedding.
    filepath: str
    # Embedding values as a plain Python list.
    embedding: List[float]
def _smart_json_object_stream(raw_text: str) -> Iterable[str]:
    """Yield the text of every top-level ``{...}`` object found in *raw_text*.

    Fallback scanner for content that is neither a JSON array nor JSON lines.
    Objects are delimited by brace-depth counting. Braces that appear inside
    double-quoted strings (including after backslash escapes) are ignored, so
    a value such as a file path containing ``{`` or ``}`` no longer truncates
    or merges objects.

    Characters outside any object, including stray ``}``, are skipped. An
    object still open at the end of the input is discarded. The yielded
    strings are not validated; callers are expected to ``json.loads`` them.

    Args:
        raw_text: the whole buffer to scan.

    Yields:
        The substring of each balanced top-level object, in input order.
    """
    depth = 0
    chunk: List[str] = []
    in_string = False
    escaped = False
    for ch in raw_text:
        if depth == 0:
            # Outside any object: only an opening brace is interesting.
            if ch == '{':
                chunk = ['{']
                depth = 1
                in_string = False
                escaped = False
            continue
        chunk.append(ch)
        if in_string:
            if ch == '\n':
                # A raw newline is invalid inside a JSON string; leaving
                # string mode here limits the damage of an unterminated
                # quote to a single line of malformed input.
                in_string = False
                escaped = False
            elif escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                yield ''.join(chunk)
                chunk = []
def load_embeddings(
    path: str,
    sample_size: Optional[int] = None,
    sampling_seed: int = 42,
    max_objects: Optional[int] = None,
) -> List[EmbeddingRecord]:
    """Load embedding records from a JSON array, JSON-lines, or loosely formatted file.

    The file is read once and parsed with up to three strategies, in order:

    1. A standard JSON array (text starts with ``[`` and ends with ``]``).
    2. JSON lines: each line starting with ``{`` is parsed on its own, after
       stripping whitespace and trailing commas; unparsable lines are skipped.
    3. Heuristic extraction with ``_smart_json_object_stream``; digits split
       by a newline are re-joined before parsing.

    Strategies 2 and 3 are tried only when the array parse does not succeed;
    strategy 3 only when strategy 2 yields nothing. An object becomes a record
    when it is a dict with a list under ``'embedding'`` and a non-empty path
    under ``'filepath'``, ``'file_path'`` or ``'path'``.

    Args:
        path: file to read (UTF-8).
        sample_size: if set and smaller than the number of loaded records,
            return a random sample of that size.
        sampling_seed: seed for the sampling RNG. A private ``random.Random``
            is used so the process-global RNG state is left untouched; the
            sample is the same one the global RNG would give for this seed.
        max_objects: stop collecting once this many records were accepted.

    Returns:
        The loaded (and possibly sampled) records.

    Raises:
        RuntimeError: if no record could be loaded.
        OSError, UnicodeDecodeError: if the file cannot be read.
    """

    def to_rec(obj) -> Optional[EmbeddingRecord]:
        # Accept only dicts carrying both an embedding list and a path.
        if not isinstance(obj, dict) or 'embedding' not in obj:
            return None
        fp = str(obj.get('filepath') or obj.get('file_path') or obj.get('path') or '')
        emb = obj['embedding']
        if isinstance(emb, list) and fp:
            return EmbeddingRecord(fp, emb)
        return None

    def collect(objs) -> List[EmbeddingRecord]:
        # Convert candidates lazily so max_objects can stop the parse early.
        found: List[EmbeddingRecord] = []
        for obj in objs:
            rec = to_rec(obj)
            if rec is None:
                continue
            found.append(rec)
            if max_objects and len(found) >= max_objects:
                break
        return found

    def jsonl_objects(text: str):
        # One JSON object per line; tolerate trailing commas and junk lines.
        for line in text.split('\n'):
            candidate = line.strip().rstrip(',')
            if not candidate.startswith('{'):
                continue
            try:
                obj = json.loads(candidate)
            except (ValueError, RecursionError):
                continue
            yield obj

    def heuristic_objects(text: str):
        # Last resort: carve out balanced {...} spans and repair numbers
        # broken by a stray newline (e.g. '1.2\n421875').
        for obj_str in _smart_json_object_stream(text):
            if 'embedding' not in obj_str:
                continue
            repaired = re.sub(r"(\d)\n(\d)", r"\1\2", obj_str)
            try:
                obj = json.loads(repaired)
            except (ValueError, RecursionError):
                continue
            yield obj

    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    records: List[EmbeddingRecord] = []
    array_parsed = False
    stripped = text.strip()
    if stripped.startswith('[') and stripped.endswith(']'):
        try:
            parsed = json.loads(stripped)
        except (ValueError, RecursionError):
            pass  # not a valid array after all; fall through to JSONL
        else:
            array_parsed = True
            records = collect(parsed)

    if not array_parsed:
        records = collect(jsonl_objects(text))
        if not records:
            records = collect(heuristic_objects(text))

    if not records:
        raise RuntimeError("Không load được embedding nào từ file.")

    if sample_size and sample_size < len(records):
        # Private RNG: same sample as random.seed(seed) + random.sample, but
        # without reseeding the module-level generator for other callers.
        records = random.Random(sampling_seed).sample(records, sample_size)
    return records
def reduce_embeddings(
    X: np.ndarray,
    method: str,
    random_state: int = 42,
    umap_neighbors: int = 15,
    umap_min_dist: float = 0.1,
    tsne_perplexity: int = 30,
    tsne_learning_rate: str | float = 'auto',
) -> Tuple[np.ndarray, dict]:
    """Project high-dimensional embeddings to 3D.

    Args:
        X: 2-D array with one embedding per row.
        method: ``'PCA'``, ``'UMAP'`` or ``'t-SNE'``.
        random_state: seed forwarded to the chosen reducer.
        umap_neighbors: UMAP ``n_neighbors``.
        umap_min_dist: UMAP ``min_dist``.
        tsne_perplexity: requested t-SNE perplexity; it is lowered for small
            inputs (see ``effective_perplexity`` in the returned meta).
        tsne_learning_rate: t-SNE ``learning_rate``.

    Returns:
        ``(coords, meta)`` where ``coords`` is the 3-column projection and
        ``meta`` holds ``'method'`` plus one method-specific entry.

    Raises:
        RuntimeError: if UMAP is requested but ``umap-learn`` is not installed.
        ValueError: if *method* is not one of the supported names.
    """
    meta = {"method": method}
    if method == 'PCA':
        pca = PCA(n_components=3, random_state=random_state)
        coords = pca.fit_transform(X)
        meta['explained_variance_ratio'] = pca.explained_variance_ratio_.tolist()
        return coords, meta
    if method == 'UMAP':
        if umap is None:
            raise RuntimeError("Chưa cài umap-learn: pip install umap-learn")
        reducer = umap.UMAP(
            n_components=3,
            n_neighbors=umap_neighbors,
            min_dist=umap_min_dist,
            metric='cosine',
            random_state=random_state,
        )
        coords = reducer.fit_transform(X)
        # Number of stored entries in the fitted neighbour graph.
        meta['umap_graph_connectivity'] = float(reducer.graph_.getnnz())
        return coords, meta
    if method == 't-SNE':
        n_samples = X.shape[0]
        perplexity = min(tsne_perplexity, max(5, (n_samples - 1) // 3))
        if n_samples > 1:
            # scikit-learn requires perplexity < n_samples; the floor of 5
            # above would otherwise violate that for very small inputs.
            perplexity = min(perplexity, n_samples - 1)
        # The iteration count is left at the library default (1000): the
        # keyword was renamed from n_iter to max_iter in newer scikit-learn,
        # so passing either name explicitly breaks on some versions.
        tsne = TSNE(
            n_components=3,
            perplexity=perplexity,
            learning_rate=tsne_learning_rate,
            init='pca',
            random_state=random_state,
            verbose=0,
        )
        coords = tsne.fit_transform(X)
        meta['effective_perplexity'] = perplexity
        return coords, meta
    raise ValueError(f"Unknown method {method}")
def kmeans_cluster(coords: np.ndarray, n_clusters: int, seed: int = 42) -> Tuple[np.ndarray, float]:
    """Cluster *coords* with KMeans and report a silhouette score.

    The requested cluster count is capped at the number of rows, because
    KMeans rejects ``n_clusters > n_samples`` (reachable when filtering
    leaves only a handful of points).

    Args:
        coords: 2-D array with one point per row.
        n_clusters: requested number of clusters.
        seed: ``random_state`` for KMeans.

    Returns:
        ``(labels, score)``. When the effective cluster count is 1 or less,
        labels are all zero and the score is NaN. The score is also NaN when
        fewer than two clusters are found, when there are fewer than five
        points per cluster, or when the silhouette cannot be computed.
    """
    n_samples = coords.shape[0]
    k = min(int(n_clusters), n_samples)
    if k <= 1:
        return np.zeros(n_samples, dtype=int), float('nan')
    km = KMeans(n_clusters=k, n_init='auto', random_state=seed)
    labels = km.fit_predict(coords)
    score = float('nan')
    if len(set(labels)) > 1 and n_samples >= k * 5:
        try:
            score = float(silhouette_score(coords, labels))
        except ValueError:
            # Best effort: the score is informational, keep NaN on failure.
            pass
    return labels, score
def build_dataframe(recs: List[EmbeddingRecord]) -> pd.DataFrame:
    """Return a two-column frame (``filepath``, ``embedding``) with one row per record."""
    paths = []
    vectors = []
    for rec in recs:
        paths.append(rec.filepath)
        vectors.append(rec.embedding)
    return pd.DataFrame({'filepath': paths, 'embedding': vectors})
def load_cluster_file(
    path: str,
    expected_n: int,
    noise_label: int = -1,
) -> Tuple[np.ndarray, Optional[dict]]:
    """Load cluster labels from a JSON result file.

    Supported layouts are ``{"results": [...]}`` and a bare ``[...]``. Each
    item may be a dict or a raw label. For a dict the label is taken from the
    first key present among ``cluster``, ``cluster_id``, ``label``; failing
    that, from ``is_noise`` (noise -> *noise_label*, otherwise 0); failing
    that, 0.

    If any dict item has a ``filepath`` key the result is filepath-based:
    a ``{filepath: label}`` mapping is returned and items without a usable
    path or label are left out. Otherwise labels are assigned by item order;
    an item whose label cannot be converted to ``int`` gets *noise_label* so
    that later items keep their position.

    Args:
        path: JSON file to read (UTF-8).
        expected_n: length of the returned label array.
        noise_label: label used for noise, unconvertible items and padding.

    Returns:
        ``(labels, mapping)``. In filepath mode ``labels`` is filled with
        *noise_label* and ``mapping`` is the path-to-label dict. In index mode
        ``labels`` holds the first ``expected_n`` labels (padded with
        *noise_label*) and ``mapping`` is ``None``.

    Raises:
        RuntimeError: if the file cannot be read or parsed, or its top-level
            shape is neither a list nor a dict with a ``results`` key.
    """

    def label_of(item) -> Optional[int]:
        # Resolve an item to an int label; None means "not convertible".
        if isinstance(item, dict):
            for key in ('cluster', 'cluster_id', 'label'):
                if key in item:
                    raw = item[key]
                    break
            else:
                if 'is_noise' in item:
                    raw = noise_label if item.get('is_noise') else 0
                else:
                    raw = 0
        else:
            raw = item
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError):
            return None

    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = json.load(f)
    except (OSError, ValueError) as e:
        raise RuntimeError(f'Không đọc được file cluster: {e}') from e

    if isinstance(content, dict) and 'results' in content:
        items = content['results']
    elif isinstance(content, list):
        items = content
    else:
        raise RuntimeError('Định dạng file cluster không hợp lệ (cần list hoặc có key "results").')

    labels = np.full(expected_n, noise_label, dtype=int)

    # Filepath-based mapping as soon as any item carries a path.
    if any(isinstance(it, dict) and 'filepath' in it for it in items):
        mapping = {}
        for it in items:
            if not isinstance(it, dict):
                continue
            fp = it.get('filepath')
            if not fp:
                continue
            value = label_of(it)
            if value is not None:
                mapping[str(fp)] = value
        return labels, mapping  # the caller maps these onto its own rows

    # Index-based mapping: every item occupies exactly one position, so an
    # unusable item becomes noise instead of shifting the labels after it.
    collected = []
    for it in items:
        value = label_of(it)
        collected.append(noise_label if value is None else value)
    count = min(expected_n, len(collected))
    if count:
        labels[:count] = collected[:count]
    return labels, None
def main(): # pragma: no cover - Streamlit entry
    """Render the single-plot Streamlit page.

    Collects the input path and all settings through widgets. When the load
    button is pressed it loads the embeddings, optionally filters them by
    path substring, projects them to 3D, assigns cluster labels (KMeans,
    labels from a file, or none), draws a Plotly 3D scatter and offers the
    coordinates as a CSV download.
    """
    st.set_page_config(page_title="Embedding 3D Viewer", layout="wide")
    st.title("🔍 Embedding 3D Viewer")
    st.caption("Trực quan hóa tương quan embedding hóa đơn (Qwen2-VL).")
    default_path = 'embeddings_factures_osteopathie_1k_qwen.json'
    path = st.text_input('Đường dẫn file embedding', value=default_path)
    # Loader settings laid out in one row of four columns.
    col_top = st.columns(4)
    sample_size = col_top[0].number_input('Sample (0 = all)', min_value=0, value=1000, step=100)
    max_objects = col_top[1].number_input('Max objects đọc (0 = no limit)', min_value=0, value=0, step=500)
    seed = col_top[2].number_input('Seed', min_value=0, value=42, step=1)
    show_raw = col_top[3].checkbox('Hiện bảng raw', value=False)
    algo = st.selectbox('Thuật toán giảm chiều', ['UMAP', 'PCA', 't-SNE'], index=0)
    with st.expander('Tham số giảm chiều'):
        # Only the selected algorithm gets sliders; the other parameters are
        # bound to fixed values so the reduce_embeddings call below always
        # has all three names defined.
        if algo == 'UMAP':
            umap_neighbors = st.slider('n_neighbors', 5, 100, 15, 1)
            umap_min_dist = st.slider('min_dist', 0.0, 1.0, 0.1, 0.01)
            tsne_perplexity = 30
        elif algo == 't-SNE':
            tsne_perplexity = st.slider('perplexity', 5, 100, 30, 1)
            umap_neighbors = 15
            umap_min_dist = 0.1
        else:
            umap_neighbors = 15
            umap_min_dist = 0.1
            tsne_perplexity = 30
    with st.expander('Phân cụm & màu sắc'):
        # NOTE(review): the original indentation was lost; the widgets below
        # are assumed to all live inside this expander - confirm.
        cluster_source = st.radio('Nguồn cluster', ['KMeans','Load file','None'], index=0, horizontal=True)
        n_clusters = st.slider('Số cluster KMeans', 2, 100, 10, 1, disabled=(cluster_source != 'KMeans'))
        cluster_file_path = st.text_input('Đường dẫn file cluster (JSON)', value='cluster/dbscan_results.json', disabled=(cluster_source != 'Load file'))
        noise_label = st.number_input('Giá trị label cho noise', value=-1, step=1, disabled=(cluster_source != 'Load file'))
        path_filter = st.text_input('Filter substring (lọc filepath, comma = OR)', value='')
        color_by_substring = st.text_input('Color theo substring (vd: osteo, facture)', value='')
        palette_name = st.selectbox('Bảng màu', ['auto','okabe-ito','tol','plotly','bold','dark24','set3','d3','hcl'], index=0, help='Chọn bảng màu dễ phân biệt hơn (okabe-ito, tol = thân thiện cho người mù màu)')
        marker_size = st.slider('Kích thước điểm', 2, 15, 5, 1)
        st.caption('Nếu file cluster chỉ chứa is_noise: noise -> noise_label, còn lại -> 0.')
    load_btn = st.button('🚀 Load & Giảm chiều', type='primary')
    if load_btn:
        if not os.path.isfile(path):
            st.error(f'File không tồn tại: {path}')
            return
        with st.spinner('Đang load embeddings...'):
            # A widget value of 0 is turned into None, i.e. "no limit".
            recs = load_embeddings(
                path,
                sample_size=sample_size or None,
                sampling_seed=int(seed),
                max_objects=max_objects or None,
            )
        st.success(f'Loaded {len(recs)} embeddings')
        df = build_dataframe(recs)
        # Dimension is read from the first record only.
        dim = len(df['embedding'].iloc[0])
        st.write(f'Chiều gốc: {dim}')
        # Filter: keep rows whose path contains any of the comma-separated
        # tokens (case-insensitive).
        if path_filter.strip():
            tokens = [t.strip() for t in path_filter.split(',') if t.strip()]
            if tokens:
                mask = df['filepath'].apply(lambda p: any(tok.lower() in p.lower() for tok in tokens))
                df = df[mask].reset_index(drop=True)
                st.info(f'Filter còn {len(df)} bản ghi.')
                if df.empty:
                    return
        # Stack the per-row lists into one float32 matrix.
        X = np.vstack(df['embedding'].values).astype(np.float32)
        with st.spinner('Đang giảm chiều...'):
            coords, meta = reduce_embeddings(
                X,
                algo,
                random_state=int(seed),
                umap_neighbors=umap_neighbors,
                umap_min_dist=umap_min_dist,
                tsne_perplexity=tsne_perplexity,
            )
        df[['x', 'y', 'z']] = coords
        st.write('Meta:', meta)
        # Clustering (three modes)
        if cluster_source == 'KMeans':
            with st.spinner('KMeans clustering...'):
                # KMeans runs on the 3-D projection, not on the raw embeddings.
                labels, sil = kmeans_cluster(coords, n_clusters, seed)
            df['cluster'] = labels
            if not math.isnan(sil):
                st.write(f'Silhouette: {sil:.4f}')
            else:
                st.write('Silhouette: N/A')
        elif cluster_source == 'Load file':
            if not os.path.isfile(cluster_file_path):
                st.error(f'Không tìm thấy file cluster: {cluster_file_path}')
                return
            with st.spinner('Đang nạp label từ file cluster...'):
                loaded_labels, mapping = load_cluster_file(cluster_file_path, len(df), noise_label=int(noise_label))
            if mapping is not None:
                # map by filepath; paths missing from the file become noise
                labels = []
                miss = 0
                for fp in df['filepath']:
                    lab = mapping.get(fp)
                    if lab is None:
                        lab = int(noise_label)
                        miss += 1
                    labels.append(lab)
                labels = np.array(labels, dtype=int)
                if miss:
                    st.warning(f'{miss} filepath không tìm thấy trong file cluster gán noise.')
            else:
                # NOTE(review): index-based labels are applied to the current
                # (possibly sampled/filtered) row order - confirm that this
                # matches the order used when the cluster file was written.
                labels = loaded_labels
            df['cluster'] = labels
            # Try silhouette when >1 cluster and not only noise
            uniq = set(labels)
            if len([u for u in uniq if u != int(noise_label)]) > 1:
                try:
                    mask = labels != int(noise_label)
                    sil = silhouette_score(coords[mask], labels[mask])
                    st.write(f'Silhouette (exclude noise): {sil:.4f}')
                except Exception:
                    st.write('Silhouette: N/A')
            else:
                st.write('Silhouette: N/A')
        else: # None
            df['cluster'] = -1
            st.write('Không áp dụng clustering.')
        # Color scheme: substring groups take precedence over cluster labels.
        if color_by_substring.strip():
            subs = [s.strip() for s in color_by_substring.split(',') if s.strip()]
            def color_from_sub(p: str) -> str:
                # First matching substring wins; unmatched paths go to 'other'.
                for i, ssub in enumerate(subs):
                    if ssub.lower() in p.lower():
                        return ssub
                return 'other'
            df['color_group'] = df['filepath'].apply(color_from_sub)
            color_col = 'color_group'
        else:
            color_col = 'cluster'
        # --- Palette handling -------------------------------------------------
        def get_base_palette(name: str) -> List[str]:
            # Fixed colour lists by name; unknown names give an empty list.
            name = name.lower()
            if name == 'okabe-ito': # 8 colors, colorblind-safe
                return ["#000000","#E69F00","#56B4E9","#009E73","#F0E442","#0072B2","#D55E00","#CC79A7"]
            if name == 'tol': # Paul Tol (12)
                return ["#4477AA","#66CCEE","#228833","#CCBB44","#EE6677","#AA3377","#BBBBBB","#000000","#EEDD88","#FFAABB","#99DDFF","#44BB99"]
            if name == 'plotly':
                from plotly.colors import qualitative as q
                return q.Plotly
            if name == 'bold':
                from plotly.colors import qualitative as q
                return q.Bold
            if name == 'dark24':
                from plotly.colors import qualitative as q
                return q.Dark24
            if name == 'set3':
                from plotly.colors import qualitative as q
                return q.Set3
            if name == 'd3':
                from plotly.colors import qualitative as q
                return q.D3
            return []
        def generate_hcl_like(n: int) -> List[str]:
            # Simple evenly spaced hues in HSV then adjust to look more balanced
            cols = []
            for i in range(n):
                h = (i / n) % 1.0
                s = 0.55 + 0.35 * ((i * 37) % 2) # alternate saturation
                v = 0.85 if (i % 3) else 0.98
                r, g, b = colorsys.hsv_to_rgb(h, s, v)
                cols.append('#%02X%02X%02X' % (int(r*255), int(g*255), int(b*255)))
            return cols
        def build_palette(name: str, k: int) -> List[str]:
            # Return k colours for the named palette; [] means "use Plotly's default".
            if name == 'auto':
                return [] # let plotly decide
            if name == 'hcl':
                return generate_hcl_like(k)
            base = get_base_palette(name)
            if k <= len(base):
                return base[:k]
            # extend by generating extra colors using HSV golden ratio
            cols = list(base)
            gold = 0.61803398875
            h = 0.1
            while len(cols) < k:
                h = (h + gold) % 1.0
                r, g, b = colorsys.hsv_to_rgb(h, 0.6, 0.95)
                newc = '#%02X%02X%02X' % (int(r*255), int(g*255), int(b*255))
                if newc not in cols:
                    cols.append(newc)
            return cols
        color_display_col = color_col + '_display'
        # ensure discrete (string) for numeric clusters
        if np.issubdtype(df[color_col].dtype, np.number):
            df[color_display_col] = df[color_col].astype(int).astype(str)
        else:
            df[color_display_col] = df[color_col].astype(str)
        groups = df[color_display_col].unique()
        palette_seq = build_palette(palette_name, len(groups)) if palette_name else []
        import plotly.express as px
        fig = px.scatter_3d(
            df,
            x='x', y='y', z='z',
            color=color_display_col,
            color_discrete_sequence=palette_seq if palette_seq else None,
            hover_data={'filepath': True, 'cluster': True, 'x': ':.2f', 'y': ':.2f', 'z': ':.2f'},
            title=f'Embedding 3D ({algo})',
            opacity=0.9,
            height=800,
        )
        fig.update_traces(marker={'size': int(marker_size)})
        fig.update_layout(legend=dict(title='Nhóm', itemsizing='constant'))
        st.plotly_chart(fig, use_container_width=True)
        if show_raw:
            st.dataframe(df.head(100))
        # Download: 3-D coordinates plus cluster label, UTF-8 encoded CSV.
        out_csv = df[['filepath', 'x', 'y', 'z', 'cluster']].to_csv(index=False).encode('utf-8')
        st.download_button('⬇️ Tải toạ độ 3D (CSV)', out_csv, file_name='embedding_3d.csv', mime='text/csv')
        st.caption('Hoàn tất.')
if __name__ == '__main__': # pragma: no cover
    # Imported here so the guard does not rely on the undocumented `os.sys`
    # attribute to reach argv.
    import sys

    # main() is the default path. Passing '--cli-test' instead runs a quick
    # loader smoke test without starting the UI.
    if '--cli-test' in sys.argv:
        test_path = 'embeddings_factures_osteopathie_1k_qwen.json'
        if os.path.exists(test_path):
            recs = load_embeddings(test_path, sample_size=5)
            print(f'[CLI TEST] Loaded {len(recs)} embeddings dim={len(recs[0].embedding)}')
        else:
            print('[CLI TEST] Không tìm thấy file test.')
    else:
        main()

View File

@@ -0,0 +1,600 @@
"""
Streamlit app để trực quan hóa embedding 3 chiều (PCA / UMAP / t-SNE) + phân cụm.
Chạy:
streamlit run visual_data.py --server.port 8501
Yêu cầu cài đặt (một lần):
pip install streamlit plotly scikit-learn umap-learn numpy pandas
Tính năng:
- Load file JSON lớn chứa các object {"filepath": ..., "embedding": [...]} hoặc định dạng JSON lines.
- Tùy chọn sample n phần tử (random) để tăng tốc.
- Chọn thuật toán giảm chiều: PCA, UMAP, t-SNE.
- Tham số điều chỉnh: n_neighbors, min_dist (UMAP); perplexity (t-SNE); n_components=3.
- KMeans clustering (tuỳ chọn) để tô màu điểm; hoặc tô màu theo regex/substring trong tên file.
- Lọc theo từ khóa trong đường dẫn.
- Tải xuống toạ độ 3D + nhãn cluster.
File embedding quan sát được có thể không phải JSON array chuẩn; script sẽ thử:
1. Parse như JSON array.
2. Parse như JSON lines (mỗi dòng 1 object).
3. Parse thủ công bằng cách tìm pattern {"filepath": ... , "embedding": [ ... ]}.
Lưu ý: hiện tại toàn bộ file được đọc vào RAM; chưa hỗ trợ đọc streaming.
"""
from __future__ import annotations
import json
import os
import re
import math
import random
from dataclasses import dataclass
from typing import Iterable, List, Optional, Tuple, Dict, Any
import numpy as np
import pandas as pd
import streamlit as st
import colorsys
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
try:
import umap # type: ignore
except ImportError: # pragma: no cover
umap = None # handled later
@dataclass
class EmbeddingRecord:
    """One embedding vector paired with the path string it was stored under."""

    # Path string identifying the source item of this embedding.
    filepath: str
    # Embedding values as a plain Python list.
    embedding: List[float]
def _smart_json_object_stream(raw_text: str) -> Iterable[str]:
    """Yield the text of every top-level ``{...}`` object found in *raw_text*.

    Fallback scanner for content that is neither a JSON array nor JSON lines.
    Objects are delimited by brace-depth counting. Braces that appear inside
    double-quoted strings (including after backslash escapes) are ignored, so
    a value such as a file path containing ``{`` or ``}`` no longer truncates
    or merges objects.

    Characters outside any object, including stray ``}``, are skipped. An
    object still open at the end of the input is discarded. The yielded
    strings are not validated; callers are expected to ``json.loads`` them.

    Args:
        raw_text: the whole buffer to scan.

    Yields:
        The substring of each balanced top-level object, in input order.
    """
    depth = 0
    chunk: List[str] = []
    in_string = False
    escaped = False
    for ch in raw_text:
        if depth == 0:
            # Outside any object: only an opening brace is interesting.
            if ch == '{':
                chunk = ['{']
                depth = 1
                in_string = False
                escaped = False
            continue
        chunk.append(ch)
        if in_string:
            if ch == '\n':
                # A raw newline is invalid inside a JSON string; leaving
                # string mode here limits the damage of an unterminated
                # quote to a single line of malformed input.
                in_string = False
                escaped = False
            elif escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                yield ''.join(chunk)
                chunk = []
def load_embeddings(
    path: str,
    sample_size: Optional[int] = None,
    sampling_seed: int = 42,
    max_objects: Optional[int] = None,
) -> List[EmbeddingRecord]:
    """Load embedding records from a JSON array, JSON-lines, or loosely formatted file.

    The file is read once and parsed with up to three strategies, in order:

    1. A standard JSON array (text starts with ``[`` and ends with ``]``).
    2. JSON lines: each line starting with ``{`` is parsed on its own, after
       stripping whitespace and trailing commas; unparsable lines are skipped.
    3. Heuristic extraction with ``_smart_json_object_stream``; digits split
       by a newline are re-joined before parsing.

    Strategies 2 and 3 are tried only when the array parse does not succeed;
    strategy 3 only when strategy 2 yields nothing. An object becomes a record
    when it is a dict with a list under ``'embedding'`` and a non-empty path
    under ``'filepath'``, ``'file_path'`` or ``'path'``.

    Args:
        path: file to read (UTF-8).
        sample_size: if set and smaller than the number of loaded records,
            return a random sample of that size.
        sampling_seed: seed for the sampling RNG. A private ``random.Random``
            is used so the process-global RNG state is left untouched; the
            sample is the same one the global RNG would give for this seed.
        max_objects: stop collecting once this many records were accepted.

    Returns:
        The loaded (and possibly sampled) records.

    Raises:
        RuntimeError: if no record could be loaded.
        OSError, UnicodeDecodeError: if the file cannot be read.
    """

    def to_rec(obj) -> Optional[EmbeddingRecord]:
        # Accept only dicts carrying both an embedding list and a path.
        if not isinstance(obj, dict) or 'embedding' not in obj:
            return None
        fp = str(obj.get('filepath') or obj.get('file_path') or obj.get('path') or '')
        emb = obj['embedding']
        if isinstance(emb, list) and fp:
            return EmbeddingRecord(fp, emb)
        return None

    def collect(objs) -> List[EmbeddingRecord]:
        # Convert candidates lazily so max_objects can stop the parse early.
        found: List[EmbeddingRecord] = []
        for obj in objs:
            rec = to_rec(obj)
            if rec is None:
                continue
            found.append(rec)
            if max_objects and len(found) >= max_objects:
                break
        return found

    def jsonl_objects(text: str):
        # One JSON object per line; tolerate trailing commas and junk lines.
        for line in text.split('\n'):
            candidate = line.strip().rstrip(',')
            if not candidate.startswith('{'):
                continue
            try:
                obj = json.loads(candidate)
            except (ValueError, RecursionError):
                continue
            yield obj

    def heuristic_objects(text: str):
        # Last resort: carve out balanced {...} spans and repair numbers
        # broken by a stray newline (e.g. '1.2\n421875').
        for obj_str in _smart_json_object_stream(text):
            if 'embedding' not in obj_str:
                continue
            repaired = re.sub(r"(\d)\n(\d)", r"\1\2", obj_str)
            try:
                obj = json.loads(repaired)
            except (ValueError, RecursionError):
                continue
            yield obj

    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    records: List[EmbeddingRecord] = []
    array_parsed = False
    stripped = text.strip()
    if stripped.startswith('[') and stripped.endswith(']'):
        try:
            parsed = json.loads(stripped)
        except (ValueError, RecursionError):
            pass  # not a valid array after all; fall through to JSONL
        else:
            array_parsed = True
            records = collect(parsed)

    if not array_parsed:
        records = collect(jsonl_objects(text))
        if not records:
            records = collect(heuristic_objects(text))

    if not records:
        raise RuntimeError("Không load được embedding nào từ file.")

    if sample_size and sample_size < len(records):
        # Private RNG: same sample as random.seed(seed) + random.sample, but
        # without reseeding the module-level generator for other callers.
        records = random.Random(sampling_seed).sample(records, sample_size)
    return records
def reduce_embeddings(
    X: np.ndarray,
    method: str,
    random_state: int = 42,
    umap_neighbors: int = 15,
    umap_min_dist: float = 0.1,
    tsne_perplexity: int = 30,
    tsne_learning_rate: str | float = 'auto',
) -> Tuple[np.ndarray, dict]:
    """Project high-dimensional embeddings to 3D.

    Args:
        X: 2-D array with one embedding per row.
        method: ``'PCA'``, ``'UMAP'`` or ``'t-SNE'``.
        random_state: seed forwarded to the chosen reducer.
        umap_neighbors: UMAP ``n_neighbors``.
        umap_min_dist: UMAP ``min_dist``.
        tsne_perplexity: requested t-SNE perplexity; it is lowered for small
            inputs (see ``effective_perplexity`` in the returned meta).
        tsne_learning_rate: t-SNE ``learning_rate``.

    Returns:
        ``(coords, meta)`` where ``coords`` is the 3-column projection and
        ``meta`` holds ``'method'`` plus one method-specific entry.

    Raises:
        RuntimeError: if UMAP is requested but ``umap-learn`` is not installed.
        ValueError: if *method* is not one of the supported names.
    """
    meta = {"method": method}
    if method == 'PCA':
        pca = PCA(n_components=3, random_state=random_state)
        coords = pca.fit_transform(X)
        meta['explained_variance_ratio'] = pca.explained_variance_ratio_.tolist()
        return coords, meta
    if method == 'UMAP':
        if umap is None:
            raise RuntimeError("Chưa cài umap-learn: pip install umap-learn")
        reducer = umap.UMAP(
            n_components=3,
            n_neighbors=umap_neighbors,
            min_dist=umap_min_dist,
            metric='cosine',
            random_state=random_state,
        )
        coords = reducer.fit_transform(X)
        # Number of stored entries in the fitted neighbour graph.
        meta['umap_graph_connectivity'] = float(reducer.graph_.getnnz())
        return coords, meta
    if method == 't-SNE':
        n_samples = X.shape[0]
        perplexity = min(tsne_perplexity, max(5, (n_samples - 1) // 3))
        if n_samples > 1:
            # scikit-learn requires perplexity < n_samples; the floor of 5
            # above would otherwise violate that for very small inputs.
            perplexity = min(perplexity, n_samples - 1)
        # The iteration count is left at the library default (1000): the
        # keyword was renamed from n_iter to max_iter in newer scikit-learn,
        # so passing either name explicitly breaks on some versions.
        tsne = TSNE(
            n_components=3,
            perplexity=perplexity,
            learning_rate=tsne_learning_rate,
            init='pca',
            random_state=random_state,
            verbose=0,
        )
        coords = tsne.fit_transform(X)
        meta['effective_perplexity'] = perplexity
        return coords, meta
    raise ValueError(f"Unknown method {method}")
def kmeans_cluster(coords: np.ndarray, n_clusters: int, seed: int = 42) -> Tuple[np.ndarray, float]:
    """Cluster *coords* with KMeans and report a silhouette score.

    The requested cluster count is capped at the number of rows, because
    KMeans rejects ``n_clusters > n_samples`` (reachable when filtering
    leaves only a handful of points).

    Args:
        coords: 2-D array with one point per row.
        n_clusters: requested number of clusters.
        seed: ``random_state`` for KMeans.

    Returns:
        ``(labels, score)``. When the effective cluster count is 1 or less,
        labels are all zero and the score is NaN. The score is also NaN when
        fewer than two clusters are found, when there are fewer than five
        points per cluster, or when the silhouette cannot be computed.
    """
    n_samples = coords.shape[0]
    k = min(int(n_clusters), n_samples)
    if k <= 1:
        return np.zeros(n_samples, dtype=int), float('nan')
    km = KMeans(n_clusters=k, n_init='auto', random_state=seed)
    labels = km.fit_predict(coords)
    score = float('nan')
    if len(set(labels)) > 1 and n_samples >= k * 5:
        try:
            score = float(silhouette_score(coords, labels))
        except ValueError:
            # Best effort: the score is informational, keep NaN on failure.
            pass
    return labels, score
def build_dataframe(recs: List[EmbeddingRecord]) -> pd.DataFrame:
    """Return a two-column frame (``filepath``, ``embedding``) with one row per record."""
    paths = []
    vectors = []
    for rec in recs:
        paths.append(rec.filepath)
        vectors.append(rec.embedding)
    return pd.DataFrame({'filepath': paths, 'embedding': vectors})
def load_cluster_file(
    path: str,
    expected_n: int,
    noise_label: int = -1,
) -> Tuple[np.ndarray, Optional[dict]]:
    """Load cluster labels from a JSON result file.

    Supported layouts are ``{"results": [...]}`` and a bare ``[...]``. Each
    item may be a dict or a raw label. For a dict the label is taken from the
    first key present among ``cluster``, ``cluster_id``, ``label``; failing
    that, from ``is_noise`` (noise -> *noise_label*, otherwise 0); failing
    that, 0.

    If any dict item has a ``filepath`` key the result is filepath-based:
    a ``{filepath: label}`` mapping is returned and items without a usable
    path or label are left out. Otherwise labels are assigned by item order;
    an item whose label cannot be converted to ``int`` gets *noise_label* so
    that later items keep their position.

    Args:
        path: JSON file to read (UTF-8).
        expected_n: length of the returned label array.
        noise_label: label used for noise, unconvertible items and padding.

    Returns:
        ``(labels, mapping)``. In filepath mode ``labels`` is filled with
        *noise_label* and ``mapping`` is the path-to-label dict. In index mode
        ``labels`` holds the first ``expected_n`` labels (padded with
        *noise_label*) and ``mapping`` is ``None``.

    Raises:
        RuntimeError: if the file cannot be read or parsed, or its top-level
            shape is neither a list nor a dict with a ``results`` key.
    """

    def label_of(item) -> Optional[int]:
        # Resolve an item to an int label; None means "not convertible".
        if isinstance(item, dict):
            for key in ('cluster', 'cluster_id', 'label'):
                if key in item:
                    raw = item[key]
                    break
            else:
                if 'is_noise' in item:
                    raw = noise_label if item.get('is_noise') else 0
                else:
                    raw = 0
        else:
            raw = item
        try:
            return int(raw)
        except (TypeError, ValueError, OverflowError):
            return None

    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = json.load(f)
    except (OSError, ValueError) as e:
        raise RuntimeError(f'Không đọc được file cluster: {e}') from e

    if isinstance(content, dict) and 'results' in content:
        items = content['results']
    elif isinstance(content, list):
        items = content
    else:
        raise RuntimeError('Định dạng file cluster không hợp lệ (cần list hoặc có key "results").')

    labels = np.full(expected_n, noise_label, dtype=int)

    # Filepath-based mapping as soon as any item carries a path.
    if any(isinstance(it, dict) and 'filepath' in it for it in items):
        mapping = {}
        for it in items:
            if not isinstance(it, dict):
                continue
            fp = it.get('filepath')
            if not fp:
                continue
            value = label_of(it)
            if value is not None:
                mapping[str(fp)] = value
        return labels, mapping  # the caller maps these onto its own rows

    # Index-based mapping: every item occupies exactly one position, so an
    # unusable item becomes noise instead of shifting the labels after it.
    collected = []
    for it in items:
        value = label_of(it)
        collected.append(noise_label if value is None else value)
    count = min(expected_n, len(collected))
    if count:
        labels[:count] = collected[:count]
    return labels, None
def main():  # pragma: no cover - Streamlit entry
    """Run the multi-plot Streamlit viewer for 3D embedding projections.

    The sidebar collects the embedding file path plus sampling, reduction,
    clustering and colouring options. Pressing the add button loads the
    embeddings, projects them to 3D, assigns cluster labels (KMeans, a cluster
    file, or none) and appends a Plotly figure to
    ``st.session_state['plots']``. Every stored plot is then rendered with a
    remove button, its metadata and a CSV download.
    """
    st.set_page_config(page_title="Embedding 3D Viewer", layout="wide")
    st.title("🔍 Embedding 3D Viewer (Multi)")
    st.caption("Mỗi lần nạp file sẽ thêm một đồ thị mới ở bên dưới.")
    if 'plots' not in st.session_state:
        st.session_state['plots'] = []

    def rerun_app():
        # st.experimental_rerun was superseded by st.rerun; prefer the new
        # name and fall back so older Streamlit versions keep working.
        rerun = getattr(st, 'rerun', None) or getattr(st, 'experimental_rerun')
        rerun()

    # ---- palette helpers -------------------------------------------------
    def get_base_palette(name: str) -> List[str]:
        """Return the fixed colour list of a named palette ([] if unknown)."""
        name = name.lower()
        if name == 'okabe-ito':
            return ["#000000","#E69F00","#56B4E9","#009E73","#F0E442","#0072B2","#D55E00","#CC79A7"]
        if name == 'tol':
            return ["#4477AA","#66CCEE","#228833","#CCBB44","#EE6677","#AA3377","#BBBBBB","#000000","#EEDD88","#FFAABB","#99DDFF","#44BB99"]
        plotly_attr = {'plotly': 'Plotly', 'bold': 'Bold', 'dark24': 'Dark24', 'set3': 'Set3', 'd3': 'D3'}.get(name)
        if plotly_attr:
            from plotly.colors import qualitative as q
            return list(getattr(q, plotly_attr))
        return []

    def generate_hcl_like(n: int) -> List[str]:
        """Generate n colours by stepping the HSV hue evenly around the wheel."""
        cols = []
        for i in range(n):
            h = (i / n) % 1.0
            s = 0.55 + 0.35 * ((i * 37) % 2)
            v = 0.85 if (i % 3) else 0.98
            r, g, b = colorsys.hsv_to_rgb(h, s, v)
            cols.append('#%02X%02X%02X' % (int(r * 255), int(g * 255), int(b * 255)))
        return cols

    def build_palette(name: str, k: int) -> List[str]:
        """Return k colours for palette `name`; [] means "let Plotly decide"."""
        if name == 'auto':
            return []
        if name == 'hcl':
            return generate_hcl_like(k)
        base = get_base_palette(name)
        if k <= len(base):
            return base[:k]
        cols = list(base)
        gold = 0.61803398875
        h = 0.1
        misses = 0
        while len(cols) < k:
            # Golden-ratio hue steps spread the extra colours over the wheel.
            h = (h + gold) % 1.0
            r, g, b = colorsys.hsv_to_rgb(h, 0.6, 0.95)
            newc = '#%02X%02X%02X' % (int(r * 255), int(g * 255), int(b * 255))
            # Accept a duplicate after many consecutive misses so the loop
            # always terminates, even for a very large k.
            if newc not in cols or misses > 64:
                cols.append(newc)
                misses = 0
            else:
                misses += 1
        return cols

    # ---- sidebar ---------------------------------------------------------
    with st.sidebar:
        st.markdown('### ⚙️ Cấu hình chung')
        default_path = 'embeddings_factures_osteopathie_1k_qwen.json'
        path = st.text_input('Đường dẫn file embedding', value=default_path, key='path_input')
        sample_size = st.number_input('Sample (0 = all)', min_value=0, value=1000, step=100, key='sample_size')
        max_objects = st.number_input('Max objects đọc (0 = no limit)', min_value=0, value=0, step=500, key='max_objects')
        seed = st.number_input('Seed', min_value=0, value=42, step=1, key='seed')
        show_raw = st.checkbox('Hiện bảng raw (chỉ 100 dòng đầu mỗi plot)', value=False, key='show_raw')
        algo = st.selectbox('Thuật toán giảm chiều', ['UMAP', 'PCA', 't-SNE'], index=0, key='algo')
        # Defaults for whichever algorithm-specific sliders are not shown.
        umap_neighbors, umap_min_dist, tsne_perplexity = 15, 0.1, 30
        if algo == 'UMAP':
            umap_neighbors = st.slider('UMAP n_neighbors', 5, 100, 15, 1, key='umap_neighbors')
            umap_min_dist = st.slider('UMAP min_dist', 0.0, 1.0, 0.1, 0.01, key='umap_min_dist')
        elif algo == 't-SNE':
            tsne_perplexity = st.slider('t-SNE perplexity', 5, 100, 30, 1, key='tsne_perplexity')
        st.markdown('### 🎨 Màu sắc & Cluster')
        cluster_source = st.radio('Nguồn cluster', ['KMeans','Load file','None'], index=0, horizontal=False, key='cluster_source')
        n_clusters = st.slider('Số cluster KMeans', 2, 100, 10, 1, disabled=(cluster_source != 'KMeans'), key='n_clusters')
        cluster_file_path = st.text_input('File cluster JSON', value='cluster/dbscan_results.json', disabled=(cluster_source != 'Load file'), key='cluster_file')
        noise_label = st.number_input('Noise label', value=-1, step=1, disabled=(cluster_source != 'Load file'), key='noise_label')
        path_filter = st.text_input('Filter filepath (comma OR)', value='', key='path_filter')
        color_by_substring = st.text_input('Color theo substring', value='', key='color_by_substring')
        palette_name = st.selectbox('Bảng màu', ['auto','okabe-ito','tol','plotly','bold','dark24','set3','d3','hcl'], index=0, help='okabe-ito, tol = thân thiện mù màu', key='palette')
        marker_size = st.slider('Kích thước điểm', 2, 15, 5, 1, key='marker_size')
        st.caption('File cluster chỉ có is_noise: non-noise -> 0, noise -> noise_label.')
        add_btn = st.button(' Thêm plot', type='primary')
        clear_btn = st.button('🧹 Xoá tất cả plot')
        if clear_btn:
            st.session_state['plots'].clear()
            st.success('Đã xoá toàn bộ plot.')

    def process_and_add_plot():
        """Build one plot from the current sidebar settings and store it."""
        path_local = path
        if not os.path.isfile(path_local):
            st.error(f'File không tồn tại: {path_local}')
            return
        # Loader failures are shown in the page instead of aborting the run.
        try:
            with st.spinner(f'Loading embeddings: {os.path.basename(path_local)}'):
                recs = load_embeddings(
                    path_local,
                    sample_size=sample_size or None,
                    sampling_seed=int(seed),
                    max_objects=max_objects or None,
                )
        except RuntimeError as exc:
            st.error(str(exc))
            return
        df = build_dataframe(recs)
        if df.empty:
            st.warning('Không có embedding nào.')
            return
        # Filter: keep rows whose path contains any comma-separated token.
        if path_filter.strip():
            tokens = [t.strip().lower() for t in path_filter.split(',') if t.strip()]
            if tokens:
                mask = df['filepath'].apply(lambda p: any(tok in p.lower() for tok in tokens))
                df = df[mask].reset_index(drop=True)
        if df.empty:
            st.warning('Sau filter không còn bản ghi.')
            return
        # Reduce
        try:
            X = np.vstack(df['embedding'].values).astype(np.float32)
            with st.spinner('Giảm chiều...'):
                coords, meta = reduce_embeddings(
                    X,
                    algo,
                    random_state=int(seed),
                    umap_neighbors=umap_neighbors,
                    umap_min_dist=umap_min_dist,
                    tsne_perplexity=tsne_perplexity,
                )
        except (RuntimeError, ValueError) as exc:
            st.error(str(exc))
            return
        df[['x','y','z']] = coords
        # Clustering
        noise = int(noise_label)
        if cluster_source == 'KMeans':
            try:
                with st.spinner('KMeans...'):
                    labels, sil = kmeans_cluster(coords, n_clusters, int(seed))
            except ValueError as exc:
                st.error(str(exc))
                return
            df['cluster'] = labels
            sil_msg = f'Silhouette: {sil:.4f}' if not math.isnan(sil) else 'Silhouette: N/A'
        elif cluster_source == 'Load file':
            sil_msg = 'Silhouette: N/A'
            if not os.path.isfile(cluster_file_path):
                st.warning(f'Không thấy file cluster: {cluster_file_path}; gán -1')
                df['cluster'] = -1
            else:
                try:
                    loaded_labels, mapping = load_cluster_file(cluster_file_path, len(df), noise_label=noise)
                except RuntimeError as exc:
                    st.error(str(exc))
                    return
                if mapping is not None:
                    filepaths = df['filepath']
                    miss = sum(fp not in mapping for fp in filepaths)
                    if miss:
                        st.info(f'{miss} filepath gán noise.')
                    df['cluster'] = np.array([mapping.get(fp, noise) for fp in filepaths], dtype=int)
                else:
                    df['cluster'] = loaded_labels
                # Silhouette over non-noise points, only with >= 2 real clusters.
                mask_valid = df['cluster'].to_numpy() != noise
                if len(set(df.loc[mask_valid, 'cluster'])) > 1:
                    try:
                        sil = silhouette_score(coords[mask_valid], df.loc[mask_valid,'cluster'])
                        sil_msg = f'Silhouette(ex noise): {sil:.4f}'
                    except ValueError:
                        sil_msg = 'Silhouette: N/A'
        else:
            df['cluster'] = -1
            sil_msg = 'No clustering'
        # Color grouping
        if color_by_substring.strip():
            subs = [s.strip() for s in color_by_substring.split(',') if s.strip()]

            def color_from_sub(p: str) -> str:
                lowered = p.lower()
                for ssub in subs:
                    if ssub.lower() in lowered:
                        return ssub
                return 'other'

            df['color_group'] = df['filepath'].apply(color_from_sub)
            color_col = 'color_group'
        else:
            color_col = 'cluster'
        # String labels make Plotly use a discrete colour per group.
        color_display_col = color_col + '_display'
        if np.issubdtype(df[color_col].dtype, np.number):
            df[color_display_col] = df[color_col].astype(int).astype(str)
        else:
            df[color_display_col] = df[color_col].astype(str)
        groups = df[color_display_col].unique()
        palette_seq = build_palette(palette_name, len(groups)) if palette_name else []
        import plotly.express as px
        fig = px.scatter_3d(
            df,
            x='x', y='y', z='z',
            color=color_display_col,
            color_discrete_sequence=palette_seq if palette_seq else None,
            hover_data={'filepath': True, 'cluster': True, 'x': ':.2f', 'y': ':.2f', 'z': ':.2f'},
            title=f'{os.path.basename(path_local)} ({algo})',
            opacity=0.9,
            height=700,
        )
        fig.update_traces(marker={'size': int(marker_size)})
        fig.update_layout(margin=dict(l=0,r=0,t=40,b=0))
        out_csv = df[['filepath','x','y','z','cluster']].to_csv(index=False).encode('utf-8')
        st.session_state['plots'].append({
            'path': path_local,
            'df_head': df.head(100) if show_raw else None,
            'fig': fig,
            'meta': meta,
            'algo': algo,
            'sil_msg': sil_msg,
            'csv': out_csv,
            'n': len(df),
            'dim': len(df['embedding'].iloc[0]),
        })
        st.success(f'Đã thêm plot: {os.path.basename(path_local)}')

    if add_btn:
        process_and_add_plot()
    # Render existing plots
    if st.session_state['plots']:
        st.markdown('---')
    for idx, plot_data in enumerate(st.session_state['plots']):
        with st.container():
            cols = st.columns([0.8,0.2])
            with cols[0]:
                st.subheader(f'#{idx+1} {os.path.basename(plot_data["path"])}')
            with cols[1]:
                if st.button('❌ Xoá', key=f'remove_{idx}'):
                    st.session_state['plots'].pop(idx)
                    # Rerunning restarts the script, so the loop never
                    # continues over the list it just mutated.
                    rerun_app()
            st.caption(f"Embeddings: {plot_data['n']} | Dim gốc: {plot_data['dim']} | {plot_data['sil_msg']}")
            st.plotly_chart(plot_data['fig'], use_container_width=True)
            with st.expander('Meta / Thông tin thêm'):
                st.json(plot_data['meta'])
            st.download_button('⬇️ CSV', plot_data['csv'], file_name=f"embedding_3d_{idx+1}.csv", mime='text/csv', key=f'dl_{idx}')
            if plot_data['df_head'] is not None:
                st.dataframe(plot_data['df_head'])
    if not st.session_state['plots']:
        st.info('Chưa có plot nào. Chọn file ở sidebar và nhấn " Thêm plot".')
if __name__ == '__main__':  # pragma: no cover
    # Under 'streamlit run', sys.argv only holds the script name, so main()
    # is always called. For a quick CLI smoke test pass '--cli-test'.
    import sys  # explicit import instead of reaching through os.sys

    if '--cli-test' in sys.argv:
        test_path = 'embeddings_factures_osteopathie_1k_qwen.json'
        if os.path.exists(test_path):
            recs = load_embeddings(test_path, sample_size=5)
            print(f'[CLI TEST] Loaded {len(recs)} embeddings dim={len(recs[0].embedding)}')
        else:
            print('[CLI TEST] Không tìm thấy file test.')
    else:
        main()

783
check_filter/visual_data.py Normal file
View File

@@ -0,0 +1,783 @@
"""
Streamlit app để trực quan hóa embedding 3 chiều (PCA / UMAP / t-SNE) + phân cụm.
Chạy:
streamlit run visual_data.py --server.port 8501
Yêu cầu cài đặt (một lần):
pip install streamlit plotly scikit-learn umap-learn numpy pandas
Tính năng:
- Load file JSON lớn chứa các object {"filepath": ..., "embedding": [...]} hoặc định dạng JSON lines.
- Tùy chọn sample n phần tử (random) để tăng tốc.
- Chọn thuật toán giảm chiều: PCA, UMAP, t-SNE.
- Tham số điều chỉnh: n_neighbors, min_dist (UMAP); perplexity (t-SNE); n_components=3.
- KMeans clustering (tuỳ chọn) để tô màu điểm; hoặc tô màu theo regex/substring trong tên file.
- Lọc theo từ khóa trong đường dẫn.
- Tải xuống toạ độ 3D + nhãn cluster.
File embedding quan sát được có thể không phải JSON array chuẩn; script sẽ thử:
1. Parse như JSON array.
2. Parse như JSON lines (mỗi dòng 1 object).
3. Parse thủ công bằng cách tìm pattern {"filepath": ... , "embedding": [ ... ]}.
Note: the file is currently always read fully into memory; size-based streaming reads are not implemented yet.
"""
from __future__ import annotations
import json
import os
import re
import math
import random
from dataclasses import dataclass
from typing import Iterable, List, Optional, Tuple, Dict, Any
from datetime import datetime
import numpy as np
import pandas as pd
import streamlit as st
import colorsys
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
from sklearn.cluster import KMeans
from sklearn.metrics import silhouette_score
try:
import umap # type: ignore
except ImportError: # pragma: no cover
umap = None # handled later
@dataclass
class EmbeddingRecord:
    """One embedding vector together with the file path it was stored under."""
    # Path string taken from the embeddings file entry.
    filepath: str
    # The embedding vector; annotated as floats, element types are not validated here.
    embedding: List[float]
def _smart_json_object_stream(raw_text: str) -> Iterable[str]:
    """Yield the text of each top-level ``{...}`` object found in a raw buffer.

    This is a fallback for content that is neither a standard JSON array nor
    JSON lines. Objects are located by balancing braces; everything outside
    an object is ignored, as is a trailing object that never closes.

    Braces inside double-quoted strings (for example a file path containing
    ``{`` or ``}``) are not counted, so they cannot split or merge objects.
    A raw newline ends string mode: JSON strings cannot contain one, and this
    keeps a single unterminated quote from swallowing the rest of the buffer.
    """
    start = -1          # index of the current object's opening brace, -1 when outside
    depth = 0           # brace nesting depth inside the current object
    in_string = False   # inside a double-quoted string of the current object
    escaped = False     # previous character was a backslash inside a string
    for pos, ch in enumerate(raw_text):
        if start < 0:
            if ch == '{':
                start, depth = pos, 1
                in_string = escaped = False
            continue
        if in_string:
            if escaped:
                escaped = False
            elif ch == '\\':
                escaped = True
            elif ch == '"' or ch == '\n':
                in_string = False
            continue
        if ch == '"':
            in_string = True
        elif ch == '{':
            depth += 1
        elif ch == '}':
            depth -= 1
            if depth == 0:
                # Slice the original text instead of accumulating characters.
                yield raw_text[start:pos + 1]
                start = -1
# Digits split by a stray newline inside a number, e.g. '1.2\n421875'.
_BROKEN_NUMBER_NEWLINE = re.compile(r"(\d)\n(\d)")


def _record_from_obj(obj) -> Optional[EmbeddingRecord]:
    """Convert one decoded JSON value to an EmbeddingRecord, or None if unusable.

    A usable value is a dict with a list under ``embedding`` and a non-empty
    path under ``filepath``, ``file_path`` or ``path`` (checked in that order).
    """
    if not isinstance(obj, dict) or 'embedding' not in obj:
        return None
    fp = str(obj.get('filepath') or obj.get('file_path') or obj.get('path') or '')
    emb = obj['embedding']
    if isinstance(emb, list) and fp:
        return EmbeddingRecord(fp, emb)
    return None


def _collect_records(objs, max_objects: Optional[int]) -> List[EmbeddingRecord]:
    """Collect usable records from decoded objects, stopping at max_objects."""
    records: List[EmbeddingRecord] = []
    for obj in objs:
        rec = _record_from_obj(obj)
        if rec is None:
            continue
        records.append(rec)
        if max_objects and len(records) >= max_objects:
            break
    return records


def _iter_json_lines(text: str):
    """Yield decoded objects from lines that look like one JSON object each."""
    for line in text.split('\n'):
        candidate = line.strip().rstrip(',')
        if not candidate.startswith('{'):
            continue
        try:
            yield json.loads(candidate)
        except json.JSONDecodeError:
            continue


def _iter_embedded_objects(text: str):
    """Yield decoded objects extracted heuristically by brace matching."""
    for obj_str in _smart_json_object_stream(text):
        if 'embedding' not in obj_str:
            continue
        try:
            # Re-join numbers that were broken across a newline before decoding.
            yield json.loads(_BROKEN_NUMBER_NEWLINE.sub(r"\1\2", obj_str))
        except (ValueError, RecursionError):
            continue


def load_embeddings(
    path: str,
    sample_size: Optional[int] = None,
    sampling_seed: int = 42,
    max_objects: Optional[int] = None,
) -> List[EmbeddingRecord]:
    """Load embeddings from a JSON array, JSON-lines, or loosely formatted file.

    The file is read into memory once and three parsers are tried in order:
    a JSON array, one JSON object per line, and finally heuristic extraction
    of ``{...}`` objects. The first format that applies provides the records.

    Args:
        path: file path (read as UTF-8).
        sample_size: if set and smaller than the number of loaded records, a
            random sample of this size is returned.
        sampling_seed: seed for the sample. A private ``random.Random`` is
            used, so the process-wide random state is left untouched while
            the selection for a given seed stays the same.
        max_objects: hard cap on the number of records collected (for speed).

    Returns:
        The list of loaded (and optionally sampled) records.

    Raises:
        RuntimeError: no usable record was found.
        OSError / UnicodeDecodeError: the file cannot be read.
    """
    with open(path, 'r', encoding='utf-8') as f:
        text = f.read()

    records: Optional[List[EmbeddingRecord]] = None
    stripped = text.strip()
    # First attempt: JSON array
    if stripped.startswith('[') and stripped.endswith(']'):
        try:
            arr = json.loads(stripped)
        except (ValueError, RecursionError):
            arr = None
        if isinstance(arr, list):
            records = _collect_records(arr, max_objects)
    if records is None:
        # Retry as JSON lines, then fall back to heuristic extraction.
        records = _collect_records(_iter_json_lines(text), max_objects)
        if not records:
            records = _collect_records(_iter_embedded_objects(text), max_objects)
    if not records:
        raise RuntimeError("Không load được embedding nào từ file.")
    # Random sample if needed
    if sample_size and sample_size < len(records):
        records = random.Random(sampling_seed).sample(records, sample_size)
    return records
def reduce_embeddings(
    X: np.ndarray,
    method: str,
    random_state: int = 42,
    umap_neighbors: int = 15,
    umap_min_dist: float = 0.1,
    tsne_perplexity: int = 30,
    tsne_learning_rate: str | float = 'auto',
) -> Tuple[np.ndarray, dict]:
    """Project high-dimensional embeddings to 3D.

    Args:
        X: 2-D array with one embedding per row.
        method: ``'PCA'``, ``'UMAP'`` or ``'t-SNE'``.
        random_state: seed passed to the chosen reducer.
        umap_neighbors: UMAP ``n_neighbors``.
        umap_min_dist: UMAP ``min_dist``.
        tsne_perplexity: requested t-SNE perplexity; it is lowered for small
            inputs (see ``effective_perplexity`` in the returned meta).
        tsne_learning_rate: t-SNE ``learning_rate``.

    Returns:
        ``(coords, meta)``: the reducer output requested with 3 components,
        and a dict holding ``method`` plus one method-specific entry.

    Raises:
        RuntimeError: UMAP was requested but ``umap-learn`` is not installed.
        ValueError: ``method`` is not one of the supported names.
    """
    meta = {"method": method}
    if method == 'PCA':
        pca = PCA(n_components=3, random_state=random_state)
        coords = pca.fit_transform(X)
        meta['explained_variance_ratio'] = pca.explained_variance_ratio_.tolist()
        return coords, meta
    if method == 'UMAP':
        if umap is None:
            raise RuntimeError("Chưa cài umap-learn: pip install umap-learn")
        reducer = umap.UMAP(
            n_components=3,
            n_neighbors=umap_neighbors,
            min_dist=umap_min_dist,
            metric='cosine',
            random_state=random_state,
        )
        coords = reducer.fit_transform(X)
        # Number of stored entries in the fitted graph.
        meta['umap_graph_connectivity'] = float(reducer.graph_.getnnz())
        return coords, meta
    if method == 't-SNE':
        n_samples = X.shape[0]
        # scikit-learn requires perplexity < n_samples; the extra n_samples - 1
        # cap covers tiny inputs where the floor of 5 alone would be too large.
        perplexity = min(
            tsne_perplexity,
            max(5, (n_samples - 1) // 3),
            max(1, n_samples - 1),
        )
        # The iteration count is left at the library default (1000): the
        # keyword was renamed from n_iter to max_iter in newer scikit-learn,
        # so omitting it works with both old and new releases.
        tsne = TSNE(
            n_components=3,
            perplexity=perplexity,
            learning_rate=tsne_learning_rate,
            init='pca',
            random_state=random_state,
            verbose=0,
        )
        coords = tsne.fit_transform(X)
        meta['effective_perplexity'] = perplexity
        return coords, meta
    raise ValueError(f"Unknown method {method}")
def kmeans_cluster(coords: np.ndarray, n_clusters: int, seed: int = 42) -> Tuple[np.ndarray, float]:
    """Cluster points with KMeans and score the result.

    Args:
        coords: 2-D array with one point per row.
        n_clusters: requested number of clusters; capped at the number of
            rows, because KMeans rejects more clusters than samples.
        seed: ``random_state`` passed to KMeans.

    Returns:
        ``(labels, score)``. ``score`` is the silhouette score, or NaN when
        it is not computed: fewer than two effective clusters, fewer than
        five rows per cluster, or the metric rejecting the labelling. With at
        most one effective cluster every row gets label 0.
    """
    n_samples = coords.shape[0]
    k = min(int(n_clusters), n_samples)
    if k <= 1:
        return np.zeros(n_samples, dtype=int), float('nan')
    km = KMeans(n_clusters=k, n_init='auto', random_state=seed)
    labels = km.fit_predict(coords)
    score = float('nan')
    if len(set(labels)) > 1 and n_samples >= k * 5:
        try:
            score = float(silhouette_score(coords, labels))
        except ValueError:
            # Best effort: an unusable labelling simply leaves the score as NaN.
            pass
    return labels, score
def build_dataframe(recs: List[EmbeddingRecord]) -> pd.DataFrame:
    """Return a two-column frame (``filepath``, ``embedding``) with one row per record."""
    filepaths = []
    vectors = []
    for rec in recs:
        filepaths.append(rec.filepath)
        vectors.append(rec.embedding)
    columns = {'filepath': filepaths, 'embedding': vectors}
    return pd.DataFrame(columns)
def _cluster_label_from_item(item, noise_label):
    """Return the raw (unconverted) cluster label stored in one result dict.

    Keys are checked in priority order ``cluster``, ``cluster_id``, ``label``.
    An item that only carries ``is_noise`` maps to 0 (not noise) or
    ``noise_label`` (noise); an item with none of these keys maps to 0.
    """
    for key in ('cluster', 'cluster_id', 'label'):
        if key in item:
            return item[key]
    if 'is_noise' in item:
        return noise_label if item.get('is_noise') else 0
    return 0


def load_cluster_file(
    path: str,
    expected_n: int,
    noise_label: int = -1,
) -> Tuple[np.ndarray, Optional[Dict[str, int]]]:
    """Load cluster labels from a JSON result file.

    Supported layouts are ``{"results": [...]}`` and a bare list ``[...]``.
    Each item may contain one of: cluster, cluster_id, label, is_noise, and
    optionally filepath. If only is_noise exists: non-noise -> 0,
    noise -> noise_label.

    Args:
        path: JSON file to read (UTF-8).
        expected_n: length of the returned label array.
        noise_label: label used for noise and for entries that cannot be
            converted to an integer.

    Returns:
        ``(labels, mapping)``. When at least one item has a ``filepath`` key,
        ``mapping`` is a ``{filepath: label}`` dict and ``labels`` is filled
        with ``noise_label`` only (the caller maps by path). Otherwise
        ``mapping`` is ``None`` and ``labels[i]`` is the label of the i-th
        item; positions beyond the file's length keep ``noise_label``.

    Raises:
        RuntimeError: the file cannot be read/decoded or has an unsupported
            top-level layout.
    """
    try:
        with open(path, 'r', encoding='utf-8') as f:
            content = json.load(f)
    except (OSError, ValueError) as e:
        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
        raise RuntimeError(f'Không đọc được file cluster: {e}') from e
    if isinstance(content, dict) and 'results' in content:
        items = content['results']
    elif isinstance(content, list):
        items = content
    else:
        items = None
    if not isinstance(items, list):
        raise RuntimeError('Định dạng file cluster không hợp lệ (cần list hoặc có key "results").')

    labels = np.full(expected_n, noise_label, dtype=int)
    conversion_errors = (TypeError, ValueError, OverflowError)

    # Filepath-based mapping as soon as any item names a file.
    if any(isinstance(it, dict) and 'filepath' in it for it in items):
        mapping = {}
        for it in items:
            if not isinstance(it, dict):
                continue
            fp = it.get('filepath')
            if not fp:
                continue
            try:
                mapping[str(fp)] = int(_cluster_label_from_item(it, noise_label))
            except conversion_errors:
                continue
        return labels, mapping  # second value used later to map onto df

    # Index-based mapping: item i labels row i. Entries that cannot be
    # converted keep noise_label instead of being dropped, so one bad entry
    # no longer shifts every following label onto the wrong row.
    for i, it in enumerate(items[:max(expected_n, 0)]):
        raw = _cluster_label_from_item(it, noise_label) if isinstance(it, dict) else it
        try:
            labels[i] = int(raw)
        except conversion_errors:
            labels[i] = noise_label
    return labels, None
def main():  # pragma: no cover - Streamlit entry
    """Run the multi-plot Streamlit viewer with an on-disk plot cache.

    The sidebar collects the embedding file path plus sampling, reduction,
    clustering and colouring options. Pressing the add button loads the
    embeddings, projects them to 3D, assigns cluster labels and appends a
    Plotly figure to ``st.session_state['plots']``. Plots are persisted under
    ``.visual_cache`` (one CSV per plot plus ``index.json``) and restored
    once per session. Each stored plot is rendered with remove and rename
    controls, its metadata and a CSV download.

    Known limitation: the cached CSV only holds filepath, x, y, z and
    cluster, so a plot coloured by substring is restored coloured by cluster.
    """
    st.set_page_config(page_title="Embedding 3D Viewer", layout="wide")
    st.title("🔍 Embedding 3D Viewer (Multi)")
    st.caption("Mỗi lần nạp file sẽ thêm một đồ thị mới ở bên dưới.")
    if 'plots' not in st.session_state:
        st.session_state['plots'] = []
    if 'cache_loaded' not in st.session_state:
        st.session_state['cache_loaded'] = False
    CACHE_DIR = '.visual_cache'
    INDEX_FILE = os.path.join(CACHE_DIR, 'index.json')

    def rerun_app():
        # st.experimental_rerun was superseded by st.rerun; prefer the new
        # name and fall back so older Streamlit versions keep working.
        rerun = getattr(st, 'rerun', None) or getattr(st, 'experimental_rerun')
        rerun()

    # ---- palette / figure helpers (shared by cache restore and new plots) --
    def get_base_palette(name: str) -> List[str]:
        """Return the fixed colour list of a named palette ([] if unknown)."""
        name = name.lower()
        if name == 'okabe-ito':
            return ["#000000","#E69F00","#56B4E9","#009E73","#F0E442","#0072B2","#D55E00","#CC79A7"]
        if name == 'tol':
            return ["#4477AA","#66CCEE","#228833","#CCBB44","#EE6677","#AA3377","#BBBBBB","#000000","#EEDD88","#FFAABB","#99DDFF","#44BB99"]
        plotly_attr = {'plotly': 'Plotly', 'bold': 'Bold', 'dark24': 'Dark24', 'set3': 'Set3', 'd3': 'D3'}.get(name)
        if plotly_attr:
            from plotly.colors import qualitative as q
            return list(getattr(q, plotly_attr))
        return []

    def generate_hcl_like(n: int) -> List[str]:
        """Generate n colours by stepping the HSV hue evenly around the wheel."""
        cols = []
        for i in range(n):
            h = (i / n) % 1.0
            s = 0.55 + 0.35 * ((i * 37) % 2)
            v = 0.85 if (i % 3) else 0.98
            r, g, b = colorsys.hsv_to_rgb(h, s, v)
            cols.append('#%02X%02X%02X' % (int(r * 255), int(g * 255), int(b * 255)))
        return cols

    def build_palette(name: str, k: int) -> List[str]:
        """Return k colours for palette `name`; [] means "let Plotly decide"."""
        if name == 'auto':
            return []
        if name == 'hcl':
            return generate_hcl_like(k)
        base = get_base_palette(name)
        if k <= len(base):
            return base[:k]
        cols = list(base)
        gold = 0.61803398875
        h = 0.1
        misses = 0
        while len(cols) < k:
            # Golden-ratio hue steps spread the extra colours over the wheel.
            h = (h + gold) % 1.0
            r, g, b = colorsys.hsv_to_rgb(h, 0.6, 0.95)
            newc = '#%02X%02X%02X' % (int(r * 255), int(g * 255), int(b * 255))
            # Accept a duplicate after many consecutive misses so the loop
            # always terminates, even for a very large k.
            if newc not in cols or misses > 64:
                cols.append(newc)
                misses = 0
            else:
                misses += 1
        return cols

    def make_figure(df, color_col, palette_key, title, size, hover_data):
        """Build the 3D scatter figure used for both new and restored plots."""
        import plotly.express as px
        groups = df[color_col].astype(str).unique() if color_col else []
        palette_seq = build_palette(palette_key, len(groups)) if palette_key else []
        fig = px.scatter_3d(
            df,
            x='x', y='y', z='z',
            color=color_col,
            color_discrete_sequence=palette_seq if palette_seq else None,
            hover_data=hover_data,
            title=title,
            opacity=0.9,
            height=700,
        )
        fig.update_traces(marker={'size': int(size)})
        fig.update_layout(margin=dict(l=0,r=0,t=40,b=0))
        return fig

    # ---- cache persistence -------------------------------------------------
    def ensure_cache_dir():
        os.makedirs(CACHE_DIR, exist_ok=True)

    def save_cache():
        """Persist plot metadata (index.json) and data (one CSV per plot)."""
        ensure_cache_dir()
        plots = st.session_state['plots']
        # File names already owned by a live plot. A new plot must never take
        # one of these, otherwise it would overwrite another plot's data once
        # list positions have shifted after a removal.
        used = {os.path.basename(p['data_csv_path']) for p in plots if p.get('data_csv_path')}
        index: List[Dict[str, Any]] = []
        for p in plots:
            data_csv_path = p.get('data_csv_path')
            if not data_csv_path:
                k = 1
                while f'plot_{k}.csv' in used:
                    k += 1
                data_csv_path = os.path.join(CACHE_DIR, f'plot_{k}.csv')
                try:
                    # 'csv' holds the full dataset bytes built when the plot was created.
                    with open(data_csv_path, 'wb') as fcsv:
                        fcsv.write(p['csv'])
                except OSError:
                    continue
                p['data_csv_path'] = data_csv_path
                used.add(os.path.basename(data_csv_path))
            index.append({
                'path': p.get('path'),
                'name': p.get('name'),
                'algo': p.get('algo'),
                'sil_msg': p.get('sil_msg'),
                'meta': p.get('meta'),
                'marker_size': p.get('marker_size', 5),
                'palette_name': p.get('palette_name'),
                'data_csv': os.path.basename(data_csv_path),
                'timestamp': p.get('timestamp'),
                'dim': p.get('dim'),
            })
        try:
            # Serialise first so a failure cannot leave a half-written index.
            payload = json.dumps({'plots': index, 'saved_at': datetime.utcnow().isoformat()}, ensure_ascii=False, indent=2)
            with open(INDEX_FILE, 'w', encoding='utf-8') as f:
                f.write(payload)
        except (OSError, TypeError, ValueError) as e:
            st.warning(f'Lưu cache lỗi: {e}')

    def load_cache():
        """Restore cached plots into the session; runs at most once per session."""
        if st.session_state['cache_loaded']:
            return
        st.session_state['cache_loaded'] = True
        if not os.path.isfile(INDEX_FILE):
            return
        try:
            with open(INDEX_FILE, 'r', encoding='utf-8') as f:
                data = json.load(f)
            for meta_entry in data.get('plots', []):
                csv_file = os.path.join(CACHE_DIR, meta_entry.get('data_csv',''))
                if not os.path.isfile(csv_file):
                    continue
                try:
                    df = pd.read_csv(csv_file)
                except (OSError, ValueError):
                    continue
                # Choose the colour column. A numeric cluster column is turned
                # into strings so the restored figure uses discrete colours,
                # like the figure that was originally created.
                if 'color_display' in df.columns:
                    color_display_col = 'color_display'
                elif 'cluster_display' in df.columns:
                    color_display_col = 'cluster_display'
                elif 'cluster' in df.columns:
                    df['cluster_display'] = df['cluster'].astype(str)
                    color_display_col = 'cluster_display'
                else:
                    color_display_col = None
                fig = make_figure(
                    df,
                    color_display_col,
                    meta_entry.get('palette_name','auto'),
                    f'{os.path.basename(meta_entry.get("path","") or "")} ({meta_entry.get("algo")})',
                    meta_entry.get('marker_size',5),
                    {'filepath': True, 'cluster': True},
                )
                with open(csv_file, 'rb') as fcsv:
                    csv_bytes = fcsv.read()
                # The original embedding dimension cannot be recomputed from
                # the CSV; use the stored value when the index has one.
                dim = meta_entry.get('dim')
                st.session_state['plots'].append({
                    'path': meta_entry.get('path'),
                    'name': meta_entry.get('name') or os.path.basename(meta_entry.get('path') or ''),
                    'fig': fig,
                    'algo': meta_entry.get('algo'),
                    'meta': meta_entry.get('meta',{}),
                    'sil_msg': meta_entry.get('sil_msg'),
                    'csv': csv_bytes,
                    'n': int(df.shape[0]),
                    'dim': dim if dim is not None else 'N/A',
                    'palette_name': meta_entry.get('palette_name'),
                    'marker_size': meta_entry.get('marker_size',5),
                    'data_csv_path': csv_file,
                    'timestamp': meta_entry.get('timestamp'),
                    'df_head': None,  # raw rows are not cached to keep the cache light
                })
        except Exception as e:
            # UI boundary: report the problem and keep the app usable.
            st.warning(f'Lỗi load cache: {e}')

    # Load cache only once per session (initial render)
    load_cache()

    # ---- sidebar -----------------------------------------------------------
    with st.sidebar:
        st.markdown('### ⚙️ Cấu hình chung')
        default_path = 'embeddings_factures_osteopathie_1k_qwen.json'
        path = st.text_input('Đường dẫn file embedding', value=default_path, key='path_input')
        plot_name_input = st.text_input('Tên plot (tuỳ chọn)', value='', key='plot_name')
        sample_size = st.number_input('Sample (0 = all)', min_value=0, value=0, step=100, key='sample_size')
        max_objects = st.number_input('Max objects đọc (0 = no limit)', min_value=0, value=0, step=500, key='max_objects')
        seed = st.number_input('Seed', min_value=0, value=42, step=1, key='seed')
        show_raw = st.checkbox('Hiện bảng raw (chỉ 100 dòng đầu mỗi plot)', value=False, key='show_raw')
        algo = st.selectbox('Thuật toán giảm chiều', ['UMAP', 'PCA', 't-SNE'], index=0, key='algo')
        # Defaults for whichever algorithm-specific sliders are not shown.
        umap_neighbors, umap_min_dist, tsne_perplexity = 15, 0.1, 30
        if algo == 'UMAP':
            umap_neighbors = st.slider('UMAP n_neighbors', 5, 100, 15, 1, key='umap_neighbors')
            umap_min_dist = st.slider('UMAP min_dist', 0.0, 1.0, 0.1, 0.01, key='umap_min_dist')
        elif algo == 't-SNE':
            tsne_perplexity = st.slider('t-SNE perplexity', 5, 100, 30, 1, key='tsne_perplexity')
        st.markdown('### 🎨 Màu sắc & Cluster')
        cluster_source = st.radio('Nguồn cluster', ['KMeans','Load file','None'], index=0, horizontal=False, key='cluster_source')
        n_clusters = st.slider('Số cluster KMeans', 2, 100, 10, 1, disabled=(cluster_source != 'KMeans'), key='n_clusters')
        cluster_file_path = st.text_input('File cluster JSON', value='cluster/dbscan_results.json', disabled=(cluster_source != 'Load file'), key='cluster_file')
        noise_label = st.number_input('Noise label', value=-1, step=1, disabled=(cluster_source != 'Load file'), key='noise_label')
        path_filter = st.text_input('Filter filepath (comma OR)', value='', key='path_filter')
        color_by_substring = st.text_input('Color theo substring', value='', key='color_by_substring')
        palette_name = st.selectbox('Bảng màu', ['auto','okabe-ito','tol','plotly','bold','dark24','set3','d3','hcl'], index=0, help='okabe-ito, tol = thân thiện mù màu', key='palette')
        marker_size = st.slider('Kích thước điểm', 2, 15, 3, 1, key='marker_size')
        st.caption('File cluster chỉ có is_noise: non-noise -> 0, noise -> noise_label.')
        add_btn = st.button(' Thêm plot', type='primary')
        clear_btn = st.button('🧹 Xoá tất cả plot')
        if clear_btn:
            st.session_state['plots'].clear()
            # Clear cache directory (best effort: leftovers are harmless).
            try:
                if os.path.isdir(CACHE_DIR):
                    for fname in os.listdir(CACHE_DIR):
                        try:
                            os.remove(os.path.join(CACHE_DIR, fname))
                        except OSError:
                            pass
                    os.rmdir(CACHE_DIR)
            except OSError:
                pass
            st.success('Đã xoá toàn bộ plot (và cache).')

    def process_and_add_plot():
        """Build one plot from the current sidebar settings, store and cache it."""
        path_local = path
        if not os.path.isfile(path_local):
            st.error(f'File không tồn tại: {path_local}')
            return
        # Loader failures are shown in the page instead of aborting the run.
        try:
            with st.spinner(f'Loading embeddings: {os.path.basename(path_local)}'):
                recs = load_embeddings(
                    path_local,
                    sample_size=sample_size or None,
                    sampling_seed=int(seed),
                    max_objects=max_objects or None,
                )
        except RuntimeError as exc:
            st.error(str(exc))
            return
        df = build_dataframe(recs)
        if df.empty:
            st.warning('Không có embedding nào.')
            return
        # Filter: keep rows whose path contains any comma-separated token.
        if path_filter.strip():
            tokens = [t.strip().lower() for t in path_filter.split(',') if t.strip()]
            if tokens:
                mask = df['filepath'].apply(lambda p: any(tok in p.lower() for tok in tokens))
                df = df[mask].reset_index(drop=True)
        if df.empty:
            st.warning('Sau filter không còn bản ghi.')
            return
        # Reduce
        try:
            X = np.vstack(df['embedding'].values).astype(np.float32)
            with st.spinner('Giảm chiều...'):
                coords, meta = reduce_embeddings(
                    X,
                    algo,
                    random_state=int(seed),
                    umap_neighbors=umap_neighbors,
                    umap_min_dist=umap_min_dist,
                    tsne_perplexity=tsne_perplexity,
                )
        except (RuntimeError, ValueError) as exc:
            st.error(str(exc))
            return
        df[['x','y','z']] = coords
        # Clustering
        noise = int(noise_label)
        if cluster_source == 'KMeans':
            try:
                with st.spinner('KMeans...'):
                    labels, sil = kmeans_cluster(coords, n_clusters, int(seed))
            except ValueError as exc:
                st.error(str(exc))
                return
            df['cluster'] = labels
            sil_msg = f'Silhouette: {sil:.4f}' if not math.isnan(sil) else 'Silhouette: N/A'
        elif cluster_source == 'Load file':
            sil_msg = 'Silhouette: N/A'
            if not os.path.isfile(cluster_file_path):
                st.warning(f'Không thấy file cluster: {cluster_file_path}; gán -1')
                df['cluster'] = -1
            else:
                try:
                    loaded_labels, mapping = load_cluster_file(cluster_file_path, len(df), noise_label=noise)
                except RuntimeError as exc:
                    st.error(str(exc))
                    return
                if mapping is not None:
                    filepaths = df['filepath']
                    miss = sum(fp not in mapping for fp in filepaths)
                    if miss:
                        st.info(f'{miss} filepath gán noise.')
                    df['cluster'] = np.array([mapping.get(fp, noise) for fp in filepaths], dtype=int)
                else:
                    df['cluster'] = loaded_labels
                # Silhouette over non-noise points, only with >= 2 real clusters.
                mask_valid = df['cluster'].to_numpy() != noise
                if len(set(df.loc[mask_valid, 'cluster'])) > 1:
                    try:
                        sil = silhouette_score(coords[mask_valid], df.loc[mask_valid,'cluster'])
                        sil_msg = f'Silhouette(ex noise): {sil:.4f}'
                    except ValueError:
                        sil_msg = 'Silhouette: N/A'
        else:
            df['cluster'] = -1
            sil_msg = 'No clustering'
        # Color grouping
        if color_by_substring.strip():
            subs = [s.strip() for s in color_by_substring.split(',') if s.strip()]

            def color_from_sub(p: str) -> str:
                lowered = p.lower()
                for ssub in subs:
                    if ssub.lower() in lowered:
                        return ssub
                return 'other'

            df['color_group'] = df['filepath'].apply(color_from_sub)
            color_col = 'color_group'
        else:
            color_col = 'cluster'
        # String labels make Plotly use a discrete colour per group.
        color_display_col = color_col + '_display'
        if np.issubdtype(df[color_col].dtype, np.number):
            df[color_display_col] = df[color_col].astype(int).astype(str)
        else:
            df[color_display_col] = df[color_col].astype(str)
        fig = make_figure(
            df,
            color_display_col,
            palette_name,
            f'{os.path.basename(path_local)} ({algo})',
            marker_size,
            {'filepath': True, 'cluster': True, 'x': ':.2f', 'y': ':.2f', 'z': ':.2f'},
        )
        out_csv = df[['filepath','x','y','z','cluster']].to_csv(index=False).encode('utf-8')
        st.session_state['plots'].append({
            'path': path_local,
            'name': plot_name_input.strip() or os.path.basename(path_local),
            'df_head': df.head(100) if show_raw else None,
            'fig': fig,
            'meta': meta,
            'algo': algo,
            'sil_msg': sil_msg,
            'csv': out_csv,
            'n': len(df),
            'dim': len(df['embedding'].iloc[0]),
            'palette_name': palette_name,
            'marker_size': marker_size,
            'timestamp': datetime.utcnow().isoformat(),
        })
        # Persist new state
        save_cache()
        st.success(f'Đã thêm plot: {os.path.basename(path_local)}')

    if add_btn:
        process_and_add_plot()
    # Render existing plots
    if st.session_state['plots']:
        st.markdown('---')
    for idx, plot_data in enumerate(st.session_state['plots']):
        # Widget keys follow the plot (creation timestamp) rather than its
        # list position; with positional keys, the rename box of a removed
        # plot would hand its stale text to whichever plot moved into the slot.
        uid = plot_data.get('timestamp') or idx
        display_name = plot_data.get('name') or os.path.basename(plot_data.get('path') or '')
        with st.container():
            cols = st.columns([0.8,0.2])
            with cols[0]:
                st.subheader(f'#{idx+1} {display_name}')
            with cols[1]:
                if st.button('❌ Xoá', key=f'remove_{uid}'):
                    removed = st.session_state['plots'].pop(idx)
                    csv_path = removed.get('data_csv_path')
                    if csv_path:
                        try:
                            os.remove(csv_path)
                        except OSError:
                            pass  # best effort: a leftover file is never indexed again
                    save_cache()
                    # Rerunning restarts the script, so the loop never
                    # continues over the list it just mutated.
                    rerun_app()
            # Rename inline
            new_name = st.text_input('Đổi tên plot', value=plot_data.get('name') or '', key=f'rename_{uid}')
            if new_name.strip() and new_name.strip() != plot_data.get('name'):
                plot_data['name'] = new_name.strip()
                save_cache()
            st.caption(f"Embeddings: {plot_data['n']} | Dim gốc: {plot_data['dim']} | {plot_data['sil_msg']}")
            st.plotly_chart(plot_data['fig'], use_container_width=True)
            with st.expander('Meta / Thông tin thêm'):
                st.json(plot_data['meta'])
            st.download_button('⬇️ CSV', plot_data['csv'], file_name=f"embedding_3d_{idx+1}.csv", mime='text/csv', key=f'dl_{uid}')
            df_head_cached = plot_data.get('df_head')
            if df_head_cached is not None:
                st.dataframe(df_head_cached)
    if not st.session_state['plots']:
        st.info('Chưa có plot nào. Chọn file ở sidebar và nhấn " Thêm plot".')
if __name__ == '__main__':  # pragma: no cover
    # Under 'streamlit run', sys.argv only holds the script name, so main()
    # is always called. For a quick CLI smoke test pass '--cli-test'.
    import sys  # explicit import instead of reaching through os.sys

    if '--cli-test' in sys.argv:
        test_path = 'embeddings_factures_osteopathie_1k_qwen.json'
        if os.path.exists(test_path):
            recs = load_embeddings(test_path, sample_size=5)
            print(f'[CLI TEST] Loaded {len(recs)} embeddings dim={len(recs[0].embedding)}')
        else:
            print('[CLI TEST] Không tìm thấy file test.')
    else:
        main()

View File

@@ -0,0 +1,670 @@
#!/usr/bin/env python3
"""
Simple script to run automatic clustering methods (DBSCAN, Mean Shift, Affinity Propagation)
These methods don't require specifying the number of clusters beforehand.
"""
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN, MeanShift, AffinityPropagation
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import argparse
import warnings
warnings.filterwarnings('ignore')
class AutoClustering:
def __init__(self, embeddings_path):
# Remember where the embeddings JSON lives and load it right away.
# embeddings_path: path that load_embeddings() passes to open().
self.embeddings_path = embeddings_path
# Placeholders; load_embeddings() below replaces both of them.
self.embeddings = None
self.file_paths = None
# Constructing the object therefore performs file I/O.
self.load_embeddings()
def load_embeddings(self):
    """Load embeddings from the JSON file and standardize them.

    The file at ``self.embeddings_path`` must hold a JSON list of objects,
    each providing a ``filepath`` and an ``embedding`` entry.

    Sets:
        self.file_paths: the ``filepath`` values, in file order.
        self.embeddings: float32 array built from the ``embedding`` values.
        self.scaler: the fitted ``StandardScaler``.
        self.embeddings_scaled: standardized copy of ``self.embeddings``.

    Raises:
        ValueError: if the file holds no records or the embeddings do not
            form a 2-D array.
    """
    print(f"Loading embeddings from {self.embeddings_path}...")
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(self.embeddings_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    self.file_paths = [item['filepath'] for item in data]
    self.embeddings = np.array([item['embedding'] for item in data], dtype=np.float32)

    # An empty list yields a 1-D array, so `.shape[1]` below would otherwise
    # fail with an unhelpful IndexError.
    if self.embeddings.ndim != 2 or self.embeddings.shape[0] == 0:
        raise ValueError(
            f"No usable embeddings in {self.embeddings_path}: "
            f"expected a non-empty list of equal-length vectors, got array shape {self.embeddings.shape}"
        )

    print(f"Loaded {len(self.file_paths)} samples with embedding dimension {self.embeddings.shape[1]}")

    # Standardize embeddings for better clustering
    self.scaler = StandardScaler()
    self.embeddings_scaled = self.scaler.fit_transform(self.embeddings)
def run_dbscan(self, min_samples_candidates=None):
    """Grid-search DBSCAN parameters and return the best labelling.

    ``eps`` candidates come from three sources: percentiles of k-nearest-
    neighbour distances, scaled statistics of sampled pairwise distances, and
    a fixed list of manual values. Every ``(eps, min_samples)`` pair is fitted
    on ``self.embeddings_scaled``. Runs with at least two clusters and less
    than 90% noise are scored with the silhouette coefficient computed on the
    non-noise points only.

    Args:
        min_samples_candidates: iterable of ``min_samples`` values to try.
            Defaults to ``[60]``, the single value this script was pinned to.

    Returns:
        The label array of the best-scoring run, or ``None`` when no run
        produced a scorable clustering.

    Side effects:
        Prints progress and a summary. When at least one fit completed, the
        trial records are handed to ``self.save_dbscan_grid_search_results``.
    """
    from scipy.spatial.distance import pdist

    print("\n" + "="*50)
    print("RUNNING DBSCAN CLUSTERING WITH EXTENSIVE GRID SEARCH")
    print("="*50)

    data = self.embeddings_scaled
    n_samples = len(data)
    eps_candidates = []

    # Method 1: k-th nearest-neighbour distance percentiles for several k.
    percentiles = (60, 65, 70, 75, 80, 85, 90, 95)
    for k in (5, 10, 15, 20, 25, 30):
        k_actual = min(k, n_samples // 4)
        if k_actual < 3:
            continue
        distances, _ = NearestNeighbors(n_neighbors=k_actual).fit(data).kneighbors(data)
        kth_distances = distances[:, k_actual - 1]
        eps_candidates.extend(np.percentile(kth_distances, percentiles))

    # Method 2: scaled mean / median / std of pairwise distances
    # (computed on a random sample of at most 1000 rows for efficiency).
    sample_size = min(1000, n_samples)
    sample_indices = np.random.choice(n_samples, sample_size, replace=False)
    pairwise_distances = pdist(data[sample_indices])
    if pairwise_distances.size:
        mean_dist = np.mean(pairwise_distances)
        median_dist = np.median(pairwise_distances)
        std_dist = np.std(pairwise_distances)
        eps_candidates.extend(mean_dist * f for f in (0.3, 0.4, 0.5, 0.6, 0.7))
        eps_candidates.extend(median_dist * f for f in (0.3, 0.4, 0.5, 0.6))
        eps_candidates.extend(std_dist * f for f in (0.5, 0.8, 1.0, 1.2))

    # Method 3: manual eps values for different scales.
    eps_candidates.extend([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
                           1.2, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0])

    # Deduplicate, drop non-positive/NaN values and sort. Casting to builtin
    # float keeps NumPy scalars out of the JSON report written later.
    eps_candidates = sorted({float(eps) for eps in eps_candidates if eps > 0})

    if min_samples_candidates is None:
        min_samples_candidates = [60]
    else:
        min_samples_candidates = list(min_samples_candidates)

    best_score = -1
    best_params = None
    best_labels = None

    total_combinations = len(eps_candidates) * len(min_samples_candidates)
    print(f"Testing {len(eps_candidates)} eps values x {len(min_samples_candidates)} min_samples values")
    print(f"Total combinations: {total_combinations}")
    print("This may take a while...\n")

    # Track all results for analysis
    all_results = []
    current_combination = 0

    for eps in eps_candidates:
        for min_samples in min_samples_candidates:
            current_combination += 1

            # Progress indicator
            if current_combination % 50 == 0 or current_combination == total_combinations:
                progress = (current_combination / total_combinations) * 100
                print(f"Progress: {current_combination}/{total_combinations} ({progress:.1f}%)")

            try:
                labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(data)
            except Exception as exc:
                # Skip problematic parameter combinations, but say so.
                print(f"eps={eps:.4f}, min_samples={min_samples}: failed ({str(exc)[:50]}...)")
                continue

            n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
            n_noise = int(np.sum(labels == -1))
            noise_ratio = n_noise / len(labels)

            # Every record carries a 'silhouette_score' key (None when the
            # run was not scored) so the report writers and the DataFrame
            # analysis below never hit a missing key/column.
            result_info = {
                'eps': eps,
                'min_samples': min_samples,
                'n_clusters': n_clusters,
                'n_noise': n_noise,
                'noise_ratio': noise_ratio,
                'silhouette_score': None,
            }

            # Score only meaningful clusterings: >= 2 clusters, not mostly noise.
            mask = labels != -1
            if n_clusters >= 2 and noise_ratio < 0.9 and np.sum(mask) > 1:
                try:
                    # Silhouette is computed on non-noise points only.
                    score = float(silhouette_score(data[mask], labels[mask]))
                except Exception:
                    score = None
                if score is not None:
                    result_info['silhouette_score'] = score
                    if score > 0.1:  # Only show decent scores
                        print(f"eps={eps:.4f}, min_samples={min_samples}: {n_clusters} clusters, {n_noise} noise ({noise_ratio:.1%}), silhouette={score:.4f}")
                    if score > best_score:
                        best_score = score
                        best_params = (eps, min_samples)
                        best_labels = labels

            all_results.append(result_info)

    # Analysis of results
    print("\n" + "="*50)
    print("DBSCAN GRID SEARCH ANALYSIS")
    print("="*50)

    if all_results:
        import pandas as pd
        df_results = pd.DataFrame(all_results)
        print(f"Total parameter combinations tested: {len(df_results)}")

        # Valid results (with clusters)
        valid_results = df_results[df_results['n_clusters'] >= 2]
        print(f"Combinations that produced clusters: {len(valid_results)}")

        if len(valid_results) > 0:
            scored_results = valid_results.dropna(subset=['silhouette_score'])
            if len(scored_results) > 0:
                print(f"Combinations with valid silhouette scores: {len(scored_results)}")
                print(f"Best silhouette score: {scored_results['silhouette_score'].max():.4f}")
                print(f"Mean silhouette score: {scored_results['silhouette_score'].mean():.4f}")

                # Top 5 results; the column may be object dtype when some
                # runs were unscored, so sort numerically.
                top_results = scored_results.sort_values(
                    'silhouette_score', ascending=False, key=lambda s: s.astype(float)
                ).head(5)
                print("\nTop 5 parameter combinations:")
                for _, row in top_results.iterrows():
                    print(f" eps={row['eps']:.4f}, min_samples={row['min_samples']}: "
                          f"{row['n_clusters']} clusters, silhouette={row['silhouette_score']:.4f}")

            # Cluster count distribution
            cluster_counts = valid_results['n_clusters'].value_counts().sort_index()
            print(f"\nCluster count distribution:")
            for n_clusters, count in cluster_counts.items():
                print(f" {n_clusters} clusters: {count} parameter combinations")

        print(f"\n📁 SAVING DETAILED RESULTS...")
        print("="*30)
        # Save detailed grid search results to JSON file
        self.save_dbscan_grid_search_results(all_results, best_params, best_score)

    if best_labels is None:
        print("DBSCAN could not find suitable clusters with the extensive grid search")
        print("Consider:")
        print("- Adjusting the embedding space (different model or preprocessing)")
        print("- Using different clustering algorithms")
        print("- Manual parameter tuning based on domain knowledge")
        return None

    n_clusters = len(set(best_labels)) - (1 if -1 in best_labels else 0)
    n_noise = int(np.sum(best_labels == -1))
    print(f"\nBest DBSCAN result:")
    print(f"Parameters: eps={best_params[0]:.4f}, min_samples={best_params[1]}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Number of noise points: {n_noise} ({n_noise/len(best_labels)*100:.1f}%)")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def save_dbscan_grid_search_results(self, all_results, best_params, best_score):
    """Save detailed DBSCAN grid search results to a timestamped JSON file.

    Also writes a CSV summary through ``self.save_grid_search_csv``. Both
    files are created in the current working directory.

    Args:
        all_results: list of trial dicts with keys ``eps``, ``min_samples``,
            ``n_clusters``, ``n_noise``, ``noise_ratio`` and, optionally,
            ``silhouette_score`` (missing or ``None`` means "not scored").
        best_params: ``(eps, min_samples)`` of the best trial, or ``None``.
        best_score: best silhouette score; ``-1`` means "no best trial".
    """
    import datetime
    import statistics

    def as_float(value):
        # json cannot serialize NumPy float32 scalars; builtin floats are safe.
        return None if value is None else float(value)

    now = datetime.datetime.now()

    grid_search_data = {
        "experiment_info": {
            "timestamp": now.isoformat(),
            "dataset_path": self.embeddings_path,
            "total_samples": len(self.file_paths),
            "embedding_dimension": int(self.embeddings.shape[1]),
            "total_combinations_tested": len(all_results)
        },
        "best_result": {
            "eps": as_float(best_params[0]) if best_params else None,
            "min_samples": best_params[1] if best_params else None,
            "silhouette_score": as_float(best_score) if best_score > -1 else None
        },
        "all_trials": []
    }

    # Add all trial results
    for trial_id, result in enumerate(all_results, start=1):
        # Unscored trials may omit the key entirely; treat that like None.
        score = as_float(result.get('silhouette_score'))
        grid_search_data["all_trials"].append({
            "trial_id": trial_id,
            "parameters": {
                "eps": as_float(result['eps']),
                "min_samples": result['min_samples']
            },
            "results": {
                "n_clusters": result['n_clusters'],
                "n_noise": result['n_noise'],
                "noise_ratio": result['noise_ratio'],
                "silhouette_score": score
            },
            "status": "success" if score is not None else "failed"
        })

    # Summary statistics over the scored ("success") trials only
    valid_trials = [t for t in grid_search_data["all_trials"] if t["status"] == "success"]
    if valid_trials:
        silhouette_scores = [t["results"]["silhouette_score"] for t in valid_trials]
        grid_search_data["summary_statistics"] = {
            "total_trials": len(all_results),
            "successful_trials": len(valid_trials),
            "success_rate": len(valid_trials) / len(all_results),
            "best_silhouette_score": max(silhouette_scores),
            "worst_silhouette_score": min(silhouette_scores),
            "mean_silhouette_score": sum(silhouette_scores) / len(silhouette_scores),
            # True median: averages the two middle values for even counts.
            "median_silhouette_score": statistics.median(silhouette_scores)
        }

        # Top 10 results, best score first
        grid_search_data["top_10_results"] = sorted(
            valid_trials,
            key=lambda t: t["results"]["silhouette_score"],
            reverse=True,
        )[:10]

        # Parameter analysis
        eps_values = [t["parameters"]["eps"] for t in valid_trials]
        min_samples_values = [t["parameters"]["min_samples"] for t in valid_trials]
        grid_search_data["parameter_analysis"] = {
            "eps_range": {
                "min": min(eps_values),
                "max": max(eps_values),
                "mean": sum(eps_values) / len(eps_values)
            },
            "min_samples_range": {
                "min": min(min_samples_values),
                "max": max(min_samples_values),
                "mean": sum(min_samples_values) / len(min_samples_values)
            }
        }

    # Save to file with timestamp. ensure_ascii=False emits raw non-ASCII
    # characters, so the file encoding must be explicit.
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    filename = f"dbscan_grid_search_detailed_{timestamp}.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(grid_search_data, f, indent=4, ensure_ascii=False)
    print(f"Detailed grid search results saved to: {filename}")

    # Also save a CSV summary for easy analysis
    csv_filename = f"dbscan_grid_search_summary_{timestamp}.csv"
    self.save_grid_search_csv(all_results, csv_filename)
    print(f"Grid search summary CSV saved to: {csv_filename}")
def save_grid_search_csv(self, all_results, filename):
    """Save grid search results as CSV for easy analysis.

    Args:
        all_results: list of trial dicts with keys ``eps``, ``min_samples``,
            ``n_clusters``, ``n_noise``, ``noise_ratio`` and, optionally,
            ``silhouette_score``. A missing or ``None`` score is written as
            an empty cell and the trial is marked ``failed``.
        filename: path of the CSV file to create (overwritten if present).
    """
    import csv

    fieldnames = ['trial_id', 'eps', 'min_samples', 'n_clusters', 'n_noise',
                  'noise_ratio', 'silhouette_score', 'status']

    with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        for trial_id, result in enumerate(all_results, start=1):
            # Unscored trials may not carry the key at all.
            score = result.get('silhouette_score')
            writer.writerow({
                'trial_id': trial_id,
                'eps': result['eps'],
                'min_samples': result['min_samples'],
                'n_clusters': result['n_clusters'],
                'n_noise': result['n_noise'],
                'noise_ratio': result['noise_ratio'],
                'silhouette_score': score,
                'status': 'success' if score is not None else 'failed'
            })
def run_mean_shift(self):
    """Run Mean Shift over several bandwidths and return the best labelling.

    Bandwidth candidates are 0.5x / 1x / 1.5x of (a) sklearn's
    ``estimate_bandwidth`` and (b) the mean distance to the nearest
    neighbours. Each candidate is fitted on ``self.embeddings_scaled``, and
    runs with between 2 and ``n_samples // 3`` clusters are scored with the
    silhouette coefficient.

    Returns:
        The label array of the best-scoring run, or ``None`` if no run
        produced an acceptable clustering.
    """
    from sklearn.cluster import estimate_bandwidth

    print("\n" + "="*50)
    print("RUNNING MEAN SHIFT CLUSTERING")
    print("="*50)

    data = self.embeddings_scaled
    n_samples = len(data)
    bandwidth_candidates = []

    # Method 1: sklearn's estimate_bandwidth. Best effort: a failure only
    # removes these candidates, but it is reported instead of hidden.
    try:
        bw_est = estimate_bandwidth(data, quantile=0.3, n_samples=min(500, n_samples))
    except Exception as exc:
        print(f"estimate_bandwidth failed ({str(exc)[:50]}...)")
    else:
        if bw_est > 0:
            bandwidth_candidates.extend([bw_est * 0.5, bw_est, bw_est * 1.5])

    # Method 2: nearest neighbor distances. n_neighbors may not exceed the
    # number of samples, so clamp it for small datasets.
    n_neighbors = min(10, n_samples)
    if n_neighbors >= 2:
        distances, _ = NearestNeighbors(n_neighbors=n_neighbors).fit(data).kneighbors(data)
        mean_dist = np.mean(distances[:, 1:])  # Exclude self-distance
        bandwidth_candidates.extend([mean_dist * 0.5, mean_dist, mean_dist * 1.5])

    # Remove duplicates and invalid values; sorted for a stable trial order.
    bandwidth_candidates = sorted({float(bw) for bw in bandwidth_candidates if bw > 0})
    if not bandwidth_candidates:
        bandwidth_candidates = [0.5, 1.0, 1.5, 2.0]

    best_score = -1
    best_bandwidth = None
    best_labels = None

    print("Testing different bandwidth values...")
    for bandwidth in bandwidth_candidates:
        try:
            labels = MeanShift(bandwidth=bandwidth).fit_predict(data)
            n_clusters = len(set(labels))
            if 2 <= n_clusters <= n_samples // 3:
                score = silhouette_score(data, labels)
                print(f"bandwidth={bandwidth:.4f}: {n_clusters} clusters, silhouette={score:.4f}")
                if score > best_score:
                    best_score = score
                    best_bandwidth = bandwidth
                    best_labels = labels
        except Exception as e:
            print(f"bandwidth={bandwidth:.4f}: failed ({str(e)[:50]}...)")
            continue

    if best_labels is None:
        print("Mean Shift could not find suitable clusters")
        return None

    n_clusters = len(set(best_labels))
    print(f"\nBest Mean Shift result:")
    print(f"Bandwidth: {best_bandwidth:.4f}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def run_affinity_propagation(self):
    """Run Affinity Propagation over a small grid and return the best labels.

    Similarity is the negative squared Euclidean distance. Preference
    candidates are percentiles of the pairwise (off-diagonal) similarities.
    The diagonal cannot be used for this: every point's similarity to itself
    is exactly 0, so all of its percentiles are the same value. Each
    (preference, damping) pair is fitted on ``self.embeddings_scaled``, and
    runs with between 2 and ``n_samples // 3`` clusters are scored with the
    silhouette coefficient.

    Returns:
        The label array of the best-scoring run, or ``None`` if no run
        produced an acceptable clustering.
    """
    from scipy.spatial.distance import pdist

    print("\n" + "="*50)
    print("RUNNING AFFINITY PROPAGATION CLUSTERING")
    print("="*50)

    data = self.embeddings_scaled
    n_samples = len(data)
    if n_samples < 2:
        # No pairwise similarities exist, so nothing can be clustered.
        print("Affinity Propagation could not find suitable clusters")
        return None

    # Condensed pairwise similarities. pdist avoids materializing the
    # (n, n, dim) difference tensor the broadcasted form would need.
    similarities = -pdist(data, metric='sqeuclidean')

    # Try different preference values (percentiles of pairwise similarity);
    # the 50th percentile is the median.
    preference_candidates = sorted(
        {float(np.percentile(similarities, p)) for p in (10, 25, 50, 75)}
    )
    damping_candidates = [0.5, 0.7, 0.8, 0.9]

    best_score = -1
    best_params = None
    best_labels = None

    print("Testing different parameter combinations...")
    for preference in preference_candidates:
        for damping in damping_candidates:
            try:
                affinity_prop = AffinityPropagation(
                    preference=preference,
                    damping=damping,
                    random_state=42,
                    max_iter=200
                )
                labels = affinity_prop.fit_predict(data)
                n_clusters = len(set(labels))
                if 2 <= n_clusters <= n_samples // 3:
                    score = silhouette_score(data, labels)
                    print(f"preference={preference:.2f}, damping={damping:.1f}: {n_clusters} clusters, silhouette={score:.4f}")
                    if score > best_score:
                        best_score = score
                        best_params = (preference, damping)
                        best_labels = labels
            except Exception as e:
                print(f"preference={preference:.2f}, damping={damping:.1f}: failed ({str(e)[:30]}...)")
                continue

    if best_labels is None:
        print("Affinity Propagation could not find suitable clusters")
        return None

    n_clusters = len(set(best_labels))
    print(f"\nBest Affinity Propagation result:")
    print(f"Parameters: preference={best_params[0]:.2f}, damping={best_params[1]:.1f}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def visualize_results(self, results_dict):
    """Plot every method's clustering on a shared 2-D PCA projection.

    Args:
        results_dict: mapping of method name to its label array. Label -1 is
            drawn as black 'x' markers titled "Noise"; every other label gets
            a colour from the Set1 colormap.

    Side effects:
        Saves the figure to 'auto_clustering_results.png' and shows it. With
        an empty mapping it only prints a notice and returns.
    """
    if not results_dict:
        print("No results to visualize")
        return

    # Project the scaled embeddings to two components for plotting.
    projector = PCA(n_components=2, random_state=42)
    points = projector.fit_transform(self.embeddings_scaled)

    method_count = len(results_dict)
    fig, axes = plt.subplots(1, method_count, figsize=(5*method_count, 4))
    if method_count == 1:
        # A single subplot comes back as a bare Axes; wrap it for iteration.
        axes = [axes]

    for ax, (method_name, labels) in zip(axes, results_dict.items()):
        distinct = set(labels)
        palette = plt.cm.Set1(np.linspace(0, 1, len(distinct)))

        for label, color in zip(distinct, palette):
            members = labels == label
            xs = points[members, 0]
            ys = points[members, 1]
            if label == -1:
                # Noise points (DBSCAN label -1) in black
                ax.scatter(xs, ys, c='black', marker='x', s=20, alpha=0.5, label='Noise')
            else:
                ax.scatter(xs, ys, c=[color], s=50, alpha=0.7, label=f'Cluster {label}')

        ax.set_title(f'{method_name}\n({len(set(labels)) - (1 if -1 in labels else 0)} clusters)')
        ax.set_xlabel('PCA Component 1')
        ax.set_ylabel('PCA Component 2')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('auto_clustering_results.png', dpi=300, bbox_inches='tight')
    plt.show()
    print(f"\nVisualization saved as 'auto_clustering_results.png'")
def save_results(self, results_dict):
    """Save clustering results to one JSON file per method.

    Each file is named after the method (lower-cased, spaces replaced by
    underscores, suffixed ``_results.json``) and written to the current
    working directory.

    Args:
        results_dict: mapping of method name to its label sequence, aligned
            with ``self.file_paths``. Only the ``'DBSCAN'`` method has its
            ``-1`` labels flagged as noise.
    """
    for method_name, labels in results_dict.items():
        flags_noise = method_name == 'DBSCAN'

        # bool()/int() keep NumPy scalars out of the JSON payload.
        method_results = [
            {
                "filepath": filepath,
                "cluster": int(label),
                "is_noise": bool(flags_noise and label == -1)
            }
            for filepath, label in zip(self.file_paths, labels)
        ]

        # The noise label -1 is never counted as a cluster.
        cluster_ids = {int(label) for label in labels}
        cluster_ids.discard(-1)

        # Save to file
        filename = f"{method_name.lower().replace(' ', '_')}_results.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump({
                "method": method_name,
                "n_clusters": len(cluster_ids),
                "n_samples": len(labels),
                "results": method_results
            }, f, indent=4)
        print(f"Results saved to {filename}")
def run_all_methods(self):
    """Run the enabled automatic clustering methods and report on them.

    Only DBSCAN is currently enabled; the Mean Shift and Affinity Propagation
    calls are commented out below. Successful results are summarized,
    compared pairwise with the Adjusted Rand Index when more than one method
    succeeded, then visualized and saved.

    Returns:
        Dict mapping method name to its label array; empty if no method
        found suitable clusters.
    """
    header_rule = "="*70
    print("\n" + header_rule)
    print("AUTOMATIC CLUSTERING ANALYSIS")
    print(header_rule)
    print(f"Dataset: {len(self.file_paths)} documents")
    print(f"Embedding dimension: {self.embeddings.shape[1]}")

    results = {}

    # Run DBSCAN
    dbscan_labels = self.run_dbscan()
    if dbscan_labels is not None:
        results["DBSCAN"] = dbscan_labels

    # Run Mean Shift
    # meanshift_labels = self.run_mean_shift()
    # if meanshift_labels is not None:
    #     results["Mean Shift"] = meanshift_labels

    # Run Affinity Propagation
    # affinity_labels = self.run_affinity_propagation()
    # if affinity_labels is not None:
    #     results["Affinity Propagation"] = affinity_labels

    if not results:
        print("\nNo automatic clustering method found suitable clusters.")
        print("This might indicate:")
        print("- Data doesn't have clear cluster structure")
        print("- Embeddings need different preprocessing")
        print("- Different parameter ranges needed")
        return results

    # Summary
    print("\n" + header_rule)
    print("SUMMARY OF RESULTS")
    print(header_rule)
    for method, labels in results.items():
        noise_offset = 1 if -1 in labels else 0
        n_clusters = len(set(labels)) - noise_offset
        if method == "DBSCAN":
            n_noise = list(labels).count(-1)
            print(f"{method}: {n_clusters} clusters, {n_noise} noise points")
        else:
            print(f"{method}: {n_clusters} clusters")

    # Calculate agreement between methods if multiple succeeded
    if len(results) > 1:
        from sklearn.metrics import adjusted_rand_score
        print("\nMethod Agreement (Adjusted Rand Index):")
        method_names = list(results.keys())
        for pos, first in enumerate(method_names):
            for second in method_names[pos + 1:]:
                ari = adjusted_rand_score(results[first], results[second])
                print(f"{first} vs {second}: {ari:.4f}")

    # Visualize and save results
    self.visualize_results(results)
    self.save_results(results)
    return results
def main(argv=None):
    """Command-line entry point for the automatic clustering script.

    Args:
        argv: optional list of argument strings. ``None`` (the default) makes
            argparse read ``sys.argv[1:]`` as before.

    ``--embeddings_path`` is mandatory: without it the loader would be handed
    ``None`` and fail with an unrelated TypeError from ``open``.
    """
    parser = argparse.ArgumentParser(description="Run automatic clustering methods on document embeddings")
    parser.add_argument("--embeddings_path", required=True, help="Path to embeddings JSON file")
    parser.add_argument("--method", choices=['dbscan', 'meanshift', 'affinity', 'all'], default='all',
                        help="Which automatic method to run")
    args = parser.parse_args(argv)

    # Initialize clustering
    clustering = AutoClustering(args.embeddings_path)

    # Run selected method(s)
    if args.method == 'all':
        clustering.run_all_methods()
        return

    # CLI choice -> (name used in plots and file names, bound runner)
    runners = {
        'dbscan': ("DBSCAN", clustering.run_dbscan),
        'meanshift': ("Mean Shift", clustering.run_mean_shift),
        'affinity': ("Affinity Propagation", clustering.run_affinity_propagation),
    }
    display_name, runner = runners[args.method]
    labels = runner()
    if labels is not None:
        clustering.visualize_results({display_name: labels})
        clustering.save_results({display_name: labels})
# Run the command-line interface only when this file is executed as a script.
if __name__ == "__main__":
main()

711
cluster/auto_cluster.py Normal file
View File

@@ -0,0 +1,711 @@
#!/usr/bin/env python3
"""
Simple script to run automatic clustering methods (DBSCAN, Mean Shift, Affinity Propagation)
These methods don't require specifying the number of clusters beforehand.
"""
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN, MeanShift, AffinityPropagation
from sklearn.preprocessing import normalize
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import argparse
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime
import numpy as np
def value_counts(a, dropna=False):
    """Count how often each distinct value occurs in a NumPy array.

    Args:
        a: NumPy array of any shape; it is flattened before counting.
        dropna: when true and ``a`` has a floating dtype, NaN entries are
            removed before counting.

    Returns:
        Dict mapping each distinct value to its number of occurrences.
    """
    flat = a.ravel()
    if dropna and np.issubdtype(a.dtype, np.floating):
        flat = flat[~np.isnan(flat)]
    values, occurrences = np.unique(flat, return_counts=True)
    return {value: count for value, count in zip(values, occurrences)}
class AutoClustering:
def __init__(self, embeddings_path):
# Remember where the embeddings JSON lives and load it right away.
# embeddings_path: path that load_embeddings() passes to open().
self.embeddings_path = embeddings_path
# Placeholders; load_embeddings() below replaces both of them.
self.embeddings = None
self.file_paths = None
# Constructing the object therefore performs file I/O.
self.load_embeddings()
def load_embeddings(self):
    """Load embeddings from the JSON file and L2-normalize them.

    The file at ``self.embeddings_path`` must hold a JSON list of objects,
    each providing a ``filepath`` and an ``embedding`` entry.

    Sets:
        self.file_paths: the ``filepath`` values, in file order.
        self.embeddings: float32 array built from the ``embedding`` values.
        self.embeddings_normalized: row-wise L2-normalized copy.

    Also prints the mean and standard deviation of the pairwise cosine
    similarities as a quick diagnostic.

    Raises:
        ValueError: if the file holds no records or the embeddings do not
            form a 2-D array.
    """
    print(f"Loading embeddings from {self.embeddings_path}...")
    # Read as UTF-8 explicitly instead of relying on the platform default.
    with open(self.embeddings_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    self.file_paths = [item['filepath'] for item in data]
    self.embeddings = np.array([item['embedding'] for item in data], dtype=np.float32)

    # An empty list yields a 1-D array, so `.shape[1]` below would otherwise
    # fail with an unhelpful IndexError.
    if self.embeddings.ndim != 2 or self.embeddings.shape[0] == 0:
        raise ValueError(
            f"No usable embeddings in {self.embeddings_path}: "
            f"expected a non-empty list of equal-length vectors, got array shape {self.embeddings.shape}"
        )

    print(f"Loaded {len(self.file_paths)} samples with embedding dimension {self.embeddings.shape[1]}")

    # Normalize embeddings using L2 normalization for cosine distance
    self.embeddings_normalized = normalize(self.embeddings, norm='l2', axis=1)
    print("Applied L2 normalization to embeddings")

    # Diagnostic only: distribution of pairwise cosine similarities.
    sims = cosine_similarity(self.embeddings)
    print(self.embeddings.shape)
    # Take the upper triangle, excluding the diagonal, for inspection.
    triu_idxs = np.triu_indices_from(sims, k=1)
    sim_vals = sims[triu_idxs]
    print(sim_vals.shape)
    if sim_vals.size:
        # A single sample has no pairs, and mean/std of nothing is NaN.
        print("mean sim:", sim_vals.mean(), "std:", sim_vals.std())
def run_dbscan(self, eps_candidates=None, min_samples_candidates=None):
    """Grid-search cosine-metric DBSCAN and return the best labelling.

    Every ``(eps, min_samples)`` pair is fitted on
    ``self.embeddings_normalized`` with ``metric='cosine'``. Runs with at
    least two clusters and less than 90% noise are scored with the cosine
    silhouette coefficient computed on the non-noise points only.

    Args:
        eps_candidates: iterable of ``eps`` values to try. Defaults to
            ``[0.2]``, the value this script was pinned to.
        min_samples_candidates: iterable of ``min_samples`` values to try.
            Defaults to ``[50]``, the value this script was pinned to.

    Returns:
        The label array of the best-scoring run, or ``None`` when no run
        produced a scorable clustering.

    Side effects:
        Prints progress and a summary. When at least one fit completed, the
        trial records are handed to ``self.save_dbscan_grid_search_results``.
    """
    import traceback

    print("\n" + "="*50)
    print("RUNNING DBSCAN CLUSTERING WITH EXTENSIVE GRID SEARCH")
    print("="*50)

    # NOTE: two data-driven eps heuristics are disabled in this cosine
    # variant and kept here as a description for whoever re-enables them:
    #   1. k-NN distances (NearestNeighbors, metric='cosine') for
    #      k in 5, 10, 15, 20, 25, 30 (capped at n_samples // 4, skipped
    #      below 3), taking percentiles 60, 65, ..., 95 of the k-th
    #      neighbour distance.
    #   2. pdist(metric='cosine') over a random sample of at most 1000 rows:
    #      mean * {0.3, 0.4, 0.5, 0.6, 0.7}, median * {0.3, 0.4, 0.5, 0.6},
    #      std * {0.5, 0.8, 1.0, 1.2}.
    # A manual sweep for cosine distances (0-2 range) was also used:
    #   0.001, 0.002, 0.005, 0.01, 0.02, 0.03, 0.04, 0.05, 0.1, 0.15, 0.2,
    #   0.25, 0.3, 0.35, 0.4, 0.45, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0
    # Pass any of these through `eps_candidates` to search them again.
    if eps_candidates is None:
        eps_candidates = [0.2]
    # Remove duplicates and invalid values, then sort. Builtin floats keep
    # NumPy scalars out of the JSON report written later.
    eps_candidates = sorted({float(eps) for eps in eps_candidates if eps > 0})

    # Previously tried min_samples sweep: 3-10, 12, 15, 18, 20, 25, 30, 35,
    # 40, 50 (optionally capped at 10% of the dataset size).
    if min_samples_candidates is None:
        min_samples_candidates = [50]
    else:
        min_samples_candidates = list(min_samples_candidates)

    data = self.embeddings_normalized

    best_score = -1
    best_params = None
    best_labels = None

    total_combinations = len(eps_candidates) * len(min_samples_candidates)
    print(f"Testing {len(eps_candidates)} eps values x {len(min_samples_candidates)} min_samples values")
    print(f"Total combinations: {total_combinations}")
    print("This may take a while...\n")

    # Track all results for analysis
    all_results = []
    current_combination = 0

    for eps in eps_candidates:
        for min_samples in min_samples_candidates:
            current_combination += 1

            # Progress indicator
            if current_combination % 50 == 0 or current_combination == total_combinations:
                progress = (current_combination / total_combinations) * 100
                print(f"Progress: {current_combination}/{total_combinations} ({progress:.1f}%)")

            try:
                labels = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit_predict(data)
            except Exception:
                # Skip problematic parameter combinations, but show why.
                traceback.print_exc()
                continue

            n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
            n_noise = int(np.sum(labels == -1))
            noise_ratio = n_noise / len(labels)

            # Every record carries a 'silhouette_score' key (None when the
            # run was not scored) so the report writers and the DataFrame
            # analysis below never hit a missing key/column.
            result_info = {
                'eps': eps,
                'min_samples': min_samples,
                'n_clusters': n_clusters,
                'n_noise': n_noise,
                'noise_ratio': noise_ratio,
                'silhouette_score': None,
            }

            print(n_clusters, n_noise, noise_ratio, eps, min_samples)

            # Score only meaningful clusterings: >= 2 clusters, not mostly noise.
            mask = labels != -1
            if n_clusters >= 2 and noise_ratio < 0.9 and np.sum(mask) > 1:
                try:
                    # Cosine silhouette, computed on non-noise points only.
                    score = float(silhouette_score(data[mask], labels[mask], metric='cosine'))
                except Exception:
                    score = None
                if score is not None:
                    result_info['silhouette_score'] = score
                    if score > 0.1:  # Only show decent scores
                        print(f"eps={eps:.4f}, min_samples={min_samples}: {n_clusters} clusters, {n_noise} noise ({noise_ratio:.1%}), silhouette={score:.4f}")
                        print(value_counts(labels))
                    if score > best_score:
                        best_score = score
                        best_params = (eps, min_samples)
                        best_labels = labels

            all_results.append(result_info)

    # Analysis of results
    print("\n" + "="*50)
    print("DBSCAN GRID SEARCH ANALYSIS")
    print("="*50)

    if all_results:
        import pandas as pd
        df_results = pd.DataFrame(all_results)
        print(f"Total parameter combinations tested: {len(df_results)}")

        # Valid results (with clusters)
        valid_results = df_results[df_results['n_clusters'] >= 2]
        print(f"Combinations that produced clusters: {len(valid_results)}")

        if len(valid_results) > 0:
            scored_results = valid_results.dropna(subset=['silhouette_score'])
            if len(scored_results) > 0:
                print(f"Combinations with valid silhouette scores: {len(scored_results)}")
                print(f"Best silhouette score: {scored_results['silhouette_score'].max():.4f}")
                print(f"Mean silhouette score: {scored_results['silhouette_score'].mean():.4f}")

                # Top 5 results; the column may be object dtype when some
                # runs were unscored, so sort numerically.
                top_results = scored_results.sort_values(
                    'silhouette_score', ascending=False, key=lambda s: s.astype(float)
                ).head(5)
                print("\nTop 5 parameter combinations:")
                for _, row in top_results.iterrows():
                    print(f" eps={row['eps']:.4f}, min_samples={row['min_samples']}: "
                          f"{row['n_clusters']} clusters, silhouette={row['silhouette_score']:.4f}")

            # Cluster count distribution
            cluster_counts = valid_results['n_clusters'].value_counts().sort_index()
            print("\nCluster count distribution:")
            for n_clusters, count in cluster_counts.items():
                print(f" {n_clusters} clusters: {count} parameter combinations")

        print("\n📁 SAVING DETAILED RESULTS...")
        print("="*30)
        # Save detailed grid search results to JSON file
        self.save_dbscan_grid_search_results(all_results, best_params, best_score)

    if best_labels is None:
        print("DBSCAN could not find suitable clusters with the extensive grid search")
        print("Consider:")
        print("- Adjusting the embedding space (different model or preprocessing)")
        print("- Using different clustering algorithms")
        print("- Manual parameter tuning based on domain knowledge")
        return None

    n_clusters = len(set(best_labels)) - (1 if -1 in best_labels else 0)
    n_noise = int(np.sum(best_labels == -1))
    print("\nBest DBSCAN result:")
    print(f"Parameters: eps={best_params[0]:.4f}, min_samples={best_params[1]}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Number of noise points: {n_noise} ({n_noise/len(best_labels)*100:.1f}%)")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def save_dbscan_grid_search_results(self, all_results, best_params, best_score):
    """Write the DBSCAN grid-search report as JSON plus a CSV summary.

    The JSON report (``dbscan_grid_search_detailed.json``) contains experiment
    metadata, the best parameters and every trial. When at least one trial
    has a silhouette score it also contains summary statistics, the ten
    best trials and the eps / min_samples ranges of the successful trials.
    The CSV (``dbscan_grid_search_summary.csv``) is produced by
    ``self.save_grid_search_csv``. Both files use fixed names in the current
    working directory, so every run overwrites the previous one.

    Args:
        all_results: Per-trial dicts with the keys ``eps``, ``min_samples``,
            ``n_clusters``, ``n_noise``, ``noise_ratio`` and
            ``silhouette_score`` (``None`` marks a failed trial).
        best_params: ``(eps, min_samples)`` of the best trial, or a falsy
            value when no trial succeeded.
        best_score: Best silhouette score. ``None`` or a value that is not
            greater than ``-1`` is stored as ``null``.
    """
    import datetime
    import statistics

    all_trials = []
    for trial_id, result in enumerate(all_results, start=1):
        score = result['silhouette_score']
        all_trials.append({
            "trial_id": trial_id,
            "parameters": {
                "eps": result['eps'],
                "min_samples": result['min_samples'],
            },
            "results": {
                "n_clusters": result['n_clusters'],
                "n_noise": result['n_noise'],
                "noise_ratio": result['noise_ratio'],
                "silhouette_score": score,
            },
            "status": "success" if score is not None else "failed",
        })

    has_best_score = best_score is not None and best_score > -1
    grid_search_data = {
        "experiment_info": {
            "timestamp": datetime.datetime.now().isoformat(),
            "dataset_path": self.embeddings_path,
            "total_samples": len(self.file_paths),
            "embedding_dimension": self.embeddings.shape[1],
            "total_combinations_tested": len(all_results),
        },
        "best_result": {
            "eps": best_params[0] if best_params else None,
            "min_samples": best_params[1] if best_params else None,
            "silhouette_score": best_score if has_best_score else None,
        },
        "all_trials": all_trials,
    }

    # The statistics sections only make sense when something succeeded; they
    # are simply left out of the report otherwise.
    valid_trials = [t for t in all_trials if t["status"] == "success"]
    if valid_trials:
        silhouette_scores = [t["results"]["silhouette_score"] for t in valid_trials]
        grid_search_data["summary_statistics"] = {
            "total_trials": len(all_results),
            "successful_trials": len(valid_trials),
            "success_rate": len(valid_trials) / len(all_results),
            "best_silhouette_score": max(silhouette_scores),
            "worst_silhouette_score": min(silhouette_scores),
            "mean_silhouette_score": sum(silhouette_scores) / len(silhouette_scores),
            # True median: averages the two middle values for even counts.
            "median_silhouette_score": statistics.median(silhouette_scores),
        }

        grid_search_data["top_10_results"] = sorted(
            valid_trials,
            key=lambda t: t["results"]["silhouette_score"],
            reverse=True,
        )[:10]

        eps_values = [t["parameters"]["eps"] for t in valid_trials]
        min_samples_values = [t["parameters"]["min_samples"] for t in valid_trials]
        grid_search_data["parameter_analysis"] = {
            "eps_range": {
                "min": min(eps_values),
                "max": max(eps_values),
                "mean": sum(eps_values) / len(eps_values),
            },
            "min_samples_range": {
                "min": min(min_samples_values),
                "max": max(min_samples_values),
                "mean": sum(min_samples_values) / len(min_samples_values),
            },
        }

    # ensure_ascii=False emits raw non-ASCII characters, so the encoding must
    # be pinned instead of depending on the platform default.
    filename = "dbscan_grid_search_detailed.json"
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(grid_search_data, f, indent=4, ensure_ascii=False)
    print(f"Detailed grid search results saved to: {filename}")

    # Also save a CSV summary for easy analysis
    csv_filename = "dbscan_grid_search_summary.csv"
    self.save_grid_search_csv(all_results, csv_filename)
    print(f"Grid search summary CSV saved to: {csv_filename}")
def save_grid_search_csv(self, all_results, filename):
    """Dump one CSV row per grid-search trial.

    Args:
        all_results: Per-trial dicts holding ``eps``, ``min_samples``,
            ``n_clusters``, ``n_noise``, ``noise_ratio`` and
            ``silhouette_score``.
        filename: Destination path; an existing file is overwritten.

    Trials are numbered from 1 in input order. A trial whose silhouette
    score is ``None`` is marked ``failed``, every other trial ``success``.
    """
    import csv

    columns = ['trial_id', 'eps', 'min_samples', 'n_clusters', 'n_noise',
               'noise_ratio', 'silhouette_score', 'status']
    # Everything between the id and the status is copied verbatim from the trial.
    copied_keys = columns[1:-1]

    def as_row(number, trial):
        row = {'trial_id': number}
        row.update((key, trial[key]) for key in copied_keys)
        row['status'] = 'failed' if trial['silhouette_score'] is None else 'success'
        return row

    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(as_row(number, trial)
                      for number, trial in enumerate(all_results, start=1))
def run_mean_shift(self):
    """Run Mean Shift over several candidate bandwidths and keep the best labelling.

    Candidate bandwidths come from sklearn's ``estimate_bandwidth`` (when it
    succeeds) and from the mean cosine distance to each point's nearest
    neighbours, each scaled by 0.5, 1.0 and 1.5. A labelling is scored only
    when it has between 2 and ``n_samples // 3`` clusters; the highest
    cosine silhouette score wins.

    Returns:
        The label array of the best-scoring run, or ``None`` when no
        candidate produced an acceptable number of clusters.
    """
    print("\n" + "="*50)
    print("RUNNING MEAN SHIFT CLUSTERING")
    print("="*50)

    from sklearn.cluster import estimate_bandwidth

    data = self.embeddings_normalized
    bandwidth_candidates = []

    # Method 1: sklearn's estimate_bandwidth (it does not support cosine directly).
    try:
        bw_est = estimate_bandwidth(data, quantile=0.3, n_samples=min(500, len(data)))
    except Exception as e:
        # Best effort: the neighbour-based candidates below still apply, but
        # the failure is reported instead of being silently dropped.
        print(f"estimate_bandwidth failed ({str(e)[:50]}...); using neighbor-based candidates only")
    else:
        if bw_est > 0:
            bandwidth_candidates.extend([bw_est * 0.5, bw_est, bw_est * 1.5])

    # Method 2: nearest neighbor cosine distances. n_neighbors is clamped so
    # datasets with fewer than 10 samples do not make kneighbors() fail.
    neighbors = NearestNeighbors(n_neighbors=min(10, len(data)), metric='cosine')
    distances, _ = neighbors.fit(data).kneighbors(data)
    mean_dist = np.mean(distances[:, 1:])  # Exclude self-distance
    bandwidth_candidates.extend([mean_dist * 0.5, mean_dist, mean_dist * 1.5])

    # Drop duplicates and non-positive / NaN values; sort so that runs are
    # tried (and ties resolved) in a reproducible order.
    bandwidth_candidates = sorted({bw for bw in bandwidth_candidates if bw > 0})
    if not bandwidth_candidates:
        bandwidth_candidates = [0.5, 1.0, 1.5, 2.0]

    best_score = -1
    best_bandwidth = None
    best_labels = None
    max_clusters = len(data) // 3

    print("Testing different bandwidth values...")
    for bandwidth in bandwidth_candidates:
        try:
            labels = MeanShift(bandwidth=bandwidth).fit_predict(data)
            n_clusters = len(set(labels))
            if 2 <= n_clusters <= max_clusters:
                score = silhouette_score(data, labels, metric='cosine')
                print(f"bandwidth={bandwidth:.4f}: {n_clusters} clusters, silhouette={score:.4f}")
                if score > best_score:
                    best_score = score
                    best_bandwidth = bandwidth
                    best_labels = labels
        except Exception as e:
            # One failing bandwidth must not abort the remaining candidates.
            print(f"bandwidth={bandwidth:.4f}: failed ({str(e)[:50]}...)")
            continue

    if best_labels is None:
        print("Mean Shift could not find suitable clusters")
        return None

    n_clusters = len(set(best_labels))
    print("\nBest Mean Shift result:")
    print(f"Bandwidth: {best_bandwidth:.4f}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def run_affinity_propagation(self):
    """Run Affinity Propagation on cosine similarities and keep the best labelling.

    The pairwise cosine similarity matrix is passed to the model as a
    precomputed affinity. Preference candidates are percentiles of the
    off-diagonal similarities: the diagonal of a cosine similarity matrix is
    always 1.0, so percentiles taken from it would all be the same value.
    Each preference is combined with several damping factors. A labelling is
    scored only when it has between 2 and ``n_samples // 3`` clusters; the
    highest cosine silhouette score wins.

    Returns:
        The label array of the best-scoring run, or ``None`` when no
        parameter combination produced an acceptable number of clusters.
    """
    print("\n" + "="*50)
    print("RUNNING AFFINITY PROPAGATION CLUSTERING")
    print("="*50)

    from sklearn.metrics.pairwise import cosine_similarity

    data = self.embeddings_normalized
    similarities = cosine_similarity(data)
    n_samples = similarities.shape[0]

    # Pairwise similarities between distinct points only.
    off_diagonal = similarities[~np.eye(n_samples, dtype=bool)]
    if off_diagonal.size == 0:
        # Fewer than two samples: nothing to cluster.
        print("Affinity Propagation could not find suitable clusters")
        return None

    # Lower preference -> fewer exemplars; the median is sklearn's own default.
    preference_candidates = sorted(
        {float(p) for p in np.percentile(off_diagonal, [10, 25, 50, 75])}
    )
    damping_candidates = [0.5, 0.7, 0.8, 0.9]

    best_score = -1
    best_params = None
    best_labels = None
    max_clusters = len(data) // 3

    print("Testing different parameter combinations...")
    for preference in preference_candidates:
        for damping in damping_candidates:
            try:
                affinity_prop = AffinityPropagation(
                    affinity='precomputed',
                    preference=preference,
                    damping=damping,
                    random_state=42,
                    max_iter=200
                )
                # With affinity='precomputed' the model is fitted on the
                # similarity matrix, not on the raw embeddings.
                labels = affinity_prop.fit_predict(similarities)
                n_clusters = len(set(labels))
                if 2 <= n_clusters <= max_clusters:
                    score = silhouette_score(data, labels, metric='cosine')
                    print(f"preference={preference:.2f}, damping={damping:.1f}: {n_clusters} clusters, silhouette={score:.4f}")
                    if score > best_score:
                        best_score = score
                        best_params = (preference, damping)
                        best_labels = labels
            except Exception as e:
                # One failing combination must not abort the rest of the search.
                print(f"preference={preference:.2f}, damping={damping:.1f}: failed ({str(e)[:30]}...)")
                continue

    if best_labels is None:
        print("Affinity Propagation could not find suitable clusters")
        return None

    n_clusters = len(set(best_labels))
    print("\nBest Affinity Propagation result:")
    print(f"Parameters: preference={best_params[0]:.2f}, damping={best_params[1]:.1f}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def visualize_results(self, results_dict):
    """Scatter-plot each clustering result on a shared 2-D PCA projection.

    Args:
        results_dict: Mapping of method name to a label array aligned with
            ``self.embeddings_normalized``. Label ``-1`` is drawn as noise.

    One panel is drawn per method. The figure is saved as
    ``auto_clustering_results.png`` in the working directory and then shown.
    Nothing is drawn when ``results_dict`` is empty.
    """
    if not results_dict:
        print("No results to visualize")
        return

    # Project once; every panel reuses the same 2-D coordinates.
    projection = PCA(n_components=2, random_state=42).fit_transform(self.embeddings_normalized)

    panel_count = len(results_dict)
    _, axes = plt.subplots(1, panel_count, figsize=(5*panel_count, 4))
    if panel_count == 1:
        # subplots() hands back a bare Axes for a single panel.
        axes = [axes]

    for panel, (method_name, labels) in enumerate(results_dict.items()):
        ax = axes[panel]
        distinct = set(labels)
        palette = plt.cm.Set1(np.linspace(0, 1, len(distinct)))

        for label, color in zip(distinct, palette):
            mask = labels == label
            xs = embeddings_x = projection[mask, 0]
            ys = projection[mask, 1]
            if label == -1:
                # Noise points in black
                ax.scatter(xs, ys, c='black', marker='x', s=20, alpha=0.5, label='Noise')
            else:
                ax.scatter(xs, ys, c=[color], s=50, alpha=0.7, label=f'Cluster {label}')

        # The noise label is not counted as a cluster in the title.
        noise_offset = 1 if -1 in labels else 0
        ax.set_title(f'{method_name}\n({len(distinct) - noise_offset} clusters)')
        ax.set_xlabel('PCA Component 1')
        ax.set_ylabel('PCA Component 2')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('auto_clustering_results.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("\nVisualization saved as 'auto_clustering_results.png'")
def save_results(self, results_dict):
    """Write one timestamped JSON file per clustering method.

    Args:
        results_dict: Mapping of method name to a sequence of cluster labels
            aligned with ``self.file_paths``.

    Each file is named ``<method>_results_<YYYYmmdd_HHMMSS>.json`` (method
    name lower-cased, spaces replaced by underscores) and is written to the
    current working directory. It stores the method name, the number of
    clusters (the ``-1`` label is not counted), the number of samples and
    one ``{"filepath", "cluster", "is_noise"}`` record per document. A
    method with no labels produces a file with an empty ``results`` list.
    """
    for method_name, labels in results_dict.items():
        # Only DBSCAN results get -1 flagged as noise; other methods always
        # report is_noise=False.
        flags_noise = method_name == 'DBSCAN'
        method_results = [
            {
                "filepath": filepath,
                "cluster": int(label),
                # bool() keeps NumPy booleans out of the JSON encoder.
                "is_noise": bool(flags_noise and label == -1),
            }
            for filepath, label in zip(self.file_paths, labels)
        ]

        # Save to file
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{method_name.lower().replace(' ', '_')}_results_{timestamp}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump({
                "method": method_name,
                "n_clusters": len(set(labels)) - (1 if -1 in labels else 0),
                "n_samples": len(labels),
                "results": method_results
            }, f, indent=4)
        print(f"Results saved to {filename}")
def run_all_methods(self):
    """Run every enabled automatic clustering method and report the outcome.

    Only DBSCAN is enabled at the moment; the Mean Shift and Affinity
    Propagation calls are commented out below. When at least one method
    returns labels, a summary is printed and the results are handed to
    ``visualize_results`` and ``save_results``.

    Returns:
        dict: method name -> label array for each method that found
        clusters (empty when none did).
    """
    rule = "=" * 70
    print("\n" + rule)
    print("AUTOMATIC CLUSTERING ANALYSIS")
    print(rule)
    print(f"Dataset: {len(self.file_paths)} documents")
    print(f"Embedding dimension: {self.embeddings.shape[1]}")

    results = {}

    # Run DBSCAN
    found = self.run_dbscan()
    if found is not None:
        results["DBSCAN"] = found

    # Run Mean Shift
    # meanshift_labels = self.run_mean_shift()
    # if meanshift_labels is not None:
    #     results["Mean Shift"] = meanshift_labels

    # Run Affinity Propagation
    # affinity_labels = self.run_affinity_propagation()
    # if affinity_labels is not None:
    #     results["Affinity Propagation"] = affinity_labels

    if not results:
        print("\nNo automatic clustering method found suitable clusters.")
        print("This might indicate:")
        print("- Data doesn't have clear cluster structure")
        print("- Embeddings need different preprocessing")
        print("- Different parameter ranges needed")
        return results

    # Summary
    print("\n" + rule)
    print("SUMMARY OF RESULTS")
    print(rule)
    for name, assigned in results.items():
        # The -1 (noise) label does not count as a cluster.
        cluster_total = len(set(assigned)) - (1 if -1 in assigned else 0)
        line = f"{name}: {cluster_total} clusters"
        if name == "DBSCAN":
            line += f", {list(assigned).count(-1)} noise points"
        print(line)

    # Calculate agreement between methods if multiple succeeded
    if len(results) > 1:
        from sklearn.metrics import adjusted_rand_score
        print("\nMethod Agreement (Adjusted Rand Index):")
        names = list(results.keys())
        for position, first in enumerate(names):
            for second in names[position + 1:]:
                ari = adjusted_rand_score(results[first], results[second])
                print(f"{first} vs {second}: {ari:.4f}")

    # Visualize and save results
    self.visualize_results(results)
    self.save_results(results)
    return results
def main():
    """Command-line entry point: load the embeddings and run the selected method(s).

    ``--method all`` (the default) delegates to ``run_all_methods``. A single
    method name runs just that method and, when it returns labels,
    visualizes and saves them. ``--method None`` only loads the embeddings.
    """
    parser = argparse.ArgumentParser(description="Run automatic clustering methods on document embeddings")
    parser.add_argument("--embeddings_path", help="Path to embeddings JSON file")
    parser.add_argument("--method", choices=['None', 'dbscan', 'meanshift', 'affinity', 'all'], default='all',
                        help="Which automatic method to run")
    args = parser.parse_args()

    # Initialize clustering
    clustering = AutoClustering(args.embeddings_path)

    if args.method == 'all':
        clustering.run_all_methods()
        return

    # CLI choice -> (key used in the results dict, name of the runner method)
    single_runs = {
        'dbscan': ("DBSCAN", "run_dbscan"),
        'meanshift': ("Mean Shift", "run_mean_shift"),
        'affinity': ("Affinity Propagation", "run_affinity_propagation"),
    }
    selected = single_runs.get(args.method)
    if selected is None:
        # 'None' was requested: nothing to run.
        return

    title, runner_name = selected
    labels = getattr(clustering, runner_name)()
    if labels is not None:
        clustering.visualize_results({title: labels})
        clustering.save_results({title: labels})


if __name__ == "__main__":
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 364 KiB

View File

@@ -0,0 +1,2 @@
trial_id,eps,min_samples,n_clusters,n_noise,noise_ratio,silhouette_score,status
1,0.2,50,5,374,0.13357142857142856,0.6100894212722778,success
1 trial_id eps min_samples n_clusters n_noise noise_ratio silhouette_score status
2 1 0.2 50 5 374 0.13357142857142856 0.6100894212722778 success

View File

@@ -0,0 +1,23 @@
trial_id,eps,min_samples,n_clusters,n_noise,noise_ratio,silhouette_score,status
1,0.001,50,0,2800,1.0,,failed
2,0.002,50,0,2800,1.0,,failed
3,0.005,50,0,2800,1.0,,failed
4,0.01,50,2,2436,0.87,0.8994060754776001,success
5,0.02,50,2,2220,0.7928571428571428,0.7592437863349915,success
6,0.03,50,1,2168,0.7742857142857142,,failed
7,0.04,50,1,2157,0.7703571428571429,,failed
8,0.05,50,2,2089,0.7460714285714286,0.8926841616630554,success
9,0.1,50,6,1204,0.43,0.6831505298614502,success
10,0.15,50,4,645,0.23035714285714284,0.6648684740066528,success
11,0.2,50,5,374,0.13357142857142856,0.6100894212722778,success
12,0.25,50,3,258,0.09214285714285714,0.41854172945022583,success
13,0.3,50,1,210,0.075,,failed
14,0.35,50,1,163,0.05821428571428571,,failed
15,0.4,50,1,145,0.05178571428571429,,failed
16,0.45,50,1,123,0.04392857142857143,,failed
17,0.5,50,1,107,0.038214285714285715,,failed
18,0.6,50,1,23,0.008214285714285714,,failed
19,0.7,50,1,0,0.0,,failed
20,0.8,50,1,0,0.0,,failed
21,0.9,50,1,0,0.0,,failed
22,1.0,50,1,0,0.0,,failed
1 trial_id eps min_samples n_clusters n_noise noise_ratio silhouette_score status
2 1 0.001 50 0 2800 1.0 failed
3 2 0.002 50 0 2800 1.0 failed
4 3 0.005 50 0 2800 1.0 failed
5 4 0.01 50 2 2436 0.87 0.8994060754776001 success
6 5 0.02 50 2 2220 0.7928571428571428 0.7592437863349915 success
7 6 0.03 50 1 2168 0.7742857142857142 failed
8 7 0.04 50 1 2157 0.7703571428571429 failed
9 8 0.05 50 2 2089 0.7460714285714286 0.8926841616630554 success
10 9 0.1 50 6 1204 0.43 0.6831505298614502 success
11 10 0.15 50 4 645 0.23035714285714284 0.6648684740066528 success
12 11 0.2 50 5 374 0.13357142857142856 0.6100894212722778 success
13 12 0.25 50 3 258 0.09214285714285714 0.41854172945022583 success
14 13 0.3 50 1 210 0.075 failed
15 14 0.35 50 1 163 0.05821428571428571 failed
16 15 0.4 50 1 145 0.05178571428571429 failed
17 16 0.45 50 1 123 0.04392857142857143 failed
18 17 0.5 50 1 107 0.038214285714285715 failed
19 18 0.6 50 1 23 0.008214285714285714 failed
20 19 0.7 50 1 0 0.0 failed
21 20 0.8 50 1 0 0.0 failed
22 21 0.9 50 1 0 0.0 failed
23 22 1.0 50 1 0 0.0 failed

View File

@@ -0,0 +1,23 @@
trial_id,eps,min_samples,n_clusters,n_noise,noise_ratio,silhouette_score,status
1,0.001,50,0,2800,1.0,,failed
2,0.002,50,0,2800,1.0,,failed
3,0.005,50,0,2800,1.0,,failed
4,0.01,50,2,2436,0.87,0.8994060754776001,success
5,0.02,50,2,2220,0.7928571428571428,0.7592437863349915,success
6,0.03,50,1,2168,0.7742857142857142,,failed
7,0.04,50,1,2157,0.7703571428571429,,failed
8,0.05,50,2,2089,0.7460714285714286,0.8926841616630554,success
9,0.1,50,6,1204,0.43,0.6831505298614502,success
10,0.15,50,4,645,0.23035714285714284,0.6648684740066528,success
11,0.2,50,5,374,0.13357142857142856,0.6100894212722778,success
12,0.25,50,3,258,0.09214285714285714,0.41854172945022583,success
13,0.3,50,1,210,0.075,,failed
14,0.35,50,1,163,0.05821428571428571,,failed
15,0.4,50,1,145,0.05178571428571429,,failed
16,0.45,50,1,123,0.04392857142857143,,failed
17,0.5,50,1,107,0.038214285714285715,,failed
18,0.6,50,1,23,0.008214285714285714,,failed
19,0.7,50,1,0,0.0,,failed
20,0.8,50,1,0,0.0,,failed
21,0.9,50,1,0,0.0,,failed
22,1.0,50,1,0,0.0,,failed
1 trial_id eps min_samples n_clusters n_noise noise_ratio silhouette_score status
2 1 0.001 50 0 2800 1.0 failed
3 2 0.002 50 0 2800 1.0 failed
4 3 0.005 50 0 2800 1.0 failed
5 4 0.01 50 2 2436 0.87 0.8994060754776001 success
6 5 0.02 50 2 2220 0.7928571428571428 0.7592437863349915 success
7 6 0.03 50 1 2168 0.7742857142857142 failed
8 7 0.04 50 1 2157 0.7703571428571429 failed
9 8 0.05 50 2 2089 0.7460714285714286 0.8926841616630554 success
10 9 0.1 50 6 1204 0.43 0.6831505298614502 success
11 10 0.15 50 4 645 0.23035714285714284 0.6648684740066528 success
12 11 0.2 50 5 374 0.13357142857142856 0.6100894212722778 success
13 12 0.25 50 3 258 0.09214285714285714 0.41854172945022583 success
14 13 0.3 50 1 210 0.075 failed
15 14 0.35 50 1 163 0.05821428571428571 failed
16 15 0.4 50 1 145 0.05178571428571429 failed
17 16 0.45 50 1 123 0.04392857142857143 failed
18 17 0.5 50 1 107 0.038214285714285715 failed
19 18 0.6 50 1 23 0.008214285714285714 failed
20 19 0.7 50 1 0 0.0 failed
21 20 0.8 50 1 0 0.0 failed
22 21 0.9 50 1 0 0.0 failed
23 22 1.0 50 1 0 0.0 failed

Binary file not shown.

After

Width:  |  Height:  |  Size: 747 KiB

649
cluster/gmm_extensive.py Normal file
View File

@@ -0,0 +1,649 @@
#!/usr/bin/env python3
"""
Extensive Gaussian Mixture Model clustering with grid search for optimal parameters
Includes BIC and AIC metrics for model selection
"""
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.decomposition import PCA
import datetime
import csv
import argparse
import warnings
warnings.filterwarnings('ignore')
class GMMExtensiveClustering:
def __init__(self, embeddings_path):
self.embeddings_path = embeddings_path
self.embeddings = None
self.file_paths = None
self.load_embeddings()
def load_embeddings(self):
    """Load file paths and embedding vectors from JSON, then standardize them.

    The file at ``self.embeddings_path`` must hold a list of objects with a
    ``filepath`` and an ``embedding`` entry. The method sets
    ``self.file_paths``, ``self.embeddings`` (float32 array),
    ``self.scaler`` (the fitted ``StandardScaler``) and
    ``self.embeddings_scaled``.

    Raises:
        ValueError: If the file contains no items.
    """
    print(f"Loading embeddings from {self.embeddings_path}...")
    # JSON is UTF-8 by specification; do not rely on the platform default.
    with open(self.embeddings_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not data:
        # Fail with a clear message instead of an IndexError on shape[1].
        raise ValueError(f"No embeddings found in {self.embeddings_path}")

    self.file_paths = [item['filepath'] for item in data]
    self.embeddings = np.array([item['embedding'] for item in data], dtype=np.float32)
    print(f"Loaded {len(self.file_paths)} samples with embedding dimension {self.embeddings.shape[1]}")

    # Standardize embeddings for better clustering
    self.scaler = StandardScaler()
    self.embeddings_scaled = self.scaler.fit_transform(self.embeddings)
def run_gmm_grid_search(self):
"""Grid-search GaussianMixture hyper-parameters on the standardized embeddings.

Every combination of n_components, covariance_type, reg_covar, n_init,
init_params and max_iter taken from the candidate lists below is fitted on
``self.embeddings_scaled``. Each recorded trial stores BIC, AIC, mean
log-likelihood, silhouette, Calinski-Harabasz and Davies-Bouldin scores.
The best trial is tracked separately for BIC (lowest), AIC (lowest) and
silhouette (highest). A summary is printed and the full trial list is passed
to ``save_gmm_grid_search_results``.

Returns:
dict with the keys ``'bic'``, ``'aic'`` and ``'silhouette'``; each value is
a ``(labels, params, score)`` tuple for the best trial under that criterion.
An entry keeps its initial values (``None`` labels and params, ``inf`` or
``-1`` score) when no trial qualified.
"""
print("\n" + "="*70)
print("RUNNING GAUSSIAN MIXTURE MODEL CLUSTERING WITH OPTIMIZED GRID SEARCH")
print("="*70)
# Optimized GMM parameter candidates for faster execution
# Smart n_components range with larger steps
max_components = min(50, len(self.embeddings_scaled) // 20) # capped at 50 and at one component per 20 samples
n_components_candidates = []
# Progressive step sizes: smaller steps for low numbers, larger for high
for n in range(2, max_components + 1):
if n <= 5:
n_components_candidates.append(n) # 2, 3, 4, 5
elif n <= 10:
if n % 2 == 0: # 6, 8, 10
n_components_candidates.append(n)
else:
if n % 3 == 2: # 11, 14, 17, ... (every third value up to max_components)
n_components_candidates.append(n)
# Covariance types to search; 'full' and 'diag' are currently disabled
covariance_types = [
# 'full', 'diag',
'tied', 'spherical'
] # only 'tied' and 'spherical' are active
# Simplified regularization - focus on key values
reg_covar_candidates = [1e-5, 1e-4, 1e-3] # Removed extreme values
# Reduced n_init - 1 is often sufficient for good initialization methods
n_init_candidates = [1, 5] # Removed 10 to save time
# Focus on best initialization methods
init_params_candidates = ['kmeans', 'k-means++'] # Removed 'random' and 'random_from_data'
# Simplified max_iter - most problems converge quickly
max_iter_candidates = [100, 300] # Removed 500, added 300 as middle ground
print(f"Optimized parameter combinations:")
print(f" - n_components: {len(n_components_candidates)} values {n_components_candidates}")
print(f" - covariance_types: {len(covariance_types)} options {covariance_types}")
print(f" - reg_covar: {len(reg_covar_candidates)} values {reg_covar_candidates}")
print(f" - n_init: {len(n_init_candidates)} values {n_init_candidates}")
print(f" - init_params: {len(init_params_candidates)} options {init_params_candidates}")
print(f" - max_iter: {len(max_iter_candidates)} values {max_iter_candidates}")
total_combinations = (len(n_components_candidates) * len(covariance_types) *
len(reg_covar_candidates) * len(n_init_candidates) *
len(init_params_candidates) * len(max_iter_candidates))
print(f"Total combinations: {total_combinations} (optimized for speed)")
# Rough runtime estimate from a fixed per-combination guess (not measured)
estimated_time_per_combination = 0.5 # seconds (conservative estimate)
estimated_total_time = total_combinations * estimated_time_per_combination
print(f"Estimated runtime: {estimated_total_time/60:.1f} minutes")
print("This should be much faster...\n")
# Track all results for analysis
all_results = []
# Early stopping criteria for speed optimization
early_stopping_threshold = 0.7 # a silhouette above this counts as an "excellent" result
good_results_found = 0
# NOTE(review): max_good_results is never read; the break conditions below hard-code 5.
max_good_results = 5 # Stop early if we find several very good results
# Best-so-far trackers: lower is better for BIC/AIC, higher for silhouette
best_bic_score = float('inf')
best_aic_score = float('inf')
best_silhouette_score = -1
best_params_bic = None
best_params_aic = None
best_params_silhouette = None
best_labels_bic = None
best_labels_aic = None
best_labels_silhouette = None
current_combination = 0
# Loop order: covariance type outermost, max_iter innermost
for covariance_type in covariance_types: # Start with covariance type
for n_components in n_components_candidates: # Then components
for init_params in init_params_candidates: # Good initialization methods
for reg_covar in reg_covar_candidates: # Regularization
for n_init in n_init_candidates: # Number of initializations
for max_iter in max_iter_candidates: # Iterations last
current_combination += 1
# Progress line every 50 combinations and on the final one
if current_combination % 50 == 0 or current_combination == total_combinations:
progress = (current_combination / total_combinations) * 100
print(f"Progress: {current_combination}/{total_combinations} ({progress:.1f}%) - "
f"Best scores so far: BIC={best_bic_score:.2f}, Silhouette={best_silhouette_score:.3f}")
try:
# Looser convergence tolerance for models with at most 5 components
tol = 1e-3 if n_components <= 5 else 1e-4 # Less strict tolerance for simple models
# Run GMM
gmm = GaussianMixture(
n_components=n_components,
covariance_type=covariance_type,
reg_covar=reg_covar,
n_init=n_init,
init_params=init_params,
max_iter=max_iter,
tol=tol, # Added tolerance for faster convergence
random_state=42
)
# Fit and predict
gmm.fit(self.embeddings_scaled)
labels = gmm.predict(self.embeddings_scaled)
# Non-converged runs with max_iter <= 100 are dropped before scoring
# and never reach all_results
if not gmm.converged_ and max_iter <= 100:
continue # Skip non-converged simple models
# Calculate metrics
bic_score = gmm.bic(self.embeddings_scaled)
aic_score = gmm.aic(self.embeddings_scaled)
log_likelihood = gmm.score(self.embeddings_scaled)
# Only calculate clustering metrics if we have multiple clusters
if len(set(labels)) > 1:
silhouette = silhouette_score(self.embeddings_scaled, labels)
calinski_harabasz = calinski_harabasz_score(self.embeddings_scaled, labels)
davies_bouldin = davies_bouldin_score(self.embeddings_scaled, labels)
# Count results above the "excellent" threshold for early stopping
if silhouette > early_stopping_threshold:
good_results_found += 1
print(f"🎯 Excellent result found: n_comp={n_components}, cov={covariance_type}, "
f"silhouette={silhouette:.4f}")
else:
# Single-cluster outcome: sentinel values instead of real scores
silhouette = -1
calinski_harabasz = 0
davies_bouldin = float('inf')
# Store result for analysis
result_info = {
'n_components': n_components,
'covariance_type': covariance_type,
'reg_covar': reg_covar,
'n_init': n_init,
'init_params': init_params,
'max_iter': max_iter,
'bic_score': bic_score,
'aic_score': aic_score,
'log_likelihood': log_likelihood,
'silhouette_score': silhouette,
'calinski_harabasz_score': calinski_harabasz,
'davies_bouldin_score': davies_bouldin,
'converged': gmm.converged_,
'n_iter': gmm.n_iter_,
'unique_clusters': len(set(labels))
}
all_results.append(result_info)
# Print promising results: silhouette above 0.3 and BIC below the 25th
# percentile of all BIC values recorded so far (current trial included)
if (silhouette > 0.3 and bic_score < np.percentile([r['bic_score'] for r in all_results], 25)):
print(f"n_components={n_components}, cov={covariance_type}, init={init_params}: "
f"BIC={bic_score:.2f}, AIC={aic_score:.2f}, silhouette={silhouette:.4f}")
# Track best results for different criteria
if bic_score < best_bic_score:
best_bic_score = bic_score
best_params_bic = {
'n_components': n_components,
'covariance_type': covariance_type,
'reg_covar': reg_covar,
'n_init': n_init,
'init_params': init_params,
'max_iter': max_iter
}
best_labels_bic = labels
if aic_score < best_aic_score:
best_aic_score = aic_score
best_params_aic = {
'n_components': n_components,
'covariance_type': covariance_type,
'reg_covar': reg_covar,
'n_init': n_init,
'init_params': init_params,
'max_iter': max_iter
}
best_labels_aic = labels
if silhouette > best_silhouette_score and len(set(labels)) > 1:
best_silhouette_score = silhouette
best_params_silhouette = {
'n_components': n_components,
'covariance_type': covariance_type,
'reg_covar': reg_covar,
'n_init': n_init,
'init_params': init_params,
'max_iter': max_iter
}
best_labels_silhouette = labels
# Early stopping: this check uses the current trial's silhouette, while
# the follow-up checks after the except clause use best_silhouette_score
if good_results_found >= 5 and silhouette > 0.6:
print(f"🛑 Early stopping triggered: Found {good_results_found} excellent results. "
f"Stopping at {current_combination}/{total_combinations} combinations.")
break
except Exception:
# Any exception silently skips this combination; nothing is recorded for it
continue
# The five identical checks below presumably unwind the remaining enclosing
# loops one level each - confirm against the original indentation
if good_results_found >= 5 and best_silhouette_score > 0.6:
break
if good_results_found >= 5 and best_silhouette_score > 0.6:
break
if good_results_found >= 5 and best_silhouette_score > 0.6:
break
if good_results_found >= 5 and best_silhouette_score > 0.6:
break
if good_results_found >= 5 and best_silhouette_score > 0.6:
break
# Analysis of results
print("\n" + "="*70)
print("GAUSSIAN MIXTURE MODEL GRID SEARCH ANALYSIS")
print("="*70)
if all_results:
# pandas is imported here, only when there are results to analyze
import pandas as pd
df_results = pd.DataFrame(all_results)
print(f"Total parameter combinations tested: {len(df_results)}")
# Filter results with valid clustering (more than 1 cluster)
valid_results = df_results[df_results['unique_clusters'] > 1]
print(f"Combinations with valid clustering: {len(valid_results)}")
if len(valid_results) > 0:
# Model-selection metrics come from all trials; clustering-quality
# metrics only from trials with more than one cluster
print(f"\nModel Selection Metrics:")
print(f"Best BIC score: {df_results['bic_score'].min():.2f}")
print(f"Best AIC score: {df_results['aic_score'].min():.2f}")
print(f"Best Log-Likelihood: {df_results['log_likelihood'].max():.2f}")
print(f"\nClustering Quality Metrics:")
print(f"Best silhouette score: {valid_results['silhouette_score'].max():.4f}")
print(f"Mean silhouette score: {valid_results['silhouette_score'].mean():.4f}")
print(f"Best Calinski-Harabasz score: {valid_results['calinski_harabasz_score'].max():.2f}")
print(f"Best Davies-Bouldin score: {valid_results['davies_bouldin_score'].min():.4f}")
# Top results by different criteria
print(f"\nTop 5 results by BIC (lower is better):")
top_bic = df_results.nsmallest(5, 'bic_score')
for idx, row in top_bic.iterrows():
print(f" n_comp={row['n_components']}, cov={row['covariance_type']}: "
f"BIC={row['bic_score']:.2f}, AIC={row['aic_score']:.2f}")
print(f"\nTop 5 results by AIC (lower is better):")
top_aic = df_results.nsmallest(5, 'aic_score')
for idx, row in top_aic.iterrows():
print(f" n_comp={row['n_components']}, cov={row['covariance_type']}: "
f"BIC={row['bic_score']:.2f}, AIC={row['aic_score']:.2f}")
# Same emptiness check as above, repeated for the silhouette ranking
if len(valid_results) > 0:
print(f"\nTop 5 results by Silhouette Score:")
top_silhouette = valid_results.nlargest(5, 'silhouette_score')
for idx, row in top_silhouette.iterrows():
print(f" n_comp={row['n_components']}, cov={row['covariance_type']}: "
f"silhouette={row['silhouette_score']:.4f}")
# Best score per component count across all other parameters
component_performance = df_results.groupby('n_components').agg({
'bic_score': 'min',
'aic_score': 'min',
'silhouette_score': 'max'
}).reset_index()
print(f"\nComponent count analysis (top 10 by BIC):")
top_components = component_performance.nsmallest(10, 'bic_score')
for idx, row in top_components.iterrows():
print(f" {row['n_components']} components: "
f"BIC={row['bic_score']:.2f}, AIC={row['aic_score']:.2f}, "
f"silhouette={row['silhouette_score']:.4f}")
print(f"\n📁 SAVING DETAILED RESULTS...")
print("="*30)
# Save detailed grid search results
self.save_gmm_grid_search_results(all_results,
best_params_bic, best_bic_score,
best_params_aic, best_aic_score,
best_params_silhouette, best_silhouette_score)
# Bundle the best trial per criterion as (labels, params, score) tuples
results = {
'bic': (best_labels_bic, best_params_bic, best_bic_score),
'aic': (best_labels_aic, best_params_aic, best_aic_score),
'silhouette': (best_labels_silhouette, best_params_silhouette, best_silhouette_score)
}
# Print best results
if best_labels_bic is not None:
print(f"\nBest GMM result by BIC:")
print(f"Parameters: {best_params_bic}")
print(f"BIC score: {best_bic_score:.2f}")
if best_labels_aic is not None:
print(f"\nBest GMM result by AIC:")
print(f"Parameters: {best_params_aic}")
print(f"AIC score: {best_aic_score:.2f}")
if best_labels_silhouette is not None:
print(f"\nBest GMM result by Silhouette:")
print(f"Parameters: {best_params_silhouette}")
print(f"Silhouette score: {best_silhouette_score:.4f}")
return results
def save_gmm_grid_search_results(self, all_results,
                                 best_params_bic, best_bic_score,
                                 best_params_aic, best_aic_score,
                                 best_params_silhouette, best_silhouette_score):
    """Save detailed GMM grid search results to a JSON file and a CSV summary.

    Builds a report with experiment metadata, the best configuration per
    criterion, every trial, summary statistics and the top-10 trials per
    criterion, writes it to ``gmm_grid_search_detailed_<timestamp>.json`` and
    then delegates the flat per-trial table to ``self.save_grid_search_csv``.

    Args:
        all_results: List of per-trial dicts. Each must provide the keys read
            below (``n_components``, ``covariance_type``, ``reg_covar``,
            ``n_init``, ``init_params``, ``max_iter``, ``bic_score``,
            ``aic_score``, ``log_likelihood``, ``silhouette_score``,
            ``calinski_harabasz_score``, ``davies_bouldin_score``,
            ``converged``, ``n_iter``, ``unique_clusters``).
        best_params_bic: Parameters of the best trial by BIC.
        best_bic_score: Best BIC; ``float('inf')`` is stored as ``null``.
        best_params_aic: Parameters of the best trial by AIC.
        best_aic_score: Best AIC; ``float('inf')`` is stored as ``null``.
        best_params_silhouette: Parameters of the best trial by silhouette.
        best_silhouette_score: Best silhouette; values ``<= -1`` are stored
            as ``null``.

    Raises:
        KeyError: If a trial dict lacks one of the keys listed above.
        OSError: If an output file cannot be written.
    """
    import statistics

    def _json_default(value):
        """Convert NumPy scalars/arrays (anything exposing ``tolist``) to builtins."""
        if hasattr(value, "tolist"):
            return value.tolist()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable")

    def _top_entries(ranked, score_keys):
        """Return the report entries for the first ten trials of ``ranked``."""
        return [
            {
                "rank": rank,
                "parameters": {
                    "n_components": trial['n_components'],
                    "covariance_type": trial['covariance_type'],
                    "init_params": trial['init_params'],
                },
                **{key: trial[key] for key in score_keys},
            }
            for rank, trial in enumerate(ranked[:10], start=1)
        ]

    # One clock reading so the report timestamp and the file names agree.
    now = datetime.datetime.now()

    grid_search_data = {
        "experiment_info": {
            "timestamp": now.isoformat(),
            "dataset_path": self.embeddings_path,
            "total_samples": len(self.file_paths),
            "embedding_dimension": self.embeddings.shape[1],
            "total_combinations_tested": len(all_results),
            "method": "Gaussian Mixture Model"
        },
        "best_results": {
            "by_bic": {
                "parameters": best_params_bic,
                "bic_score": best_bic_score if best_bic_score != float('inf') else None
            },
            "by_aic": {
                "parameters": best_params_aic,
                "aic_score": best_aic_score if best_aic_score != float('inf') else None
            },
            "by_silhouette": {
                "parameters": best_params_silhouette,
                "silhouette_score": best_silhouette_score if best_silhouette_score > -1 else None
            }
        },
        "all_trials": []
    }

    # Add all trial results
    parameter_keys = ('n_components', 'covariance_type', 'reg_covar',
                      'n_init', 'init_params', 'max_iter')
    outcome_keys = ('bic_score', 'aic_score', 'log_likelihood',
                    'silhouette_score', 'calinski_harabasz_score',
                    'davies_bouldin_score', 'converged', 'n_iter',
                    'unique_clusters')
    for trial_id, result in enumerate(all_results, start=1):
        grid_search_data["all_trials"].append({
            "trial_id": trial_id,
            "parameters": {key: result[key] for key in parameter_keys},
            "results": {key: result[key] for key in outcome_keys},
        })

    # Trials whose silhouette is above the -1 threshold count as valid clusterings.
    valid_results = [r for r in all_results if r['silhouette_score'] > -1]

    # Calculate summary statistics
    if all_results:
        bic_scores = [r['bic_score'] for r in all_results]
        aic_scores = [r['aic_score'] for r in all_results]
        log_likelihoods = [r['log_likelihood'] for r in all_results]
        valid_silhouette = [r['silhouette_score'] for r in valid_results]

        grid_search_data["summary_statistics"] = {
            "total_trials": len(all_results),
            "valid_clustering_trials": len(valid_silhouette),
            # Lower BIC/AIC is better, so "best" is the minimum.
            "bic_score": {
                "best": min(bic_scores),
                "worst": max(bic_scores),
                "mean": sum(bic_scores) / len(bic_scores),
                "median": statistics.median(bic_scores)
            },
            "aic_score": {
                "best": min(aic_scores),
                "worst": max(aic_scores),
                "mean": sum(aic_scores) / len(aic_scores),
                "median": statistics.median(aic_scores)
            },
            "log_likelihood": {
                "best": max(log_likelihoods),
                "worst": min(log_likelihoods),
                "mean": sum(log_likelihoods) / len(log_likelihoods)
            }
        }
        if valid_silhouette:
            grid_search_data["summary_statistics"]["silhouette_score"] = {
                "best": max(valid_silhouette),
                "worst": min(valid_silhouette),
                "mean": sum(valid_silhouette) / len(valid_silhouette),
                "median": statistics.median(valid_silhouette)
            }

    # Top 10 results by different criteria
    sorted_by_bic = sorted(all_results, key=lambda r: r['bic_score'])
    sorted_by_aic = sorted(all_results, key=lambda r: r['aic_score'])
    sorted_by_silhouette = sorted(valid_results,
                                  key=lambda r: r['silhouette_score'],
                                  reverse=True)
    grid_search_data["top_10_results"] = {
        "by_bic": _top_entries(sorted_by_bic, ('bic_score', 'aic_score')),
        "by_aic": _top_entries(sorted_by_aic, ('bic_score', 'aic_score')),
        "by_silhouette": _top_entries(sorted_by_silhouette, ('silhouette_score',)),
    }

    # Save to file with timestamp
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    filename = f"gmm_grid_search_detailed_{timestamp}.json"
    try:
        # Serialize first: a failure must neither leave a truncated file
        # behind nor prevent the CSV summary below from being written.
        payload = json.dumps(grid_search_data, indent=4, ensure_ascii=False,
                             default=_json_default)
    except (TypeError, ValueError) as exc:
        print(f"Could not serialize detailed grid search results: {exc}")
    else:
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"Detailed grid search results saved to: {filename}")

    # Also save a CSV summary for easy analysis
    csv_filename = f"gmm_grid_search_summary_{timestamp}.csv"
    self.save_grid_search_csv(all_results, csv_filename)
    print(f"Grid search summary CSV saved to: {csv_filename}")
def save_grid_search_csv(self, all_results, filename):
    """Write one CSV row per grid-search trial to ``filename``.

    Args:
        all_results: List of per-trial dicts; every column listed below
            (except ``trial_id``) is read from each dict by key.
        filename: Destination path; the file is created or overwritten.

    The ``trial_id`` column is the 1-based position of the trial in
    ``all_results``.
    """
    result_columns = [
        'n_components', 'covariance_type', 'reg_covar',
        'n_init', 'init_params', 'max_iter', 'bic_score', 'aic_score',
        'log_likelihood', 'silhouette_score', 'calinski_harabasz_score',
        'davies_bouldin_score', 'converged', 'n_iter', 'unique_clusters',
    ]
    header = ['trial_id'] + result_columns

    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        table = csv.DictWriter(handle, fieldnames=header)
        table.writeheader()
        for trial_id, trial in enumerate(all_results, start=1):
            record = {'trial_id': trial_id}
            for column in result_columns:
                record[column] = trial[column]
            table.writerow(record)
def visualize_results(self, results):
    """Plot the best clustering per criterion on a 2-D PCA projection.

    Args:
        results: Mapping with the keys ``'bic'``, ``'aic'`` and
            ``'silhouette'``; each value is a ``(labels, params, score)``
            tuple. ``labels`` may be ``None``, in which case the panel shows
            a "No valid clustering" placeholder instead of a scatter plot.

    Draws one panel per criterion, saves the figure as
    ``gmm_clustering_results.png`` and then shows it.
    """
    _fig, axes = plt.subplots(1, 3, figsize=(18, 6))

    # Reduce dimensions for visualization
    reducer = PCA(n_components=2, random_state=42)
    points = reducer.fit_transform(self.embeddings_scaled)

    panels = (
        ('bic', 'Best by BIC'),
        ('aic', 'Best by AIC'),
        ('silhouette', 'Best by Silhouette'),
    )
    for panel_index, (method, title) in enumerate(panels):
        ax = axes[panel_index]
        labels, params, _score = results[method]

        if labels is None:
            ax.text(0.5, 0.5, 'No valid clustering', ha='center', va='center',
                    transform=ax.transAxes, fontsize=12)
            ax.set_title(f'{title}\n(Failed)')
        else:
            cluster_ids = set(labels)
            # One evenly spaced Set3 colour per distinct cluster label.
            palette = plt.cm.Set3(np.linspace(0, 1, len(cluster_ids)))
            for cluster_id, color in zip(cluster_ids, palette):
                members = labels == cluster_id
                ax.scatter(points[members, 0], points[members, 1],
                           c=[color], s=50, alpha=0.7, label=f'Cluster {cluster_id}')
            ax.set_title(f'{title}\nn_components={params["n_components"]}, '
                         f'cov={params["covariance_type"]}')

        ax.set_xlabel('PCA Component 1')
        ax.set_ylabel('PCA Component 2')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('gmm_clustering_results.png', dpi=300, bbox_inches='tight')
    plt.show()
    print(f"Visualization saved as 'gmm_clustering_results.png'")
def save_clustering_results(self, results):
    """Save final clustering results to JSON files.

    Args:
        results: Mapping with the keys ``'bic'``, ``'aic'`` and
            ``'silhouette'``; each value is a ``(labels, params, score)``
            tuple. Criteria whose ``labels`` is ``None`` are skipped.

    For every remaining criterion a file named
    ``gmm_final_results_<method>_<timestamp>.json`` is written, pairing each
    entry of ``self.file_paths`` with its cluster label.

    Raises:
        TypeError: If ``params`` or ``score`` hold a value that is neither
            JSON-native nor convertible through ``tolist()``.
        OSError: If an output file cannot be written.
    """
    def _json_default(value):
        """Convert NumPy scalars/arrays (anything exposing ``tolist``) to builtins."""
        if hasattr(value, "tolist"):
            return value.tolist()
        raise TypeError(
            f"Object of type {type(value).__name__} is not JSON serializable")

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    for method in ('bic', 'aic', 'silhouette'):
        labels, params, score = results[method]
        if labels is None:
            continue

        clustering_results = [
            {"filepath": filepath, "cluster": int(label)}
            for filepath, label in zip(self.file_paths, labels)
        ]
        document = {
            "method": f"GMM (best by {method.upper()})",
            "parameters": params,
            "n_components": params['n_components'],
            "n_samples": len(labels),
            f"{method}_score": score,
            "results": clustering_results
        }

        # Serialize before opening the file so that a serialization error
        # cannot leave a truncated JSON file on disk.
        payload = json.dumps(document, indent=4, default=_json_default)

        filename = f"gmm_final_results_{method}_{timestamp}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"Final clustering results ({method}) saved to: {filename}")
def main():
    """Command-line entry point for the extensive GMM clustering run.

    Parses ``--embeddings_path``, runs the grid search and, when at least one
    criterion produced labels, visualizes and saves the results.
    """
    cli = argparse.ArgumentParser(
        description="Run extensive Gaussian Mixture Model clustering on document embeddings")
    cli.add_argument("--embeddings_path", required=True,
                     help="Path to embeddings JSON file")
    options = cli.parse_args()

    # Initialize clustering and run the extensive grid search
    clustering = GMMExtensiveClustering(options.embeddings_path)
    results = clustering.run_gmm_grid_search()

    found_clustering = any(
        best_labels is not None for best_labels, _params, _score in results.values()
    )
    if not found_clustering:
        print("\nGMM extensive clustering did not find suitable clusters.")
        return

    # Visualize and save results
    clustering.visualize_results(results)
    clustering.save_clustering_results(results)
    print("\nGMM extensive clustering completed successfully!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,414 @@
trial_id,n_components,covariance_type,reg_covar,n_init,init_params,max_iter,bic_score,aic_score,log_likelihood,silhouette_score,calinski_harabasz_score,davies_bouldin_score,converged,n_iter,unique_clusters
1,2,full,0.0001,1,kmeans,100,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
2,2,full,0.0001,1,kmeans,300,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
3,2,full,0.0001,5,kmeans,100,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
4,2,full,0.0001,5,kmeans,300,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
5,2,full,0.001,1,kmeans,100,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
6,2,full,0.001,1,kmeans,300,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
7,2,full,0.001,5,kmeans,100,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
8,2,full,0.001,5,kmeans,300,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
9,2,full,0.0001,1,k-means++,100,17370120.605124418,-7569519.0,2851.86,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
10,2,full,0.0001,1,k-means++,300,17370120.605124418,-7569519.0,2851.86,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
11,2,full,0.0001,5,k-means++,100,17270534.605124418,-7669105.0,2869.6433,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
12,2,full,0.0001,5,k-means++,300,17270534.605124418,-7669105.0,2869.6433,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
13,2,full,0.001,1,k-means++,100,20919727.605124418,-4019912.0,2218.0017,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
14,2,full,0.001,1,k-means++,300,20919727.605124418,-4019912.0,2218.0017,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
15,2,full,0.001,5,k-means++,100,20851959.605124418,-4087680.0,2230.1033,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
16,2,full,0.001,5,k-means++,300,20851959.605124418,-4087680.0,2230.1033,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
17,3,full,0.0001,1,kmeans,100,33833558.37637398,-3575904.0,2888.795,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
18,3,full,0.0001,1,kmeans,300,33833558.37637398,-3575904.0,2888.795,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
19,3,full,0.0001,5,kmeans,100,26462676.376373976,-10946786.0,4205.024,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
20,3,full,0.0001,5,kmeans,300,26462676.376373976,-10946786.0,4205.024,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
21,3,full,0.001,1,kmeans,100,37452100.37637398,42638.0,2242.6267,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
22,3,full,0.001,1,kmeans,300,37452100.37637398,42638.0,2242.6267,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
23,3,full,0.001,5,kmeans,100,33411843.376373976,-3997619.0,2964.1013,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
24,3,full,0.001,5,kmeans,300,33411843.376373976,-3997619.0,2964.1013,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
25,3,full,0.001,1,k-means++,100,37089716.37637398,-319746.0,2307.3381,0.3671606779098511,710.0538940429688,1.7711288790751016,True,3,3
26,3,full,0.001,1,k-means++,300,37089716.37637398,-319746.0,2307.3381,0.3671606779098511,710.0538940429688,1.7711288790751016,True,3,3
27,3,full,0.001,5,k-means++,100,33366158.376373976,-4043304.0,2972.2593,0.19688381254673004,834.6746215820312,2.265938022603405,True,3,3
28,3,full,0.001,5,k-means++,300,33366158.376373976,-4043304.0,2972.2593,0.19688381254673004,834.6746215820312,2.265938022603405,True,3,3
29,4,full,0.0001,1,kmeans,100,44240183.14762353,-5639102.0,4007.3035,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
30,4,full,0.0001,1,kmeans,300,44240183.14762353,-5639102.0,4007.3035,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
31,4,full,0.0001,5,kmeans,100,38759701.14762353,-11119584.0,4985.961,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
32,4,full,0.0001,5,kmeans,300,38759701.14762353,-11119584.0,4985.961,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
33,4,full,0.001,1,kmeans,100,50532988.14762353,653703.0,2883.5884,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
34,4,full,0.001,1,kmeans,300,50532988.14762353,653703.0,2883.5884,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
35,4,full,0.001,5,kmeans,100,47456739.14762353,-2422546.0,3432.9185,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
36,4,full,0.001,5,kmeans,300,47456739.14762353,-2422546.0,3432.9185,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
37,4,full,0.001,1,k-means++,100,50369343.14762353,490058.0,2912.8108,0.16492997109889984,579.3992309570312,2.14603204385876,True,3,4
38,4,full,0.001,1,k-means++,300,50369343.14762353,490058.0,2912.8108,0.16492997109889984,579.3992309570312,2.14603204385876,True,3,4
39,4,full,0.001,5,k-means++,100,48104059.14762353,-1775226.0,3317.3257,0.19116489589214325,729.421630859375,2.391271825318095,True,3,4
40,4,full,0.001,5,k-means++,300,48104059.14762353,-1775226.0,3317.3257,0.19116489589214325,729.421630859375,2.391271825318095,True,3,4
41,5,full,0.0001,1,kmeans,100,60034171.91887309,-2314936.0,4163.7827,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
42,5,full,0.0001,1,kmeans,300,60034171.91887309,-2314936.0,4163.7827,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
43,5,full,0.0001,5,kmeans,100,54230057.91887309,-8119050.0,5200.232,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
44,5,full,0.0001,5,kmeans,300,54230057.91887309,-8119050.0,5200.232,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
45,5,full,0.001,1,kmeans,100,66698922.91887309,4349815.0,2973.6487,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
46,5,full,0.001,1,kmeans,300,66698922.91887309,4349815.0,2973.6487,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
47,5,full,0.001,5,kmeans,100,63375271.91887309,1026164.0,3567.158,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
48,5,full,0.001,5,kmeans,300,63375271.91887309,1026164.0,3567.158,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
49,5,full,0.001,1,k-means++,100,66517075.91887309,4167968.0,3006.1213,0.16880138218402863,549.265625,1.8881643569801063,True,3,5
50,5,full,0.001,1,k-means++,300,66517075.91887309,4167968.0,3006.1213,0.16880138218402863,549.265625,1.8881643569801063,True,3,5
51,5,full,0.001,5,k-means++,100,63364071.91887309,1014964.0,3569.158,0.25286975502967834,715.984619140625,1.6093410042807197,True,3,5
52,5,full,0.001,5,k-means++,300,63364071.91887309,1014964.0,3569.158,0.25286975502967834,715.984619140625,1.6093410042807197,True,3,5
53,6,full,0.0001,1,kmeans,100,73062550.69012265,-1756380.0,4814.121,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
54,6,full,0.0001,1,kmeans,300,73062550.69012265,-1756380.0,4814.121,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
55,6,full,0.0001,5,kmeans,100,69890932.69012265,-4927998.0,5380.4814,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
56,6,full,0.0001,5,kmeans,300,69890932.69012265,-4927998.0,5380.4814,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
57,6,full,0.001,1,kmeans,100,81179056.69012265,6360126.0,3364.745,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
58,6,full,0.001,1,kmeans,300,81179056.69012265,6360126.0,3364.745,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
59,6,full,0.001,5,kmeans,100,79356620.69012265,4537690.0,3690.18,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
60,6,full,0.001,5,kmeans,300,79356620.69012265,4537690.0,3690.18,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
61,6,full,1e-05,1,k-means++,100,68753328.69012265,-6065602.0,5583.625,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
62,6,full,1e-05,1,k-means++,300,68753328.69012265,-6065602.0,5583.625,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
63,6,full,0.0001,1,k-means++,100,75948732.69012265,1129802.0,4298.7314,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
64,6,full,0.0001,1,k-means++,300,75948732.69012265,1129802.0,4298.7314,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
65,6,full,0.0001,5,k-means++,100,69381502.69012265,-5437428.0,5471.451,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
66,6,full,0.0001,5,k-means++,300,69381502.69012265,-5437428.0,5471.451,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
67,6,full,0.001,1,k-means++,100,83185656.69012265,8366726.0,3006.4236,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
68,6,full,0.001,1,k-means++,300,83185656.69012265,8366726.0,3006.4236,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
69,6,full,0.001,5,k-means++,100,79079084.69012265,4260154.0,3739.74,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
70,6,full,0.001,5,k-means++,300,79079084.69012265,4260154.0,3739.74,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
71,8,full,0.0001,1,kmeans,100,101322900.23262176,1564326.0,5721.299,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
72,8,full,0.0001,1,kmeans,300,101322900.23262176,1564326.0,5721.299,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
73,8,full,0.0001,5,kmeans,100,100810002.23262176,1051428.0,5812.8877,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
74,8,full,0.0001,5,kmeans,300,100810002.23262176,1051428.0,5812.8877,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
75,8,full,0.001,1,kmeans,100,111448618.23262176,11690044.0,3913.135,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
76,8,full,0.001,1,kmeans,300,111448618.23262176,11690044.0,3913.135,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
77,8,full,0.001,5,kmeans,100,111172686.23262176,11414112.0,3962.4087,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
78,8,full,0.001,5,kmeans,300,111172686.23262176,11414112.0,3962.4087,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
79,8,full,0.001,1,k-means++,100,111979964.23262176,12221390.0,3818.2517,0.2020130306482315,465.200927734375,1.9463124697846808,True,3,8
80,8,full,0.001,1,k-means++,300,111979964.23262176,12221390.0,3818.2517,0.2020130306482315,465.200927734375,1.9463124697846808,True,3,8
81,8,full,0.001,5,k-means++,100,111327662.23262176,11569088.0,3934.7344,0.2736768126487732,617.4371948242188,1.7398856934277325,True,3,8
82,8,full,0.001,5,k-means++,300,111327662.23262176,11569088.0,3934.7344,0.2736768126487732,617.4371948242188,1.7398856934277325,True,3,8
83,10,full,0.0001,1,kmeans,100,133265705.77512088,8567482.0,5970.8955,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
84,10,full,0.0001,1,kmeans,300,133265705.77512088,8567482.0,5970.8955,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
85,10,full,0.0001,5,kmeans,100,132892239.77512088,8194016.0,6037.586,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
86,10,full,0.0001,5,kmeans,300,132892239.77512088,8194016.0,6037.586,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
87,10,full,0.001,1,kmeans,100,143970687.77512088,19272464.0,4059.2915,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
88,10,full,0.001,1,kmeans,300,143970687.77512088,19272464.0,4059.2915,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
89,10,full,0.001,5,kmeans,100,143652495.77512088,18954272.0,4116.1113,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
90,10,full,0.001,5,kmeans,300,143652495.77512088,18954272.0,4116.1113,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
91,10,full,0.001,1,k-means++,100,144482919.77512088,19784696.0,3967.8215,0.17508849501609802,474.4588928222656,1.846488092509191,True,3,10
92,10,full,0.001,1,k-means++,300,144482919.77512088,19784696.0,3967.8215,0.17508849501609802,474.4588928222656,1.846488092509191,True,3,10
93,10,full,0.001,5,k-means++,100,144071547.77512088,19373324.0,4041.2808,0.22849640250205994,521.3035278320312,1.9523215129883376,True,3,10
94,10,full,0.001,5,k-means++,300,144071547.77512088,19373324.0,4041.2808,0.22849640250205994,521.3035278320312,1.9523215129883376,True,3,10
95,11,full,0.0001,1,kmeans,100,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
96,11,full,0.0001,1,kmeans,300,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
97,11,full,0.0001,5,kmeans,100,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
98,11,full,0.0001,5,kmeans,300,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
99,11,full,0.001,1,kmeans,100,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
100,11,full,0.001,1,kmeans,300,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
101,11,full,0.001,5,kmeans,100,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
102,11,full,0.001,5,kmeans,300,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
103,11,full,0.001,1,k-means++,100,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
104,11,full,0.001,1,k-means++,300,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
105,11,full,0.001,5,k-means++,100,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
106,11,full,0.001,5,k-means++,300,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
107,14,full,0.0001,1,kmeans,100,198149922.8601191,23572408.0,6291.7656,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
108,14,full,0.0001,1,kmeans,300,198149922.8601191,23572408.0,6291.7656,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
109,14,full,0.0001,5,kmeans,100,197540314.8601191,22962800.0,6400.6245,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
110,14,full,0.0001,5,kmeans,300,197540314.8601191,22962800.0,6400.6245,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
111,14,full,0.001,1,kmeans,100,209401674.8601191,34824160.0,4282.5244,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
112,14,full,0.001,1,kmeans,300,209401674.8601191,34824160.0,4282.5244,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
113,14,full,0.001,5,kmeans,100,208994740.8601191,34417224.0,4355.191,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
114,14,full,0.001,5,kmeans,300,208994740.8601191,34417224.0,4355.191,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
115,14,full,0.0001,1,k-means++,100,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
116,14,full,0.0001,1,k-means++,300,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
117,14,full,0.0001,5,k-means++,100,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
118,14,full,0.0001,5,k-means++,300,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
119,14,full,0.001,1,k-means++,100,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
120,14,full,0.001,1,k-means++,300,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
121,14,full,0.001,5,k-means++,100,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
122,14,full,0.001,5,k-means++,300,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
123,17,full,0.0001,1,kmeans,100,247173509.17386776,35186530.0,6468.057,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
124,17,full,0.0001,1,kmeans,300,247173509.17386776,35186530.0,6468.057,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
125,17,full,0.0001,5,kmeans,100,246850361.17386776,34863380.0,6525.762,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
126,17,full,0.0001,5,kmeans,300,246850361.17386776,34863380.0,6525.762,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
127,17,full,0.001,1,kmeans,100,258736973.17386776,46749990.0,4403.153,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
128,17,full,0.001,1,kmeans,300,258736973.17386776,46749990.0,4403.153,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
129,17,full,0.001,5,kmeans,100,258504801.17386776,46517820.0,4444.6123,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
130,17,full,0.001,5,kmeans,300,258504801.17386776,46517820.0,4444.6123,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
131,17,full,0.0001,1,k-means++,100,247607397.17386776,35620416.0,6390.577,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
132,17,full,0.0001,1,k-means++,300,247607397.17386776,35620416.0,6390.577,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
133,17,full,0.0001,5,k-means++,100,246784997.17386776,34798016.0,6537.434,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
134,17,full,0.0001,5,k-means++,300,246784997.17386776,34798016.0,6537.434,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
135,17,full,0.001,1,k-means++,100,259055585.17386776,47068604.0,4346.258,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
136,17,full,0.001,1,k-means++,300,259055585.17386776,47068604.0,4346.258,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
137,17,full,0.001,5,k-means++,100,258522869.17386776,46535890.0,4441.3857,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
138,17,full,0.001,5,k-means++,300,258522869.17386776,46535890.0,4441.3857,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
139,20,full,0.0001,1,kmeans,100,296473639.4876164,47077190.0,6594.966,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
140,20,full,0.0001,1,kmeans,300,296473639.4876164,47077190.0,6594.966,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
141,20,full,0.001,1,kmeans,100,308235301.4876164,58838856.0,4494.669,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
142,20,full,0.001,1,kmeans,300,308235301.4876164,58838856.0,4494.669,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
143,20,full,0.001,5,kmeans,100,307947927.4876164,58551480.0,4545.986,0.12884767353534698,377.9795227050781,2.0180962938149367,True,2,20
144,20,full,0.001,5,kmeans,300,307947927.4876164,58551480.0,4545.986,0.12884767353534698,377.9795227050781,2.0180962938149367,True,2,20
145,20,full,0.0001,1,k-means++,100,297139767.4876164,47743320.0,6476.014,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
146,20,full,0.0001,1,k-means++,300,297139767.4876164,47743320.0,6476.014,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
147,20,full,0.001,1,k-means++,100,308712155.4876164,59315708.0,4409.5166,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
148,20,full,0.001,1,k-means++,300,308712155.4876164,59315708.0,4409.5166,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
149,20,full,0.001,5,k-means++,100,308599855.4876164,59203410.0,4429.57,0.15204866230487823,341.3536376953125,2.231048217195437,True,3,20
150,20,full,0.001,5,k-means++,300,308599855.4876164,59203410.0,4429.57,0.15204866230487823,341.3536376953125,2.231048217195437,True,3,20
151,2,diag,1e-05,1,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
152,2,diag,1e-05,1,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
153,2,diag,1e-05,5,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
154,2,diag,1e-05,5,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
155,2,diag,0.0001,1,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
156,2,diag,0.0001,1,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
157,2,diag,0.0001,5,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
158,2,diag,0.0001,5,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
159,2,diag,0.001,1,kmeans,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
160,2,diag,0.001,1,kmeans,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
161,2,diag,0.001,5,kmeans,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
162,2,diag,0.001,5,kmeans,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
163,2,diag,1e-05,1,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
164,2,diag,1e-05,1,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
165,2,diag,1e-05,5,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
166,2,diag,1e-05,5,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
167,2,diag,0.0001,1,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
168,2,diag,0.0001,1,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
169,2,diag,0.0001,5,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
170,2,diag,0.0001,5,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
171,2,diag,0.001,1,k-means++,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
172,2,diag,0.001,1,k-means++,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
173,2,diag,0.001,5,k-means++,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
174,2,diag,0.001,5,k-means++,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
175,3,diag,1e-05,1,kmeans,100,12693850.335015846,12620880.0,-2249.3394,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
176,3,diag,1e-05,1,kmeans,300,12693850.335015846,12620880.0,-2249.3394,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
177,3,diag,1e-05,5,kmeans,100,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,19,3
178,3,diag,1e-05,5,kmeans,300,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,19,3
179,3,diag,0.0001,1,kmeans,100,12699627.335015846,12626657.0,-2250.3708,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
180,3,diag,0.0001,1,kmeans,300,12699627.335015846,12626657.0,-2250.3708,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
181,3,diag,0.0001,5,kmeans,100,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,20,3
182,3,diag,0.0001,5,kmeans,300,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,20,3
183,3,diag,0.001,1,kmeans,100,12718245.335015846,12645275.0,-2253.6956,0.3760926127433777,779.2965087890625,0.7860265546274455,True,7,3
184,3,diag,0.001,1,kmeans,300,12718245.335015846,12645275.0,-2253.6956,0.3760926127433777,779.2965087890625,0.7860265546274455,True,7,3
185,3,diag,0.001,5,kmeans,100,11770859.335015846,11697889.0,-2084.5195,0.15369778871536255,838.0372924804688,1.7007544548321498,True,19,3
186,3,diag,0.001,5,kmeans,300,11770859.335015846,11697889.0,-2084.5195,0.15369778871536255,838.0372924804688,1.7007544548321498,True,19,3
187,3,diag,1e-05,1,k-means++,100,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
188,3,diag,1e-05,1,k-means++,300,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
189,3,diag,1e-05,5,k-means++,100,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
190,3,diag,1e-05,5,k-means++,300,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
191,3,diag,0.0001,1,k-means++,100,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
192,3,diag,0.0001,1,k-means++,300,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
193,3,diag,0.0001,5,k-means++,100,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
194,3,diag,0.0001,5,k-means++,300,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
195,3,diag,0.001,1,k-means++,100,11686162.335015846,11613192.0,-2069.395,0.2351498007774353,882.5064086914062,2.071681816869212,True,22,3
196,3,diag,0.001,1,k-means++,300,11686162.335015846,11613192.0,-2069.395,0.2351498007774353,882.5064086914062,2.071681816869212,True,22,3
197,3,diag,0.001,5,k-means++,100,11686154.335015846,11613184.0,-2069.3936,0.2351498007774353,882.5064697265625,2.071681816869212,True,13,3
198,3,diag,0.001,5,k-means++,300,11686154.335015846,11613184.0,-2069.3936,0.2351498007774353,882.5064697265625,2.071681816869212,True,13,3
199,4,diag,1e-05,1,kmeans,100,11525150.759146027,11427855.0,-2034.8359,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
200,4,diag,1e-05,1,kmeans,300,11525150.759146027,11427855.0,-2034.8359,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
201,4,diag,1e-05,5,kmeans,100,10872145.759146027,10774850.0,-1918.2279,0.22238799929618835,665.7476806640625,2.503304023275595,True,17,4
202,4,diag,1e-05,5,kmeans,300,10872145.759146027,10774850.0,-1918.2279,0.22238799929618835,665.7476806640625,2.503304023275595,True,17,4
203,4,diag,0.0001,1,kmeans,100,11530927.759146027,11433632.0,-2035.8676,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
204,4,diag,0.0001,1,kmeans,300,11530927.759146027,11433632.0,-2035.8676,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
205,4,diag,0.0001,5,kmeans,100,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,16,4
206,4,diag,0.0001,5,kmeans,300,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,16,4
207,4,diag,0.001,1,kmeans,100,11549555.759146027,11452260.0,-2039.194,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
208,4,diag,0.001,1,kmeans,300,11549555.759146027,11452260.0,-2039.194,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
209,4,diag,0.001,5,kmeans,100,10872103.759146027,10774808.0,-1918.2203,0.2233457714319229,667.0189208984375,2.500263810780557,True,18,4
210,4,diag,0.001,5,kmeans,300,10872103.759146027,10774808.0,-1918.2203,0.2233457714319229,667.0189208984375,2.500263810780557,True,18,4
211,4,diag,1e-05,1,k-means++,100,10871971.759146027,10774676.0,-1918.1968,0.22509750723838806,669.1204223632812,2.4944239618747446,True,17,4
212,4,diag,1e-05,1,k-means++,300,10871971.759146027,10774676.0,-1918.1968,0.22509750723838806,669.1204223632812,2.4944239618747446,True,17,4
213,4,diag,1e-05,5,k-means++,100,10865268.759146027,10767973.0,-1916.9999,0.18924137949943542,715.3873291015625,1.9068310882928445,True,23,4
214,4,diag,1e-05,5,k-means++,300,10865268.759146027,10767973.0,-1916.9999,0.18924137949943542,715.3873291015625,1.9068310882928445,True,23,4
215,4,diag,0.0001,1,k-means++,100,11379467.759146027,11282172.0,-2008.821,0.23593758046627045,682.464599609375,1.5970337323460135,True,12,4
216,4,diag,0.0001,1,k-means++,300,11379467.759146027,11282172.0,-2008.821,0.23593758046627045,682.464599609375,1.5970337323460135,True,12,4
217,4,diag,0.0001,5,k-means++,100,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,26,4
218,4,diag,0.0001,5,k-means++,300,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,26,4
219,4,diag,0.001,1,k-means++,100,11398228.759146027,11300933.0,-2012.1713,0.23644769191741943,683.3140869140625,1.5953322750132166,True,13,4
220,4,diag,0.001,1,k-means++,300,11398228.759146027,11300933.0,-2012.1713,0.23644769191741943,683.3140869140625,1.5953322750132166,True,13,4
221,4,diag,0.001,5,k-means++,100,10872234.759146027,10774939.0,-1918.2438,0.22265465557575226,666.09033203125,2.5026321909350457,True,24,4
222,4,diag,0.001,5,k-means++,300,10872234.759146027,10774939.0,-1918.2438,0.22265465557575226,666.09033203125,2.5026321909350457,True,24,4
223,5,diag,1e-05,1,kmeans,100,10641753.183276208,10520132.0,-1871.2793,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
224,5,diag,1e-05,1,kmeans,300,10641753.183276208,10520132.0,-1871.2793,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
225,5,diag,1e-05,5,kmeans,100,10324953.183276208,10203332.0,-1814.7079,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
226,5,diag,1e-05,5,kmeans,300,10324953.183276208,10203332.0,-1814.7079,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
227,5,diag,0.0001,1,kmeans,100,10647529.183276208,10525908.0,-1872.3107,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
228,5,diag,0.0001,1,kmeans,300,10647529.183276208,10525908.0,-1872.3107,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
229,5,diag,0.0001,5,kmeans,100,10324954.183276208,10203333.0,-1814.708,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
230,5,diag,0.0001,5,kmeans,300,10324954.183276208,10203333.0,-1814.708,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
231,5,diag,0.001,1,kmeans,100,10666196.183276208,10544575.0,-1875.6442,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
232,5,diag,0.001,1,kmeans,300,10666196.183276208,10544575.0,-1875.6442,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
233,5,diag,0.001,5,kmeans,100,10327782.183276208,10206161.0,-1815.213,0.2155037522315979,645.0463256835938,1.5864905576853905,True,23,5
234,5,diag,0.001,5,kmeans,300,10327782.183276208,10206161.0,-1815.213,0.2155037522315979,645.0463256835938,1.5864905576853905,True,23,5
235,5,diag,1e-05,1,k-means++,100,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
236,5,diag,1e-05,1,k-means++,300,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
237,5,diag,1e-05,5,k-means++,100,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
238,5,diag,1e-05,5,k-means++,300,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
239,5,diag,0.0001,1,k-means++,100,10466296.183276208,10344675.0,-1839.9476,0.24029850959777832,631.74609375,1.4320268333089838,True,14,5
240,5,diag,0.0001,1,k-means++,300,10466296.183276208,10344675.0,-1839.9476,0.24029850959777832,631.74609375,1.4320268333089838,True,14,5
241,5,diag,0.0001,5,k-means++,100,9948819.183276208,9827198.0,-1747.541,0.19564315676689148,662.848876953125,1.6781877684435718,True,17,5
242,5,diag,0.0001,5,k-means++,300,9948819.183276208,9827198.0,-1747.541,0.19564315676689148,662.848876953125,1.6781877684435718,True,17,5
243,5,diag,0.001,1,k-means++,100,10485035.183276208,10363414.0,-1843.294,0.24058617651462555,631.865478515625,1.432154375281304,True,17,5
244,5,diag,0.001,1,k-means++,300,10485035.183276208,10363414.0,-1843.294,0.24058617651462555,631.865478515625,1.432154375281304,True,17,5
245,5,diag,0.001,5,k-means++,100,9949011.183276208,9827390.0,-1747.5753,0.19609026610851288,663.4921264648438,1.6774180513139325,True,19,5
246,5,diag,0.001,5,k-means++,300,9949011.183276208,9827390.0,-1747.5753,0.19609026610851288,663.4921264648438,1.6774180513139325,True,19,5
247,6,diag,1e-05,1,kmeans,100,9799004.60740639,9653058.0,-1714.9814,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
248,6,diag,1e-05,1,kmeans,300,9799004.60740639,9653058.0,-1714.9814,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
249,6,diag,1e-05,5,kmeans,100,9102218.60740639,8956272.0,-1590.5553,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
250,6,diag,1e-05,5,kmeans,300,9102218.60740639,8956272.0,-1590.5553,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
251,6,diag,0.0001,1,kmeans,100,9804784.60740639,9658838.0,-1716.0135,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
252,6,diag,0.0001,1,kmeans,300,9804784.60740639,9658838.0,-1716.0135,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
253,6,diag,0.0001,5,kmeans,100,9102220.60740639,8956274.0,-1590.5557,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
254,6,diag,0.0001,5,kmeans,300,9102220.60740639,8956274.0,-1590.5557,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
255,6,diag,0.001,1,kmeans,100,9823540.60740639,9677594.0,-1719.3629,0.26235219836235046,629.54638671875,1.991657340604508,True,16,6
256,6,diag,0.001,1,kmeans,300,9823540.60740639,9677594.0,-1719.3629,0.26235219836235046,629.54638671875,1.991657340604508,True,16,6
257,6,diag,0.001,5,kmeans,100,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,28,6
258,6,diag,0.001,5,kmeans,300,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,28,6
259,6,diag,1e-05,1,k-means++,100,9646063.60740639,9500117.0,-1687.6705,0.20277422666549683,612.2411499023438,1.4227889323530194,True,20,6
260,6,diag,1e-05,1,k-means++,300,9646063.60740639,9500117.0,-1687.6705,0.20277422666549683,612.2411499023438,1.4227889323530194,True,20,6
261,6,diag,1e-05,5,k-means++,100,9102296.60740639,8956350.0,-1590.5693,0.21240639686584473,565.2139282226562,2.136853531967839,True,21,6
262,6,diag,1e-05,5,k-means++,300,9102296.60740639,8956350.0,-1590.5693,0.21240639686584473,565.2139282226562,2.136853531967839,True,21,6
263,6,diag,0.0001,1,k-means++,100,9682908.60740639,9536962.0,-1694.25,0.17942221462726593,527.6393432617188,1.9590089629656866,True,34,6
264,6,diag,0.0001,1,k-means++,300,9682908.60740639,9536962.0,-1694.25,0.17942221462726593,527.6393432617188,1.9590089629656866,True,34,6
265,6,diag,0.0001,5,k-means++,100,9102298.60740639,8956352.0,-1590.5697,0.21240639686584473,565.2139282226562,2.136853531967839,True,24,6
266,6,diag,0.0001,5,k-means++,300,9102298.60740639,8956352.0,-1590.5697,0.21240639686584473,565.2139282226562,2.136853531967839,True,24,6
267,6,diag,0.001,1,k-means++,100,9701922.60740639,9555976.0,-1697.6454,0.18013142049312592,529.0560913085938,1.9546049777626981,True,31,6
268,6,diag,0.001,1,k-means++,300,9701922.60740639,9555976.0,-1697.6454,0.18013142049312592,529.0560913085938,1.9546049777626981,True,31,6
269,6,diag,0.001,5,k-means++,100,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,31,6
270,6,diag,0.001,5,k-means++,300,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,31,6
271,8,diag,1e-05,1,kmeans,100,9403674.455666753,9209077.0,-1632.7727,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
272,8,diag,1e-05,1,kmeans,300,9403674.455666753,9209077.0,-1632.7727,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
273,8,diag,1e-05,5,kmeans,100,8401628.455666753,8207031.0,-1453.8359,0.24377639591693878,568.4773559570312,2.056540191275295,True,13,8
274,8,diag,1e-05,5,kmeans,300,8401628.455666753,8207031.0,-1453.8359,0.24377639591693878,568.4773559570312,2.056540191275295,True,13,8
275,8,diag,0.0001,1,kmeans,100,9409411.455666753,9214814.0,-1633.7971,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
276,8,diag,0.0001,1,kmeans,300,9409411.455666753,9214814.0,-1633.7971,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
277,8,diag,0.0001,5,kmeans,100,8401766.955666753,8207169.5,-1453.8606,0.24412217736244202,568.5999755859375,2.0555351393061194,True,13,8
278,8,diag,0.0001,5,kmeans,300,8401766.955666753,8207169.5,-1453.8606,0.24412217736244202,568.5999755859375,2.0555351393061194,True,13,8
279,8,diag,0.001,1,kmeans,100,9428169.455666753,9233572.0,-1637.1467,0.2572267949581146,545.4515991210938,1.707896480449314,True,12,8
280,8,diag,0.001,1,kmeans,300,9428169.455666753,9233572.0,-1637.1467,0.2572267949581146,545.4515991210938,1.707896480449314,True,12,8
281,8,diag,0.001,5,kmeans,100,8402030.455666753,8207433.0,-1453.9077,0.24412217736244202,568.5999755859375,2.0555351393061194,True,19,8
282,8,diag,0.001,5,kmeans,300,8402030.455666753,8207433.0,-1453.9077,0.24412217736244202,568.5999755859375,2.0555351393061194,True,19,8
283,8,diag,1e-05,1,k-means++,100,9222857.455666753,9028260.0,-1600.4839,0.20888392627239227,437.5425109863281,2.1599292696306343,True,22,8
284,8,diag,1e-05,1,k-means++,300,9222857.455666753,9028260.0,-1600.4839,0.20888392627239227,437.5425109863281,2.1599292696306343,True,22,8
285,8,diag,1e-05,5,k-means++,100,8425934.455666753,8231337.0,-1458.1763,0.240326389670372,491.6292724609375,2.0912175194979867,True,18,8
286,8,diag,1e-05,5,k-means++,300,8425934.455666753,8231337.0,-1458.1763,0.240326389670372,491.6292724609375,2.0912175194979867,True,18,8
287,8,diag,0.0001,1,k-means++,100,9489607.455666753,9295010.0,-1648.1178,0.214387446641922,498.1647033691406,1.8502738691794258,True,15,8
288,8,diag,0.0001,1,k-means++,300,9489607.455666753,9295010.0,-1648.1178,0.214387446641922,498.1647033691406,1.8502738691794258,True,15,8
289,8,diag,0.0001,5,k-means++,100,8401862.455666753,8207265.0,-1453.8777,0.24387237429618835,568.502197265625,2.056662610079275,True,14,8
290,8,diag,0.0001,5,k-means++,300,8401862.455666753,8207265.0,-1453.8777,0.24387237429618835,568.502197265625,2.056662610079275,True,14,8
291,8,diag,0.001,1,k-means++,100,9508329.455666753,9313732.0,-1651.461,0.21473833918571472,498.2613830566406,1.850583105515141,True,13,8
292,8,diag,0.001,1,k-means++,300,9508329.455666753,9313732.0,-1651.461,0.21473833918571472,498.2613830566406,1.850583105515141,True,13,8
293,8,diag,0.001,5,k-means++,100,8402078.455666753,8207481.0,-1453.9163,0.2442118227481842,568.6036987304688,2.0567161321422094,True,17,8
294,8,diag,0.001,5,k-means++,300,8402078.455666753,8207481.0,-1453.9163,0.2442118227481842,568.6036987304688,2.0567161321422094,True,17,8
295,10,diag,1e-05,1,kmeans,100,7888954.303927114,7645706.0,-1350.6729,0.20840129256248474,479.23870849609375,1.9209118556333535,True,40,10
296,10,diag,1e-05,1,kmeans,300,7888954.303927114,7645706.0,-1350.6729,0.20840129256248474,479.23870849609375,1.9209118556333535,True,40,10
297,10,diag,1e-05,5,kmeans,100,7737961.303927114,7494713.0,-1323.7098,0.21908442676067352,571.03955078125,1.8024606257110887,True,14,10
298,10,diag,1e-05,5,kmeans,300,7737961.303927114,7494713.0,-1323.7098,0.21908442676067352,571.03955078125,1.8024606257110887,True,14,10
299,10,diag,0.0001,1,kmeans,100,7894699.303927114,7651451.0,-1351.6987,0.20837311446666718,479.2680969238281,1.921187996397482,True,39,10
300,10,diag,0.0001,1,kmeans,300,7894699.303927114,7651451.0,-1351.6987,0.20837311446666718,479.2680969238281,1.921187996397482,True,39,10
301,10,diag,0.0001,5,kmeans,100,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,15,10
302,10,diag,0.0001,5,kmeans,300,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,15,10
303,10,diag,0.001,1,kmeans,100,7913842.303927114,7670594.0,-1355.1172,0.20897234976291656,479.4935302734375,1.9199507888474652,True,32,10
304,10,diag,0.001,1,kmeans,300,7913842.303927114,7670594.0,-1355.1172,0.20897234976291656,479.4935302734375,1.9199507888474652,True,32,10
305,10,diag,0.001,5,kmeans,100,7762934.303927114,7519686.0,-1328.1693,0.2199474722146988,571.3079833984375,1.801659072375885,True,12,10
306,10,diag,0.001,5,kmeans,300,7762934.303927114,7519686.0,-1328.1693,0.2199474722146988,571.3079833984375,1.801659072375885,True,12,10
307,10,diag,1e-05,1,k-means++,100,7924116.803927114,7680868.5,-1356.9519,0.18109233677387238,451.2889099121094,2.0573889908400558,True,21,10
308,10,diag,1e-05,1,k-means++,300,7924116.803927114,7680868.5,-1356.9519,0.18109233677387238,451.2889099121094,2.0573889908400558,True,21,10
309,10,diag,1e-05,5,k-means++,100,7738147.803927114,7494899.5,-1323.7432,0.2197750359773636,571.7454223632812,1.7996143618833955,True,15,10
310,10,diag,1e-05,5,k-means++,300,7738147.803927114,7494899.5,-1323.7432,0.2197750359773636,571.7454223632812,1.7996143618833955,True,15,10
311,10,diag,0.0001,1,k-means++,100,8187378.303927114,7944130.0,-1403.9629,0.18437595665454865,504.9218444824219,1.8422731699234043,True,15,10
312,10,diag,0.0001,1,k-means++,300,8187378.303927114,7944130.0,-1403.9629,0.18437595665454865,504.9218444824219,1.8422731699234043,True,15,10
313,10,diag,0.0001,5,k-means++,100,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,19,10
314,10,diag,0.0001,5,k-means++,300,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,19,10
315,10,diag,0.001,1,k-means++,100,8206320.303927114,7963072.0,-1407.3453,0.18497711420059204,505.0896301269531,1.8418734003624215,True,12,10
316,10,diag,0.001,1,k-means++,300,8206320.303927114,7963072.0,-1407.3453,0.18497711420059204,505.0896301269531,1.8418734003624215,True,12,10
317,10,diag,0.001,5,k-means++,100,7762911.303927114,7519663.0,-1328.1652,0.21994036436080933,571.3048706054688,1.8012326594817665,True,14,10
318,10,diag,0.001,5,k-means++,300,7762911.303927114,7519663.0,-1328.1652,0.21994036436080933,571.3048706054688,1.8012326594817665,True,14,10
319,11,diag,1e-05,1,kmeans,100,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
320,11,diag,1e-05,1,kmeans,300,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
321,11,diag,1e-05,5,kmeans,100,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
322,11,diag,1e-05,5,kmeans,300,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
323,11,diag,0.0001,1,kmeans,100,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
324,11,diag,0.0001,1,kmeans,300,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
325,11,diag,0.0001,5,kmeans,100,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
326,11,diag,0.0001,5,kmeans,300,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
327,11,diag,0.001,1,kmeans,100,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
328,11,diag,0.001,1,kmeans,300,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
329,11,diag,0.001,5,kmeans,100,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
330,11,diag,0.001,5,kmeans,300,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
331,11,diag,1e-05,1,k-means++,100,7791839.228057295,7524265.5,-1327.5238,0.19072884321212769,512.7921752929688,1.8151284796024916,True,12,11
332,11,diag,1e-05,1,k-means++,300,7791839.228057295,7524265.5,-1327.5238,0.19072884321212769,512.7921752929688,1.8151284796024916,True,12,11
333,11,diag,1e-05,5,k-means++,100,7590459.228057295,7322885.5,-1291.5631,0.23250937461853027,557.0549926757812,1.6956773285989237,True,15,11
334,11,diag,1e-05,5,k-means++,300,7590459.228057295,7322885.5,-1291.5631,0.23250937461853027,557.0549926757812,1.6956773285989237,True,15,11
335,11,diag,0.0001,1,k-means++,100,7673894.728057295,7406321.0,-1306.4623,0.1943679302930832,520.76953125,1.7964734396608426,True,18,11
336,11,diag,0.0001,1,k-means++,300,7673894.728057295,7406321.0,-1306.4623,0.1943679302930832,520.76953125,1.7964734396608426,True,18,11
337,11,diag,0.0001,5,k-means++,100,7595473.228057295,7327899.5,-1292.4585,0.23414196074008942,558.0660400390625,1.7058883755528695,True,22,11
338,11,diag,0.0001,5,k-means++,300,7595473.228057295,7327899.5,-1292.4585,0.23414196074008942,558.0660400390625,1.7058883755528695,True,22,11
339,11,diag,0.001,1,k-means++,100,7694915.228057295,7427341.5,-1310.216,0.1941366195678711,518.650634765625,1.8003890716705255,True,19,11
340,11,diag,0.001,1,k-means++,300,7694915.228057295,7427341.5,-1310.216,0.1941366195678711,518.650634765625,1.8003890716705255,True,19,11
341,11,diag,0.001,5,k-means++,100,7603135.728057295,7335562.0,-1293.8268,0.23057277500629425,554.763427734375,1.7732846393460022,True,61,11
342,11,diag,0.001,5,k-means++,300,7603135.728057295,7335562.0,-1293.8268,0.23057277500629425,554.763427734375,1.7732846393460022,True,61,11
343,14,diag,1e-05,1,kmeans,100,7223238.000447838,6882688.0,-1208.5668,0.189774751663208,445.0321044921875,2.405689156966965,True,35,14
344,14,diag,1e-05,1,kmeans,300,7223238.000447838,6882688.0,-1208.5668,0.189774751663208,445.0321044921875,2.405689156966965,True,35,14
345,14,diag,1e-05,5,kmeans,100,7179637.000447838,6839087.0,-1200.7809,0.19881103932857513,443.0815124511719,2.3498009106287974,True,37,14
346,14,diag,1e-05,5,kmeans,300,7179637.000447838,6839087.0,-1200.7809,0.19881103932857513,443.0815124511719,2.3498009106287974,True,37,14
347,14,diag,0.0001,1,kmeans,100,7403717.000447838,7063167.0,-1240.7952,0.19351521134376526,467.3027038574219,1.8745209391776043,True,9,14
348,14,diag,0.0001,1,kmeans,300,7403717.000447838,7063167.0,-1240.7952,0.19351521134376526,467.3027038574219,1.8745209391776043,True,9,14
349,14,diag,0.0001,5,kmeans,100,7185399.000447838,6844849.0,-1201.8098,0.1989377737045288,443.1646423339844,2.3487992155318165,True,36,14
350,14,diag,0.0001,5,kmeans,300,7185399.000447838,6844849.0,-1201.8098,0.1989377737045288,443.1646423339844,2.3487992155318165,True,36,14
351,14,diag,0.001,1,kmeans,100,7249591.000447838,6909041.0,-1213.2727,0.1921805739402771,446.1286315917969,2.4012027743509816,True,32,14
352,14,diag,0.001,1,kmeans,300,7249591.000447838,6909041.0,-1213.2727,0.1921805739402771,446.1286315917969,2.4012027743509816,True,32,14
353,14,diag,0.001,5,kmeans,100,7206289.000447838,6865739.0,-1205.5402,0.2005954384803772,446.14300537109375,2.3618214198344596,True,35,14
354,14,diag,0.001,5,kmeans,300,7206289.000447838,6865739.0,-1205.5402,0.2005954384803772,446.14300537109375,2.3618214198344596,True,35,14
355,14,diag,1e-05,1,k-means++,100,7407422.000447838,7066872.0,-1241.4568,0.17794980108737946,461.5752868652344,1.8973642909602886,True,16,14
356,14,diag,1e-05,1,k-means++,300,7407422.000447838,7066872.0,-1241.4568,0.17794980108737946,461.5752868652344,1.8973642909602886,True,16,14
357,14,diag,1e-05,5,k-means++,100,7225380.500447838,6884830.5,-1208.9493,0.21065233647823334,449.29351806640625,2.369687246645459,True,41,14
358,14,diag,1e-05,5,k-means++,300,7225380.500447838,6884830.5,-1208.9493,0.21065233647823334,449.29351806640625,2.369687246645459,True,41,14
359,14,diag,0.0001,1,k-means++,100,7409969.000447838,7069419.0,-1241.9116,0.18569153547286987,462.6240539550781,1.8868216012452776,True,15,14
360,14,diag,0.0001,1,k-means++,300,7409969.000447838,7069419.0,-1241.9116,0.18569153547286987,462.6240539550781,1.8868216012452776,True,15,14
361,14,diag,0.0001,5,k-means++,100,7197481.000447838,6856931.0,-1203.9673,0.1839146614074707,442.7921142578125,2.4198138057845995,True,34,14
362,14,diag,0.0001,5,k-means++,300,7197481.000447838,6856931.0,-1203.9673,0.1839146614074707,442.7921142578125,2.4198138057845995,True,34,14
363,14,diag,0.001,1,k-means++,300,7428971.000447838,7088421.0,-1245.3048,0.18628861010074615,462.73419189453125,1.8868677406736207,False,300,14
364,14,diag,0.001,5,k-means++,100,7217251.500447838,6876701.5,-1207.4978,0.18614986538887024,443.1617431640625,2.4207333709907854,True,30,14
365,14,diag,0.001,5,k-means++,300,7217251.500447838,6876701.5,-1207.4978,0.18614986538887024,443.1617431640625,2.4207333709907854,True,30,14
366,17,diag,1e-05,1,kmeans,100,7157834.272838381,6744308.0,-1179.4664,0.15990924835205078,403.0197448730469,2.0343843124532546,True,24,17
367,17,diag,1e-05,1,kmeans,300,7157834.272838381,6744308.0,-1179.4664,0.15990924835205078,403.0197448730469,2.0343843124532546,True,24,17
368,17,diag,1e-05,5,kmeans,100,6988291.272838381,6574765.0,-1149.1909,0.18784816563129425,396.8871154785156,2.38212018534803,True,20,17
369,17,diag,1e-05,5,kmeans,300,6988291.272838381,6574765.0,-1149.1909,0.18784816563129425,396.8871154785156,2.38212018534803,True,20,17
370,17,diag,0.0001,1,kmeans,100,7164777.772838381,6751251.5,-1180.7063,0.16118811070919037,403.645263671875,2.0161663571062354,True,12,17
371,17,diag,0.0001,1,kmeans,300,7164777.772838381,6751251.5,-1180.7063,0.16118811070919037,403.645263671875,2.0161663571062354,True,12,17
372,17,diag,0.0001,5,kmeans,100,6993985.272838381,6580459.0,-1150.2076,0.18836656212806702,396.94464111328125,2.382098456335865,True,20,17
373,17,diag,0.0001,5,kmeans,300,6993985.272838381,6580459.0,-1150.2076,0.18836656212806702,396.94464111328125,2.382098456335865,True,20,17
374,17,diag,0.001,1,kmeans,100,7020208.272838381,6606682.0,-1154.8904,0.16110706329345703,392.0225830078125,2.466771067136851,True,32,17
375,17,diag,0.001,1,kmeans,300,7020208.272838381,6606682.0,-1154.8904,0.16110706329345703,392.0225830078125,2.466771067136851,True,32,17
376,17,diag,0.001,5,kmeans,100,7014078.772838381,6600552.5,-1153.7958,0.19070318341255188,397.37750244140625,2.3799724457371485,True,20,17
377,17,diag,0.001,5,kmeans,300,7014078.772838381,6600552.5,-1153.7958,0.19070318341255188,397.37750244140625,2.3799724457371485,True,20,17
378,17,diag,1e-05,1,k-means++,100,7121674.772838381,6708148.5,-1173.0094,0.15012019872665405,375.2821350097656,2.4197980533663803,True,31,17
379,17,diag,1e-05,1,k-means++,300,7121674.772838381,6708148.5,-1173.0094,0.15012019872665405,375.2821350097656,2.4197980533663803,True,31,17
380,17,diag,1e-05,5,k-means++,100,7005072.772838381,6591546.5,-1152.1876,0.14115209877490997,384.4289245605469,2.4847770953101596,True,29,17
381,17,diag,1e-05,5,k-means++,300,7005072.772838381,6591546.5,-1152.1876,0.14115209877490997,384.4289245605469,2.4847770953101596,True,29,17
382,17,diag,0.0001,1,k-means++,100,7293509.772838381,6879983.5,-1203.6942,0.15238241851329803,397.42816162109375,2.107055060535422,True,15,17
383,17,diag,0.0001,1,k-means++,300,7293509.772838381,6879983.5,-1203.6942,0.15238241851329803,397.42816162109375,2.107055060535422,True,15,17
384,17,diag,0.0001,5,k-means++,100,7015674.772838381,6602148.5,-1154.0808,0.1819005310535431,394.13629150390625,2.4964933433175283,True,18,17
385,17,diag,0.0001,5,k-means++,300,7015674.772838381,6602148.5,-1154.0808,0.1819005310535431,394.13629150390625,2.4964933433175283,True,18,17
386,17,diag,0.001,1,k-means++,100,7312575.772838381,6899049.5,-1207.0989,0.15248946845531464,397.60723876953125,2.1086064619099547,True,17,17
387,17,diag,0.001,1,k-means++,300,7312575.772838381,6899049.5,-1207.0989,0.15248946845531464,397.60723876953125,2.1086064619099547,True,17,17
388,17,diag,0.001,5,k-means++,100,7034312.772838381,6620786.5,-1157.409,0.18249236047267914,394.5459289550781,2.4918179246451175,True,23,17
389,17,diag,0.001,5,k-means++,300,7034312.772838381,6620786.5,-1157.409,0.18249236047267914,394.5459289550781,2.4918179246451175,True,23,17
390,20,diag,1e-05,1,kmeans,100,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
391,20,diag,1e-05,1,kmeans,300,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
392,20,diag,1e-05,5,kmeans,100,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
393,20,diag,1e-05,5,kmeans,300,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
394,20,diag,0.0001,1,kmeans,100,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
395,20,diag,0.0001,1,kmeans,300,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
396,20,diag,0.0001,5,kmeans,100,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
397,20,diag,0.0001,5,kmeans,300,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
398,20,diag,0.001,1,kmeans,100,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
399,20,diag,0.001,1,kmeans,300,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
400,20,diag,0.001,5,kmeans,100,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
401,20,diag,0.001,5,kmeans,300,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
402,20,diag,1e-05,1,k-means++,100,6978855.045228925,6492352.5,-1130.0847,0.13519038259983063,338.322509765625,2.5026143875581077,True,24,20
403,20,diag,1e-05,1,k-means++,300,6978855.045228925,6492352.5,-1130.0847,0.13519038259983063,338.322509765625,2.5026143875581077,True,24,20
404,20,diag,1e-05,5,k-means++,100,6897127.045228925,6410624.5,-1115.4905,0.13251666724681854,352.5394592285156,2.4669189695674225,True,42,20
405,20,diag,1e-05,5,k-means++,300,6897127.045228925,6410624.5,-1115.4905,0.13251666724681854,352.5394592285156,2.4669189695674225,True,42,20
406,20,diag,0.0001,1,k-means++,100,7011968.045228925,6525465.5,-1135.9978,0.14400699734687805,344.567138671875,2.517887865440349,True,30,20
407,20,diag,0.0001,1,k-means++,300,7011968.045228925,6525465.5,-1135.9978,0.14400699734687805,344.567138671875,2.517887865440349,True,30,20
408,20,diag,0.0001,5,k-means++,100,6905988.545228925,6419486.0,-1117.0729,0.13107705116271973,351.8740234375,2.4956842864961937,True,36,20
409,20,diag,0.0001,5,k-means++,300,6905988.545228925,6419486.0,-1117.0729,0.13107705116271973,351.8740234375,2.4956842864961937,True,36,20
410,20,diag,0.001,1,k-means++,100,7031180.545228925,6544678.0,-1139.4286,0.14613750576972961,345.2534484863281,2.516432567197497,True,27,20
411,20,diag,0.001,1,k-means++,300,7031180.545228925,6544678.0,-1139.4286,0.14613750576972961,345.2534484863281,2.516432567197497,True,27,20
412,20,diag,0.001,5,k-means++,100,6918391.545228925,6431889.0,-1119.2877,0.13308578729629517,351.49005126953125,2.474649164658472,True,35,20
413,20,diag,0.001,5,k-means++,300,6918391.545228925,6431889.0,-1119.2877,0.13308578729629517,351.49005126953125,2.474649164658472,True,35,20
1 trial_id n_components covariance_type reg_covar n_init init_params max_iter bic_score aic_score log_likelihood silhouette_score calinski_harabasz_score davies_bouldin_score converged n_iter unique_clusters
2 1 2 full 0.0001 1 kmeans 100 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
3 2 2 full 0.0001 1 kmeans 300 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
4 3 2 full 0.0001 5 kmeans 100 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
5 4 2 full 0.0001 5 kmeans 300 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
6 5 2 full 0.001 1 kmeans 100 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
7 6 2 full 0.001 1 kmeans 300 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
8 7 2 full 0.001 5 kmeans 100 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
9 8 2 full 0.001 5 kmeans 300 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
10 9 2 full 0.0001 1 k-means++ 100 17370120.605124418 -7569519.0 2851.86 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
11 10 2 full 0.0001 1 k-means++ 300 17370120.605124418 -7569519.0 2851.86 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
12 11 2 full 0.0001 5 k-means++ 100 17270534.605124418 -7669105.0 2869.6433 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
13 12 2 full 0.0001 5 k-means++ 300 17270534.605124418 -7669105.0 2869.6433 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
14 13 2 full 0.001 1 k-means++ 100 20919727.605124418 -4019912.0 2218.0017 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
15 14 2 full 0.001 1 k-means++ 300 20919727.605124418 -4019912.0 2218.0017 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
16 15 2 full 0.001 5 k-means++ 100 20851959.605124418 -4087680.0 2230.1033 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
17 16 2 full 0.001 5 k-means++ 300 20851959.605124418 -4087680.0 2230.1033 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
18 17 3 full 0.0001 1 kmeans 100 33833558.37637398 -3575904.0 2888.795 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
19 18 3 full 0.0001 1 kmeans 300 33833558.37637398 -3575904.0 2888.795 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
20 19 3 full 0.0001 5 kmeans 100 26462676.376373976 -10946786.0 4205.024 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
21 20 3 full 0.0001 5 kmeans 300 26462676.376373976 -10946786.0 4205.024 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
22 21 3 full 0.001 1 kmeans 100 37452100.37637398 42638.0 2242.6267 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
23 22 3 full 0.001 1 kmeans 300 37452100.37637398 42638.0 2242.6267 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
24 23 3 full 0.001 5 kmeans 100 33411843.376373976 -3997619.0 2964.1013 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
25 24 3 full 0.001 5 kmeans 300 33411843.376373976 -3997619.0 2964.1013 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
26 25 3 full 0.001 1 k-means++ 100 37089716.37637398 -319746.0 2307.3381 0.3671606779098511 710.0538940429688 1.7711288790751016 True 3 3
27 26 3 full 0.001 1 k-means++ 300 37089716.37637398 -319746.0 2307.3381 0.3671606779098511 710.0538940429688 1.7711288790751016 True 3 3
28 27 3 full 0.001 5 k-means++ 100 33366158.376373976 -4043304.0 2972.2593 0.19688381254673004 834.6746215820312 2.265938022603405 True 3 3
29 28 3 full 0.001 5 k-means++ 300 33366158.376373976 -4043304.0 2972.2593 0.19688381254673004 834.6746215820312 2.265938022603405 True 3 3
30 29 4 full 0.0001 1 kmeans 100 44240183.14762353 -5639102.0 4007.3035 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
31 30 4 full 0.0001 1 kmeans 300 44240183.14762353 -5639102.0 4007.3035 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
32 31 4 full 0.0001 5 kmeans 100 38759701.14762353 -11119584.0 4985.961 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
33 32 4 full 0.0001 5 kmeans 300 38759701.14762353 -11119584.0 4985.961 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
34 33 4 full 0.001 1 kmeans 100 50532988.14762353 653703.0 2883.5884 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
35 34 4 full 0.001 1 kmeans 300 50532988.14762353 653703.0 2883.5884 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
36 35 4 full 0.001 5 kmeans 100 47456739.14762353 -2422546.0 3432.9185 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
37 36 4 full 0.001 5 kmeans 300 47456739.14762353 -2422546.0 3432.9185 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
38 37 4 full 0.001 1 k-means++ 100 50369343.14762353 490058.0 2912.8108 0.16492997109889984 579.3992309570312 2.14603204385876 True 3 4
39 38 4 full 0.001 1 k-means++ 300 50369343.14762353 490058.0 2912.8108 0.16492997109889984 579.3992309570312 2.14603204385876 True 3 4
40 39 4 full 0.001 5 k-means++ 100 48104059.14762353 -1775226.0 3317.3257 0.19116489589214325 729.421630859375 2.391271825318095 True 3 4
41 40 4 full 0.001 5 k-means++ 300 48104059.14762353 -1775226.0 3317.3257 0.19116489589214325 729.421630859375 2.391271825318095 True 3 4
42 41 5 full 0.0001 1 kmeans 100 60034171.91887309 -2314936.0 4163.7827 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
43 42 5 full 0.0001 1 kmeans 300 60034171.91887309 -2314936.0 4163.7827 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
44 43 5 full 0.0001 5 kmeans 100 54230057.91887309 -8119050.0 5200.232 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
45 44 5 full 0.0001 5 kmeans 300 54230057.91887309 -8119050.0 5200.232 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
46 45 5 full 0.001 1 kmeans 100 66698922.91887309 4349815.0 2973.6487 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
47 46 5 full 0.001 1 kmeans 300 66698922.91887309 4349815.0 2973.6487 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
48 47 5 full 0.001 5 kmeans 100 63375271.91887309 1026164.0 3567.158 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
49 48 5 full 0.001 5 kmeans 300 63375271.91887309 1026164.0 3567.158 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
50 49 5 full 0.001 1 k-means++ 100 66517075.91887309 4167968.0 3006.1213 0.16880138218402863 549.265625 1.8881643569801063 True 3 5
51 50 5 full 0.001 1 k-means++ 300 66517075.91887309 4167968.0 3006.1213 0.16880138218402863 549.265625 1.8881643569801063 True 3 5
52 51 5 full 0.001 5 k-means++ 100 63364071.91887309 1014964.0 3569.158 0.25286975502967834 715.984619140625 1.6093410042807197 True 3 5
53 52 5 full 0.001 5 k-means++ 300 63364071.91887309 1014964.0 3569.158 0.25286975502967834 715.984619140625 1.6093410042807197 True 3 5
54 53 6 full 0.0001 1 kmeans 100 73062550.69012265 -1756380.0 4814.121 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
55 54 6 full 0.0001 1 kmeans 300 73062550.69012265 -1756380.0 4814.121 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
56 55 6 full 0.0001 5 kmeans 100 69890932.69012265 -4927998.0 5380.4814 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
57 56 6 full 0.0001 5 kmeans 300 69890932.69012265 -4927998.0 5380.4814 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
58 57 6 full 0.001 1 kmeans 100 81179056.69012265 6360126.0 3364.745 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
59 58 6 full 0.001 1 kmeans 300 81179056.69012265 6360126.0 3364.745 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
60 59 6 full 0.001 5 kmeans 100 79356620.69012265 4537690.0 3690.18 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
61 60 6 full 0.001 5 kmeans 300 79356620.69012265 4537690.0 3690.18 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
62 61 6 full 1e-05 1 k-means++ 100 68753328.69012265 -6065602.0 5583.625 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
63 62 6 full 1e-05 1 k-means++ 300 68753328.69012265 -6065602.0 5583.625 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
64 63 6 full 0.0001 1 k-means++ 100 75948732.69012265 1129802.0 4298.7314 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
65 64 6 full 0.0001 1 k-means++ 300 75948732.69012265 1129802.0 4298.7314 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
66 65 6 full 0.0001 5 k-means++ 100 69381502.69012265 -5437428.0 5471.451 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
67 66 6 full 0.0001 5 k-means++ 300 69381502.69012265 -5437428.0 5471.451 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
68 67 6 full 0.001 1 k-means++ 100 83185656.69012265 8366726.0 3006.4236 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
69 68 6 full 0.001 1 k-means++ 300 83185656.69012265 8366726.0 3006.4236 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
70 69 6 full 0.001 5 k-means++ 100 79079084.69012265 4260154.0 3739.74 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
71 70 6 full 0.001 5 k-means++ 300 79079084.69012265 4260154.0 3739.74 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
72 71 8 full 0.0001 1 kmeans 100 101322900.23262176 1564326.0 5721.299 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
73 72 8 full 0.0001 1 kmeans 300 101322900.23262176 1564326.0 5721.299 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
74 73 8 full 0.0001 5 kmeans 100 100810002.23262176 1051428.0 5812.8877 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
75 74 8 full 0.0001 5 kmeans 300 100810002.23262176 1051428.0 5812.8877 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
76 75 8 full 0.001 1 kmeans 100 111448618.23262176 11690044.0 3913.135 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
77 76 8 full 0.001 1 kmeans 300 111448618.23262176 11690044.0 3913.135 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
78 77 8 full 0.001 5 kmeans 100 111172686.23262176 11414112.0 3962.4087 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
79 78 8 full 0.001 5 kmeans 300 111172686.23262176 11414112.0 3962.4087 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
80 79 8 full 0.001 1 k-means++ 100 111979964.23262176 12221390.0 3818.2517 0.2020130306482315 465.200927734375 1.9463124697846808 True 3 8
81 80 8 full 0.001 1 k-means++ 300 111979964.23262176 12221390.0 3818.2517 0.2020130306482315 465.200927734375 1.9463124697846808 True 3 8
82 81 8 full 0.001 5 k-means++ 100 111327662.23262176 11569088.0 3934.7344 0.2736768126487732 617.4371948242188 1.7398856934277325 True 3 8
83 82 8 full 0.001 5 k-means++ 300 111327662.23262176 11569088.0 3934.7344 0.2736768126487732 617.4371948242188 1.7398856934277325 True 3 8
84 83 10 full 0.0001 1 kmeans 100 133265705.77512088 8567482.0 5970.8955 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
85 84 10 full 0.0001 1 kmeans 300 133265705.77512088 8567482.0 5970.8955 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
86 85 10 full 0.0001 5 kmeans 100 132892239.77512088 8194016.0 6037.586 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
87 86 10 full 0.0001 5 kmeans 300 132892239.77512088 8194016.0 6037.586 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
88 87 10 full 0.001 1 kmeans 100 143970687.77512088 19272464.0 4059.2915 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
89 88 10 full 0.001 1 kmeans 300 143970687.77512088 19272464.0 4059.2915 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
90 89 10 full 0.001 5 kmeans 100 143652495.77512088 18954272.0 4116.1113 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
91 90 10 full 0.001 5 kmeans 300 143652495.77512088 18954272.0 4116.1113 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
92 91 10 full 0.001 1 k-means++ 100 144482919.77512088 19784696.0 3967.8215 0.17508849501609802 474.4588928222656 1.846488092509191 True 3 10
93 92 10 full 0.001 1 k-means++ 300 144482919.77512088 19784696.0 3967.8215 0.17508849501609802 474.4588928222656 1.846488092509191 True 3 10
94 93 10 full 0.001 5 k-means++ 100 144071547.77512088 19373324.0 4041.2808 0.22849640250205994 521.3035278320312 1.9523215129883376 True 3 10
95 94 10 full 0.001 5 k-means++ 300 144071547.77512088 19373324.0 4041.2808 0.22849640250205994 521.3035278320312 1.9523215129883376 True 3 10
96 95 11 full 0.0001 1 kmeans 100 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
97 96 11 full 0.0001 1 kmeans 300 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
98 97 11 full 0.0001 5 kmeans 100 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
99 98 11 full 0.0001 5 kmeans 300 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
100 99 11 full 0.001 1 kmeans 100 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
101 100 11 full 0.001 1 kmeans 300 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
102 101 11 full 0.001 5 kmeans 100 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
103 102 11 full 0.001 5 kmeans 300 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
104 103 11 full 0.001 1 k-means++ 100 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
105 104 11 full 0.001 1 k-means++ 300 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
106 105 11 full 0.001 5 k-means++ 100 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
107 106 11 full 0.001 5 k-means++ 300 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
108 107 14 full 0.0001 1 kmeans 100 198149922.8601191 23572408.0 6291.7656 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
109 108 14 full 0.0001 1 kmeans 300 198149922.8601191 23572408.0 6291.7656 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
110 109 14 full 0.0001 5 kmeans 100 197540314.8601191 22962800.0 6400.6245 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
111 110 14 full 0.0001 5 kmeans 300 197540314.8601191 22962800.0 6400.6245 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
112 111 14 full 0.001 1 kmeans 100 209401674.8601191 34824160.0 4282.5244 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
113 112 14 full 0.001 1 kmeans 300 209401674.8601191 34824160.0 4282.5244 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
114 113 14 full 0.001 5 kmeans 100 208994740.8601191 34417224.0 4355.191 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
115 114 14 full 0.001 5 kmeans 300 208994740.8601191 34417224.0 4355.191 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
116 115 14 full 0.0001 1 k-means++ 100 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
117 116 14 full 0.0001 1 k-means++ 300 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
118 117 14 full 0.0001 5 k-means++ 100 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
119 118 14 full 0.0001 5 k-means++ 300 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
120 119 14 full 0.001 1 k-means++ 100 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
121 120 14 full 0.001 1 k-means++ 300 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
122 121 14 full 0.001 5 k-means++ 100 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
123 122 14 full 0.001 5 k-means++ 300 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
124 123 17 full 0.0001 1 kmeans 100 247173509.17386776 35186530.0 6468.057 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
125 124 17 full 0.0001 1 kmeans 300 247173509.17386776 35186530.0 6468.057 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
126 125 17 full 0.0001 5 kmeans 100 246850361.17386776 34863380.0 6525.762 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
127 126 17 full 0.0001 5 kmeans 300 246850361.17386776 34863380.0 6525.762 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
128 127 17 full 0.001 1 kmeans 100 258736973.17386776 46749990.0 4403.153 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
129 128 17 full 0.001 1 kmeans 300 258736973.17386776 46749990.0 4403.153 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
130 129 17 full 0.001 5 kmeans 100 258504801.17386776 46517820.0 4444.6123 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
131 130 17 full 0.001 5 kmeans 300 258504801.17386776 46517820.0 4444.6123 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
132 131 17 full 0.0001 1 k-means++ 100 247607397.17386776 35620416.0 6390.577 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
133 132 17 full 0.0001 1 k-means++ 300 247607397.17386776 35620416.0 6390.577 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
134 133 17 full 0.0001 5 k-means++ 100 246784997.17386776 34798016.0 6537.434 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
135 134 17 full 0.0001 5 k-means++ 300 246784997.17386776 34798016.0 6537.434 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
136 135 17 full 0.001 1 k-means++ 100 259055585.17386776 47068604.0 4346.258 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
137 136 17 full 0.001 1 k-means++ 300 259055585.17386776 47068604.0 4346.258 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
138 137 17 full 0.001 5 k-means++ 100 258522869.17386776 46535890.0 4441.3857 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
139 138 17 full 0.001 5 k-means++ 300 258522869.17386776 46535890.0 4441.3857 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
140 139 20 full 0.0001 1 kmeans 100 296473639.4876164 47077190.0 6594.966 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
141 140 20 full 0.0001 1 kmeans 300 296473639.4876164 47077190.0 6594.966 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
142 141 20 full 0.001 1 kmeans 100 308235301.4876164 58838856.0 4494.669 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
143 142 20 full 0.001 1 kmeans 300 308235301.4876164 58838856.0 4494.669 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
144 143 20 full 0.001 5 kmeans 100 307947927.4876164 58551480.0 4545.986 0.12884767353534698 377.9795227050781 2.0180962938149367 True 2 20
145 144 20 full 0.001 5 kmeans 300 307947927.4876164 58551480.0 4545.986 0.12884767353534698 377.9795227050781 2.0180962938149367 True 2 20
146 145 20 full 0.0001 1 k-means++ 100 297139767.4876164 47743320.0 6476.014 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
147 146 20 full 0.0001 1 k-means++ 300 297139767.4876164 47743320.0 6476.014 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
148 147 20 full 0.001 1 k-means++ 100 308712155.4876164 59315708.0 4409.5166 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
149 148 20 full 0.001 1 k-means++ 300 308712155.4876164 59315708.0 4409.5166 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
150 149 20 full 0.001 5 k-means++ 100 308599855.4876164 59203410.0 4429.57 0.15204866230487823 341.3536376953125 2.231048217195437 True 3 20
151 150 20 full 0.001 5 k-means++ 300 308599855.4876164 59203410.0 4429.57 0.15204866230487823 341.3536376953125 2.231048217195437 True 3 20
152 151 2 diag 1e-05 1 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
153 152 2 diag 1e-05 1 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
154 153 2 diag 1e-05 5 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
155 154 2 diag 1e-05 5 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
156 155 2 diag 0.0001 1 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
157 156 2 diag 0.0001 1 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
158 157 2 diag 0.0001 5 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
159 158 2 diag 0.0001 5 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
160 159 2 diag 0.001 1 kmeans 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
161 160 2 diag 0.001 1 kmeans 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
162 161 2 diag 0.001 5 kmeans 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
163 162 2 diag 0.001 5 kmeans 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
164 163 2 diag 1e-05 1 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
165 164 2 diag 1e-05 1 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
166 165 2 diag 1e-05 5 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
167 166 2 diag 1e-05 5 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
168 167 2 diag 0.0001 1 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
169 168 2 diag 0.0001 1 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
170 169 2 diag 0.0001 5 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
171 170 2 diag 0.0001 5 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
172 171 2 diag 0.001 1 k-means++ 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
173 172 2 diag 0.001 1 k-means++ 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
174 173 2 diag 0.001 5 k-means++ 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
175 174 2 diag 0.001 5 k-means++ 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
176 175 3 diag 1e-05 1 kmeans 100 12693850.335015846 12620880.0 -2249.3394 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
177 176 3 diag 1e-05 1 kmeans 300 12693850.335015846 12620880.0 -2249.3394 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
178 177 3 diag 1e-05 5 kmeans 100 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 19 3
179 178 3 diag 1e-05 5 kmeans 300 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 19 3
180 179 3 diag 0.0001 1 kmeans 100 12699627.335015846 12626657.0 -2250.3708 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
181 180 3 diag 0.0001 1 kmeans 300 12699627.335015846 12626657.0 -2250.3708 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
182 181 3 diag 0.0001 5 kmeans 100 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 20 3
183 182 3 diag 0.0001 5 kmeans 300 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 20 3
184 183 3 diag 0.001 1 kmeans 100 12718245.335015846 12645275.0 -2253.6956 0.3760926127433777 779.2965087890625 0.7860265546274455 True 7 3
185 184 3 diag 0.001 1 kmeans 300 12718245.335015846 12645275.0 -2253.6956 0.3760926127433777 779.2965087890625 0.7860265546274455 True 7 3
186 185 3 diag 0.001 5 kmeans 100 11770859.335015846 11697889.0 -2084.5195 0.15369778871536255 838.0372924804688 1.7007544548321498 True 19 3
187 186 3 diag 0.001 5 kmeans 300 11770859.335015846 11697889.0 -2084.5195 0.15369778871536255 838.0372924804688 1.7007544548321498 True 19 3
188 187 3 diag 1e-05 1 k-means++ 100 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
189 188 3 diag 1e-05 1 k-means++ 300 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
190 189 3 diag 1e-05 5 k-means++ 100 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
191 190 3 diag 1e-05 5 k-means++ 300 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
192 191 3 diag 0.0001 1 k-means++ 100 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
193 192 3 diag 0.0001 1 k-means++ 300 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
194 193 3 diag 0.0001 5 k-means++ 100 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
195 194 3 diag 0.0001 5 k-means++ 300 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
196 195 3 diag 0.001 1 k-means++ 100 11686162.335015846 11613192.0 -2069.395 0.2351498007774353 882.5064086914062 2.071681816869212 True 22 3
197 196 3 diag 0.001 1 k-means++ 300 11686162.335015846 11613192.0 -2069.395 0.2351498007774353 882.5064086914062 2.071681816869212 True 22 3
198 197 3 diag 0.001 5 k-means++ 100 11686154.335015846 11613184.0 -2069.3936 0.2351498007774353 882.5064697265625 2.071681816869212 True 13 3
199 198 3 diag 0.001 5 k-means++ 300 11686154.335015846 11613184.0 -2069.3936 0.2351498007774353 882.5064697265625 2.071681816869212 True 13 3
200 199 4 diag 1e-05 1 kmeans 100 11525150.759146027 11427855.0 -2034.8359 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
201 200 4 diag 1e-05 1 kmeans 300 11525150.759146027 11427855.0 -2034.8359 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
202 201 4 diag 1e-05 5 kmeans 100 10872145.759146027 10774850.0 -1918.2279 0.22238799929618835 665.7476806640625 2.503304023275595 True 17 4
203 202 4 diag 1e-05 5 kmeans 300 10872145.759146027 10774850.0 -1918.2279 0.22238799929618835 665.7476806640625 2.503304023275595 True 17 4
204 203 4 diag 0.0001 1 kmeans 100 11530927.759146027 11433632.0 -2035.8676 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
205 204 4 diag 0.0001 1 kmeans 300 11530927.759146027 11433632.0 -2035.8676 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
206 205 4 diag 0.0001 5 kmeans 100 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 16 4
207 206 4 diag 0.0001 5 kmeans 300 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 16 4
208 207 4 diag 0.001 1 kmeans 100 11549555.759146027 11452260.0 -2039.194 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
209 208 4 diag 0.001 1 kmeans 300 11549555.759146027 11452260.0 -2039.194 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
210 209 4 diag 0.001 5 kmeans 100 10872103.759146027 10774808.0 -1918.2203 0.2233457714319229 667.0189208984375 2.500263810780557 True 18 4
211 210 4 diag 0.001 5 kmeans 300 10872103.759146027 10774808.0 -1918.2203 0.2233457714319229 667.0189208984375 2.500263810780557 True 18 4
212 211 4 diag 1e-05 1 k-means++ 100 10871971.759146027 10774676.0 -1918.1968 0.22509750723838806 669.1204223632812 2.4944239618747446 True 17 4
213 212 4 diag 1e-05 1 k-means++ 300 10871971.759146027 10774676.0 -1918.1968 0.22509750723838806 669.1204223632812 2.4944239618747446 True 17 4
214 213 4 diag 1e-05 5 k-means++ 100 10865268.759146027 10767973.0 -1916.9999 0.18924137949943542 715.3873291015625 1.9068310882928445 True 23 4
215 214 4 diag 1e-05 5 k-means++ 300 10865268.759146027 10767973.0 -1916.9999 0.18924137949943542 715.3873291015625 1.9068310882928445 True 23 4
216 215 4 diag 0.0001 1 k-means++ 100 11379467.759146027 11282172.0 -2008.821 0.23593758046627045 682.464599609375 1.5970337323460135 True 12 4
217 216 4 diag 0.0001 1 k-means++ 300 11379467.759146027 11282172.0 -2008.821 0.23593758046627045 682.464599609375 1.5970337323460135 True 12 4
218 217 4 diag 0.0001 5 k-means++ 100 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 26 4
219 218 4 diag 0.0001 5 k-means++ 300 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 26 4
220 219 4 diag 0.001 1 k-means++ 100 11398228.759146027 11300933.0 -2012.1713 0.23644769191741943 683.3140869140625 1.5953322750132166 True 13 4
221 220 4 diag 0.001 1 k-means++ 300 11398228.759146027 11300933.0 -2012.1713 0.23644769191741943 683.3140869140625 1.5953322750132166 True 13 4
222 221 4 diag 0.001 5 k-means++ 100 10872234.759146027 10774939.0 -1918.2438 0.22265465557575226 666.09033203125 2.5026321909350457 True 24 4
223 222 4 diag 0.001 5 k-means++ 300 10872234.759146027 10774939.0 -1918.2438 0.22265465557575226 666.09033203125 2.5026321909350457 True 24 4
224 223 5 diag 1e-05 1 kmeans 100 10641753.183276208 10520132.0 -1871.2793 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
225 224 5 diag 1e-05 1 kmeans 300 10641753.183276208 10520132.0 -1871.2793 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
226 225 5 diag 1e-05 5 kmeans 100 10324953.183276208 10203332.0 -1814.7079 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
227 226 5 diag 1e-05 5 kmeans 300 10324953.183276208 10203332.0 -1814.7079 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
228 227 5 diag 0.0001 1 kmeans 100 10647529.183276208 10525908.0 -1872.3107 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
229 228 5 diag 0.0001 1 kmeans 300 10647529.183276208 10525908.0 -1872.3107 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
230 229 5 diag 0.0001 5 kmeans 100 10324954.183276208 10203333.0 -1814.708 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
231 230 5 diag 0.0001 5 kmeans 300 10324954.183276208 10203333.0 -1814.708 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
232 231 5 diag 0.001 1 kmeans 100 10666196.183276208 10544575.0 -1875.6442 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
233 232 5 diag 0.001 1 kmeans 300 10666196.183276208 10544575.0 -1875.6442 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
234 233 5 diag 0.001 5 kmeans 100 10327782.183276208 10206161.0 -1815.213 0.2155037522315979 645.0463256835938 1.5864905576853905 True 23 5
235 234 5 diag 0.001 5 kmeans 300 10327782.183276208 10206161.0 -1815.213 0.2155037522315979 645.0463256835938 1.5864905576853905 True 23 5
236 235 5 diag 1e-05 1 k-means++ 100 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
237 236 5 diag 1e-05 1 k-means++ 300 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
238 237 5 diag 1e-05 5 k-means++ 100 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
239 238 5 diag 1e-05 5 k-means++ 300 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
240 239 5 diag 0.0001 1 k-means++ 100 10466296.183276208 10344675.0 -1839.9476 0.24029850959777832 631.74609375 1.4320268333089838 True 14 5
241 240 5 diag 0.0001 1 k-means++ 300 10466296.183276208 10344675.0 -1839.9476 0.24029850959777832 631.74609375 1.4320268333089838 True 14 5
242 241 5 diag 0.0001 5 k-means++ 100 9948819.183276208 9827198.0 -1747.541 0.19564315676689148 662.848876953125 1.6781877684435718 True 17 5
243 242 5 diag 0.0001 5 k-means++ 300 9948819.183276208 9827198.0 -1747.541 0.19564315676689148 662.848876953125 1.6781877684435718 True 17 5
244 243 5 diag 0.001 1 k-means++ 100 10485035.183276208 10363414.0 -1843.294 0.24058617651462555 631.865478515625 1.432154375281304 True 17 5
245 244 5 diag 0.001 1 k-means++ 300 10485035.183276208 10363414.0 -1843.294 0.24058617651462555 631.865478515625 1.432154375281304 True 17 5
246 245 5 diag 0.001 5 k-means++ 100 9949011.183276208 9827390.0 -1747.5753 0.19609026610851288 663.4921264648438 1.6774180513139325 True 19 5
247 246 5 diag 0.001 5 k-means++ 300 9949011.183276208 9827390.0 -1747.5753 0.19609026610851288 663.4921264648438 1.6774180513139325 True 19 5
248 247 6 diag 1e-05 1 kmeans 100 9799004.60740639 9653058.0 -1714.9814 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
249 248 6 diag 1e-05 1 kmeans 300 9799004.60740639 9653058.0 -1714.9814 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
250 249 6 diag 1e-05 5 kmeans 100 9102218.60740639 8956272.0 -1590.5553 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
251 250 6 diag 1e-05 5 kmeans 300 9102218.60740639 8956272.0 -1590.5553 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
252 251 6 diag 0.0001 1 kmeans 100 9804784.60740639 9658838.0 -1716.0135 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
253 252 6 diag 0.0001 1 kmeans 300 9804784.60740639 9658838.0 -1716.0135 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
254 253 6 diag 0.0001 5 kmeans 100 9102220.60740639 8956274.0 -1590.5557 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
255 254 6 diag 0.0001 5 kmeans 300 9102220.60740639 8956274.0 -1590.5557 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
256 255 6 diag 0.001 1 kmeans 100 9823540.60740639 9677594.0 -1719.3629 0.26235219836235046 629.54638671875 1.991657340604508 True 16 6
257 256 6 diag 0.001 1 kmeans 300 9823540.60740639 9677594.0 -1719.3629 0.26235219836235046 629.54638671875 1.991657340604508 True 16 6
258 257 6 diag 0.001 5 kmeans 100 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 28 6
259 258 6 diag 0.001 5 kmeans 300 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 28 6
260 259 6 diag 1e-05 1 k-means++ 100 9646063.60740639 9500117.0 -1687.6705 0.20277422666549683 612.2411499023438 1.4227889323530194 True 20 6
261 260 6 diag 1e-05 1 k-means++ 300 9646063.60740639 9500117.0 -1687.6705 0.20277422666549683 612.2411499023438 1.4227889323530194 True 20 6
262 261 6 diag 1e-05 5 k-means++ 100 9102296.60740639 8956350.0 -1590.5693 0.21240639686584473 565.2139282226562 2.136853531967839 True 21 6
263 262 6 diag 1e-05 5 k-means++ 300 9102296.60740639 8956350.0 -1590.5693 0.21240639686584473 565.2139282226562 2.136853531967839 True 21 6
264 263 6 diag 0.0001 1 k-means++ 100 9682908.60740639 9536962.0 -1694.25 0.17942221462726593 527.6393432617188 1.9590089629656866 True 34 6
265 264 6 diag 0.0001 1 k-means++ 300 9682908.60740639 9536962.0 -1694.25 0.17942221462726593 527.6393432617188 1.9590089629656866 True 34 6
266 265 6 diag 0.0001 5 k-means++ 100 9102298.60740639 8956352.0 -1590.5697 0.21240639686584473 565.2139282226562 2.136853531967839 True 24 6
267 266 6 diag 0.0001 5 k-means++ 300 9102298.60740639 8956352.0 -1590.5697 0.21240639686584473 565.2139282226562 2.136853531967839 True 24 6
268 267 6 diag 0.001 1 k-means++ 100 9701922.60740639 9555976.0 -1697.6454 0.18013142049312592 529.0560913085938 1.9546049777626981 True 31 6
269 268 6 diag 0.001 1 k-means++ 300 9701922.60740639 9555976.0 -1697.6454 0.18013142049312592 529.0560913085938 1.9546049777626981 True 31 6
270 269 6 diag 0.001 5 k-means++ 100 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 31 6
271 270 6 diag 0.001 5 k-means++ 300 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 31 6
272 271 8 diag 1e-05 1 kmeans 100 9403674.455666753 9209077.0 -1632.7727 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
273 272 8 diag 1e-05 1 kmeans 300 9403674.455666753 9209077.0 -1632.7727 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
274 273 8 diag 1e-05 5 kmeans 100 8401628.455666753 8207031.0 -1453.8359 0.24377639591693878 568.4773559570312 2.056540191275295 True 13 8
275 274 8 diag 1e-05 5 kmeans 300 8401628.455666753 8207031.0 -1453.8359 0.24377639591693878 568.4773559570312 2.056540191275295 True 13 8
276 275 8 diag 0.0001 1 kmeans 100 9409411.455666753 9214814.0 -1633.7971 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
277 276 8 diag 0.0001 1 kmeans 300 9409411.455666753 9214814.0 -1633.7971 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
278 277 8 diag 0.0001 5 kmeans 100 8401766.955666753 8207169.5 -1453.8606 0.24412217736244202 568.5999755859375 2.0555351393061194 True 13 8
279 278 8 diag 0.0001 5 kmeans 300 8401766.955666753 8207169.5 -1453.8606 0.24412217736244202 568.5999755859375 2.0555351393061194 True 13 8
280 279 8 diag 0.001 1 kmeans 100 9428169.455666753 9233572.0 -1637.1467 0.2572267949581146 545.4515991210938 1.707896480449314 True 12 8
281 280 8 diag 0.001 1 kmeans 300 9428169.455666753 9233572.0 -1637.1467 0.2572267949581146 545.4515991210938 1.707896480449314 True 12 8
282 281 8 diag 0.001 5 kmeans 100 8402030.455666753 8207433.0 -1453.9077 0.24412217736244202 568.5999755859375 2.0555351393061194 True 19 8
283 282 8 diag 0.001 5 kmeans 300 8402030.455666753 8207433.0 -1453.9077 0.24412217736244202 568.5999755859375 2.0555351393061194 True 19 8
284 283 8 diag 1e-05 1 k-means++ 100 9222857.455666753 9028260.0 -1600.4839 0.20888392627239227 437.5425109863281 2.1599292696306343 True 22 8
285 284 8 diag 1e-05 1 k-means++ 300 9222857.455666753 9028260.0 -1600.4839 0.20888392627239227 437.5425109863281 2.1599292696306343 True 22 8
286 285 8 diag 1e-05 5 k-means++ 100 8425934.455666753 8231337.0 -1458.1763 0.240326389670372 491.6292724609375 2.0912175194979867 True 18 8
287 286 8 diag 1e-05 5 k-means++ 300 8425934.455666753 8231337.0 -1458.1763 0.240326389670372 491.6292724609375 2.0912175194979867 True 18 8
288 287 8 diag 0.0001 1 k-means++ 100 9489607.455666753 9295010.0 -1648.1178 0.214387446641922 498.1647033691406 1.8502738691794258 True 15 8
289 288 8 diag 0.0001 1 k-means++ 300 9489607.455666753 9295010.0 -1648.1178 0.214387446641922 498.1647033691406 1.8502738691794258 True 15 8
290 289 8 diag 0.0001 5 k-means++ 100 8401862.455666753 8207265.0 -1453.8777 0.24387237429618835 568.502197265625 2.056662610079275 True 14 8
291 290 8 diag 0.0001 5 k-means++ 300 8401862.455666753 8207265.0 -1453.8777 0.24387237429618835 568.502197265625 2.056662610079275 True 14 8
292 291 8 diag 0.001 1 k-means++ 100 9508329.455666753 9313732.0 -1651.461 0.21473833918571472 498.2613830566406 1.850583105515141 True 13 8
293 292 8 diag 0.001 1 k-means++ 300 9508329.455666753 9313732.0 -1651.461 0.21473833918571472 498.2613830566406 1.850583105515141 True 13 8
294 293 8 diag 0.001 5 k-means++ 100 8402078.455666753 8207481.0 -1453.9163 0.2442118227481842 568.6036987304688 2.0567161321422094 True 17 8
295 294 8 diag 0.001 5 k-means++ 300 8402078.455666753 8207481.0 -1453.9163 0.2442118227481842 568.6036987304688 2.0567161321422094 True 17 8
296 295 10 diag 1e-05 1 kmeans 100 7888954.303927114 7645706.0 -1350.6729 0.20840129256248474 479.23870849609375 1.9209118556333535 True 40 10
297 296 10 diag 1e-05 1 kmeans 300 7888954.303927114 7645706.0 -1350.6729 0.20840129256248474 479.23870849609375 1.9209118556333535 True 40 10
298 297 10 diag 1e-05 5 kmeans 100 7737961.303927114 7494713.0 -1323.7098 0.21908442676067352 571.03955078125 1.8024606257110887 True 14 10
299 298 10 diag 1e-05 5 kmeans 300 7737961.303927114 7494713.0 -1323.7098 0.21908442676067352 571.03955078125 1.8024606257110887 True 14 10
300 299 10 diag 0.0001 1 kmeans 100 7894699.303927114 7651451.0 -1351.6987 0.20837311446666718 479.2680969238281 1.921187996397482 True 39 10
301 300 10 diag 0.0001 1 kmeans 300 7894699.303927114 7651451.0 -1351.6987 0.20837311446666718 479.2680969238281 1.921187996397482 True 39 10
302 301 10 diag 0.0001 5 kmeans 100 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 15 10
303 302 10 diag 0.0001 5 kmeans 300 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 15 10
304 303 10 diag 0.001 1 kmeans 100 7913842.303927114 7670594.0 -1355.1172 0.20897234976291656 479.4935302734375 1.9199507888474652 True 32 10
305 304 10 diag 0.001 1 kmeans 300 7913842.303927114 7670594.0 -1355.1172 0.20897234976291656 479.4935302734375 1.9199507888474652 True 32 10
306 305 10 diag 0.001 5 kmeans 100 7762934.303927114 7519686.0 -1328.1693 0.2199474722146988 571.3079833984375 1.801659072375885 True 12 10
307 306 10 diag 0.001 5 kmeans 300 7762934.303927114 7519686.0 -1328.1693 0.2199474722146988 571.3079833984375 1.801659072375885 True 12 10
308 307 10 diag 1e-05 1 k-means++ 100 7924116.803927114 7680868.5 -1356.9519 0.18109233677387238 451.2889099121094 2.0573889908400558 True 21 10
309 308 10 diag 1e-05 1 k-means++ 300 7924116.803927114 7680868.5 -1356.9519 0.18109233677387238 451.2889099121094 2.0573889908400558 True 21 10
310 309 10 diag 1e-05 5 k-means++ 100 7738147.803927114 7494899.5 -1323.7432 0.2197750359773636 571.7454223632812 1.7996143618833955 True 15 10
311 310 10 diag 1e-05 5 k-means++ 300 7738147.803927114 7494899.5 -1323.7432 0.2197750359773636 571.7454223632812 1.7996143618833955 True 15 10
312 311 10 diag 0.0001 1 k-means++ 100 8187378.303927114 7944130.0 -1403.9629 0.18437595665454865 504.9218444824219 1.8422731699234043 True 15 10
313 312 10 diag 0.0001 1 k-means++ 300 8187378.303927114 7944130.0 -1403.9629 0.18437595665454865 504.9218444824219 1.8422731699234043 True 15 10
314 313 10 diag 0.0001 5 k-means++ 100 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 19 10
315 314 10 diag 0.0001 5 k-means++ 300 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 19 10
316 315 10 diag 0.001 1 k-means++ 100 8206320.303927114 7963072.0 -1407.3453 0.18497711420059204 505.0896301269531 1.8418734003624215 True 12 10
317 316 10 diag 0.001 1 k-means++ 300 8206320.303927114 7963072.0 -1407.3453 0.18497711420059204 505.0896301269531 1.8418734003624215 True 12 10
318 317 10 diag 0.001 5 k-means++ 100 7762911.303927114 7519663.0 -1328.1652 0.21994036436080933 571.3048706054688 1.8012326594817665 True 14 10
319 318 10 diag 0.001 5 k-means++ 300 7762911.303927114 7519663.0 -1328.1652 0.21994036436080933 571.3048706054688 1.8012326594817665 True 14 10
320 319 11 diag 1e-05 1 kmeans 100 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
321 320 11 diag 1e-05 1 kmeans 300 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
322 321 11 diag 1e-05 5 kmeans 100 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
323 322 11 diag 1e-05 5 kmeans 300 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
324 323 11 diag 0.0001 1 kmeans 100 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
325 324 11 diag 0.0001 1 kmeans 300 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
326 325 11 diag 0.0001 5 kmeans 100 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
327 326 11 diag 0.0001 5 kmeans 300 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
328 327 11 diag 0.001 1 kmeans 100 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
329 328 11 diag 0.001 1 kmeans 300 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
330 329 11 diag 0.001 5 kmeans 100 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
331 330 11 diag 0.001 5 kmeans 300 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
332 331 11 diag 1e-05 1 k-means++ 100 7791839.228057295 7524265.5 -1327.5238 0.19072884321212769 512.7921752929688 1.8151284796024916 True 12 11
333 332 11 diag 1e-05 1 k-means++ 300 7791839.228057295 7524265.5 -1327.5238 0.19072884321212769 512.7921752929688 1.8151284796024916 True 12 11
334 333 11 diag 1e-05 5 k-means++ 100 7590459.228057295 7322885.5 -1291.5631 0.23250937461853027 557.0549926757812 1.6956773285989237 True 15 11
335 334 11 diag 1e-05 5 k-means++ 300 7590459.228057295 7322885.5 -1291.5631 0.23250937461853027 557.0549926757812 1.6956773285989237 True 15 11
336 335 11 diag 0.0001 1 k-means++ 100 7673894.728057295 7406321.0 -1306.4623 0.1943679302930832 520.76953125 1.7964734396608426 True 18 11
337 336 11 diag 0.0001 1 k-means++ 300 7673894.728057295 7406321.0 -1306.4623 0.1943679302930832 520.76953125 1.7964734396608426 True 18 11
338 337 11 diag 0.0001 5 k-means++ 100 7595473.228057295 7327899.5 -1292.4585 0.23414196074008942 558.0660400390625 1.7058883755528695 True 22 11
339 338 11 diag 0.0001 5 k-means++ 300 7595473.228057295 7327899.5 -1292.4585 0.23414196074008942 558.0660400390625 1.7058883755528695 True 22 11
340 339 11 diag 0.001 1 k-means++ 100 7694915.228057295 7427341.5 -1310.216 0.1941366195678711 518.650634765625 1.8003890716705255 True 19 11
341 340 11 diag 0.001 1 k-means++ 300 7694915.228057295 7427341.5 -1310.216 0.1941366195678711 518.650634765625 1.8003890716705255 True 19 11
342 341 11 diag 0.001 5 k-means++ 100 7603135.728057295 7335562.0 -1293.8268 0.23057277500629425 554.763427734375 1.7732846393460022 True 61 11
343 342 11 diag 0.001 5 k-means++ 300 7603135.728057295 7335562.0 -1293.8268 0.23057277500629425 554.763427734375 1.7732846393460022 True 61 11
344 343 14 diag 1e-05 1 kmeans 100 7223238.000447838 6882688.0 -1208.5668 0.189774751663208 445.0321044921875 2.405689156966965 True 35 14
345 344 14 diag 1e-05 1 kmeans 300 7223238.000447838 6882688.0 -1208.5668 0.189774751663208 445.0321044921875 2.405689156966965 True 35 14
346 345 14 diag 1e-05 5 kmeans 100 7179637.000447838 6839087.0 -1200.7809 0.19881103932857513 443.0815124511719 2.3498009106287974 True 37 14
347 346 14 diag 1e-05 5 kmeans 300 7179637.000447838 6839087.0 -1200.7809 0.19881103932857513 443.0815124511719 2.3498009106287974 True 37 14
348 347 14 diag 0.0001 1 kmeans 100 7403717.000447838 7063167.0 -1240.7952 0.19351521134376526 467.3027038574219 1.8745209391776043 True 9 14
349 348 14 diag 0.0001 1 kmeans 300 7403717.000447838 7063167.0 -1240.7952 0.19351521134376526 467.3027038574219 1.8745209391776043 True 9 14
350 349 14 diag 0.0001 5 kmeans 100 7185399.000447838 6844849.0 -1201.8098 0.1989377737045288 443.1646423339844 2.3487992155318165 True 36 14
351 350 14 diag 0.0001 5 kmeans 300 7185399.000447838 6844849.0 -1201.8098 0.1989377737045288 443.1646423339844 2.3487992155318165 True 36 14
352 351 14 diag 0.001 1 kmeans 100 7249591.000447838 6909041.0 -1213.2727 0.1921805739402771 446.1286315917969 2.4012027743509816 True 32 14
353 352 14 diag 0.001 1 kmeans 300 7249591.000447838 6909041.0 -1213.2727 0.1921805739402771 446.1286315917969 2.4012027743509816 True 32 14
354 353 14 diag 0.001 5 kmeans 100 7206289.000447838 6865739.0 -1205.5402 0.2005954384803772 446.14300537109375 2.3618214198344596 True 35 14
355 354 14 diag 0.001 5 kmeans 300 7206289.000447838 6865739.0 -1205.5402 0.2005954384803772 446.14300537109375 2.3618214198344596 True 35 14
356 355 14 diag 1e-05 1 k-means++ 100 7407422.000447838 7066872.0 -1241.4568 0.17794980108737946 461.5752868652344 1.8973642909602886 True 16 14
357 356 14 diag 1e-05 1 k-means++ 300 7407422.000447838 7066872.0 -1241.4568 0.17794980108737946 461.5752868652344 1.8973642909602886 True 16 14
358 357 14 diag 1e-05 5 k-means++ 100 7225380.500447838 6884830.5 -1208.9493 0.21065233647823334 449.29351806640625 2.369687246645459 True 41 14
359 358 14 diag 1e-05 5 k-means++ 300 7225380.500447838 6884830.5 -1208.9493 0.21065233647823334 449.29351806640625 2.369687246645459 True 41 14
360 359 14 diag 0.0001 1 k-means++ 100 7409969.000447838 7069419.0 -1241.9116 0.18569153547286987 462.6240539550781 1.8868216012452776 True 15 14
361 360 14 diag 0.0001 1 k-means++ 300 7409969.000447838 7069419.0 -1241.9116 0.18569153547286987 462.6240539550781 1.8868216012452776 True 15 14
362 361 14 diag 0.0001 5 k-means++ 100 7197481.000447838 6856931.0 -1203.9673 0.1839146614074707 442.7921142578125 2.4198138057845995 True 34 14
363 362 14 diag 0.0001 5 k-means++ 300 7197481.000447838 6856931.0 -1203.9673 0.1839146614074707 442.7921142578125 2.4198138057845995 True 34 14
364 363 14 diag 0.001 1 k-means++ 300 7428971.000447838 7088421.0 -1245.3048 0.18628861010074615 462.73419189453125 1.8868677406736207 False 300 14
365 364 14 diag 0.001 5 k-means++ 100 7217251.500447838 6876701.5 -1207.4978 0.18614986538887024 443.1617431640625 2.4207333709907854 True 30 14
366 365 14 diag 0.001 5 k-means++ 300 7217251.500447838 6876701.5 -1207.4978 0.18614986538887024 443.1617431640625 2.4207333709907854 True 30 14
367 366 17 diag 1e-05 1 kmeans 100 7157834.272838381 6744308.0 -1179.4664 0.15990924835205078 403.0197448730469 2.0343843124532546 True 24 17
368 367 17 diag 1e-05 1 kmeans 300 7157834.272838381 6744308.0 -1179.4664 0.15990924835205078 403.0197448730469 2.0343843124532546 True 24 17
369 368 17 diag 1e-05 5 kmeans 100 6988291.272838381 6574765.0 -1149.1909 0.18784816563129425 396.8871154785156 2.38212018534803 True 20 17
370 369 17 diag 1e-05 5 kmeans 300 6988291.272838381 6574765.0 -1149.1909 0.18784816563129425 396.8871154785156 2.38212018534803 True 20 17
371 370 17 diag 0.0001 1 kmeans 100 7164777.772838381 6751251.5 -1180.7063 0.16118811070919037 403.645263671875 2.0161663571062354 True 12 17
372 371 17 diag 0.0001 1 kmeans 300 7164777.772838381 6751251.5 -1180.7063 0.16118811070919037 403.645263671875 2.0161663571062354 True 12 17
373 372 17 diag 0.0001 5 kmeans 100 6993985.272838381 6580459.0 -1150.2076 0.18836656212806702 396.94464111328125 2.382098456335865 True 20 17
374 373 17 diag 0.0001 5 kmeans 300 6993985.272838381 6580459.0 -1150.2076 0.18836656212806702 396.94464111328125 2.382098456335865 True 20 17
375 374 17 diag 0.001 1 kmeans 100 7020208.272838381 6606682.0 -1154.8904 0.16110706329345703 392.0225830078125 2.466771067136851 True 32 17
376 375 17 diag 0.001 1 kmeans 300 7020208.272838381 6606682.0 -1154.8904 0.16110706329345703 392.0225830078125 2.466771067136851 True 32 17
377 376 17 diag 0.001 5 kmeans 100 7014078.772838381 6600552.5 -1153.7958 0.19070318341255188 397.37750244140625 2.3799724457371485 True 20 17
378 377 17 diag 0.001 5 kmeans 300 7014078.772838381 6600552.5 -1153.7958 0.19070318341255188 397.37750244140625 2.3799724457371485 True 20 17
379 378 17 diag 1e-05 1 k-means++ 100 7121674.772838381 6708148.5 -1173.0094 0.15012019872665405 375.2821350097656 2.4197980533663803 True 31 17
380 379 17 diag 1e-05 1 k-means++ 300 7121674.772838381 6708148.5 -1173.0094 0.15012019872665405 375.2821350097656 2.4197980533663803 True 31 17
381 380 17 diag 1e-05 5 k-means++ 100 7005072.772838381 6591546.5 -1152.1876 0.14115209877490997 384.4289245605469 2.4847770953101596 True 29 17
382 381 17 diag 1e-05 5 k-means++ 300 7005072.772838381 6591546.5 -1152.1876 0.14115209877490997 384.4289245605469 2.4847770953101596 True 29 17
383 382 17 diag 0.0001 1 k-means++ 100 7293509.772838381 6879983.5 -1203.6942 0.15238241851329803 397.42816162109375 2.107055060535422 True 15 17
384 383 17 diag 0.0001 1 k-means++ 300 7293509.772838381 6879983.5 -1203.6942 0.15238241851329803 397.42816162109375 2.107055060535422 True 15 17
385 384 17 diag 0.0001 5 k-means++ 100 7015674.772838381 6602148.5 -1154.0808 0.1819005310535431 394.13629150390625 2.4964933433175283 True 18 17
386 385 17 diag 0.0001 5 k-means++ 300 7015674.772838381 6602148.5 -1154.0808 0.1819005310535431 394.13629150390625 2.4964933433175283 True 18 17
387 386 17 diag 0.001 1 k-means++ 100 7312575.772838381 6899049.5 -1207.0989 0.15248946845531464 397.60723876953125 2.1086064619099547 True 17 17
388 387 17 diag 0.001 1 k-means++ 300 7312575.772838381 6899049.5 -1207.0989 0.15248946845531464 397.60723876953125 2.1086064619099547 True 17 17
389 388 17 diag 0.001 5 k-means++ 100 7034312.772838381 6620786.5 -1157.409 0.18249236047267914 394.5459289550781 2.4918179246451175 True 23 17
390 389 17 diag 0.001 5 k-means++ 300 7034312.772838381 6620786.5 -1157.409 0.18249236047267914 394.5459289550781 2.4918179246451175 True 23 17
391 390 20 diag 1e-05 1 kmeans 100 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
392 391 20 diag 1e-05 1 kmeans 300 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
393 392 20 diag 1e-05 5 kmeans 100 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
394 393 20 diag 1e-05 5 kmeans 300 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
395 394 20 diag 0.0001 1 kmeans 100 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
396 395 20 diag 0.0001 1 kmeans 300 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
397 396 20 diag 0.0001 5 kmeans 100 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
398 397 20 diag 0.0001 5 kmeans 300 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
399 398 20 diag 0.001 1 kmeans 100 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
400 399 20 diag 0.001 1 kmeans 300 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
401 400 20 diag 0.001 5 kmeans 100 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
402 401 20 diag 0.001 5 kmeans 300 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
403 402 20 diag 1e-05 1 k-means++ 100 6978855.045228925 6492352.5 -1130.0847 0.13519038259983063 338.322509765625 2.5026143875581077 True 24 20
404 403 20 diag 1e-05 1 k-means++ 300 6978855.045228925 6492352.5 -1130.0847 0.13519038259983063 338.322509765625 2.5026143875581077 True 24 20
405 404 20 diag 1e-05 5 k-means++ 100 6897127.045228925 6410624.5 -1115.4905 0.13251666724681854 352.5394592285156 2.4669189695674225 True 42 20
406 405 20 diag 1e-05 5 k-means++ 300 6897127.045228925 6410624.5 -1115.4905 0.13251666724681854 352.5394592285156 2.4669189695674225 True 42 20
407 406 20 diag 0.0001 1 k-means++ 100 7011968.045228925 6525465.5 -1135.9978 0.14400699734687805 344.567138671875 2.517887865440349 True 30 20
408 407 20 diag 0.0001 1 k-means++ 300 7011968.045228925 6525465.5 -1135.9978 0.14400699734687805 344.567138671875 2.517887865440349 True 30 20
409 408 20 diag 0.0001 5 k-means++ 100 6905988.545228925 6419486.0 -1117.0729 0.13107705116271973 351.8740234375 2.4956842864961937 True 36 20
410 409 20 diag 0.0001 5 k-means++ 300 6905988.545228925 6419486.0 -1117.0729 0.13107705116271973 351.8740234375 2.4956842864961937 True 36 20
411 410 20 diag 0.001 1 k-means++ 100 7031180.545228925 6544678.0 -1139.4286 0.14613750576972961 345.2534484863281 2.516432567197497 True 27 20
412 411 20 diag 0.001 1 k-means++ 300 7031180.545228925 6544678.0 -1139.4286 0.14613750576972961 345.2534484863281 2.516432567197497 True 27 20
413 412 20 diag 0.001 5 k-means++ 100 6918391.545228925 6431889.0 -1119.2877 0.13308578729629517 351.49005126953125 2.474649164658472 True 35 20
414 413 20 diag 0.001 5 k-means++ 300 6918391.545228925 6431889.0 -1119.2877 0.13308578729629517 351.49005126953125 2.474649164658472 True 35 20

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,158 @@
nohup: ignoring input
Loading embeddings from /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json...
Loaded 2800 samples with embedding dimension 2048
======================================================================
RUNNING GAUSSIAN MIXTURE MODEL CLUSTERING WITH OPTIMIZED GRID SEARCH
======================================================================
Optimized parameter combinations:
- n_components: 11 values [2, 3, 4, 5, 6, 8, 10, 11, 14, 17, 20]
- covariance_types: 2 options ['full', 'diag']
- reg_covar: 3 values [1e-05, 0.0001, 0.001]
- n_init: 2 values [1, 5]
- init_params: 2 options ['kmeans', 'k-means++']
- max_iter: 2 values [100, 300]
Total combinations: 528 (optimized for speed)
Estimated runtime: 4.4 minutes
This should be much faster...
Progress: 50/528 (9.5%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 100/528 (18.9%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 150/528 (28.4%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 200/528 (37.9%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 250/528 (47.3%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=3, cov=diag, init=kmeans: BIC=12693850.34, AIC=12620880.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12693850.34, AIC=12620880.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12699627.34, AIC=12626657.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12699627.34, AIC=12626657.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12718245.34, AIC=12645275.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12718245.34, AIC=12645275.00, silhouette=0.3761
Progress: 300/528 (56.8%) - Best scores so far: BIC=11770626.34, Silhouette=0.376
n_components=4, cov=diag, init=kmeans: BIC=11525150.76, AIC=11427855.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11525150.76, AIC=11427855.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11530927.76, AIC=11433632.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11530927.76, AIC=11433632.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11549555.76, AIC=11452260.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11549555.76, AIC=11452260.00, silhouette=0.3090
n_components=5, cov=diag, init=kmeans: BIC=10641753.18, AIC=10520132.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10641753.18, AIC=10520132.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10647529.18, AIC=10525908.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10647529.18, AIC=10525908.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10666196.18, AIC=10544575.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10666196.18, AIC=10544575.00, silhouette=0.3119
Progress: 350/528 (66.3%) - Best scores so far: BIC=9931250.18, Silhouette=0.376
Progress: 400/528 (75.8%) - Best scores so far: BIC=8401628.46, Silhouette=0.376
Progress: 450/528 (85.2%) - Best scores so far: BIC=7579813.73, Silhouette=0.376
Progress: 500/528 (94.7%) - Best scores so far: BIC=6988291.27, Silhouette=0.376
Progress: 528/528 (100.0%) - Best scores so far: BIC=6849987.05, Silhouette=0.376
======================================================================
GAUSSIAN MIXTURE MODEL GRID SEARCH ANALYSIS
======================================================================
Total parameter combinations tested: 413
Combinations with valid clustering: 413
Model Selection Metrics:
Best BIC score: 6849987.05
Best AIC score: -11119584.00
Best Log-Likelihood: 6594.97
Clustering Quality Metrics:
Best silhouette score: 0.3761
Mean silhouette score: 0.2317
Best Calinski-Harabasz score: 1331.69
Best Davies-Bouldin score: 0.7860
Top 5 results by BIC (lower is better):
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6855879.05, AIC=6369376.50
Top 5 results by AIC (lower is better):
n_comp=4, cov=full: BIC=38759701.15, AIC=-11119584.00
n_comp=4, cov=full: BIC=38759701.15, AIC=-11119584.00
n_comp=3, cov=full: BIC=26462676.38, AIC=-10946786.00
n_comp=3, cov=full: BIC=26462676.38, AIC=-10946786.00
n_comp=5, cov=full: BIC=54230057.92, AIC=-8119050.00
Top 5 results by Silhouette Score:
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
Component count analysis (top 10 by BIC):
20.0 components: BIC=6849987.05, AIC=6363484.50, silhouette=0.1770
17.0 components: BIC=6988291.27, AIC=6574765.00, silhouette=0.2085
14.0 components: BIC=7179637.00, AIC=6839087.00, silhouette=0.2119
11.0 components: BIC=7579813.73, AIC=7312240.00, silhouette=0.2577
10.0 components: BIC=7737961.30, AIC=7494713.00, silhouette=0.2863
8.0 components: BIC=8401628.46, AIC=1051428.00, silhouette=0.2748
6.0 components: BIC=9102218.61, AIC=-6065602.00, silhouette=0.2707
5.0 components: BIC=9931250.18, AIC=-8119050.00, silhouette=0.3163
4.0 components: BIC=10865268.76, AIC=-11119584.00, silhouette=0.3110
3.0 components: BIC=11686081.34, AIC=-10946786.00, silhouette=0.3761
📁 SAVING DETAILED RESULTS...
==============================
Detailed grid search results saved to: gmm_grid_search_detailed_20250801_015245.json
Grid search summary CSV saved to: gmm_grid_search_summary_20250801_015245.csv
Best GMM result by BIC:
Parameters: {'n_components': 20, 'covariance_type': 'diag', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
BIC score: 6849987.05
Best GMM result by AIC:
Parameters: {'n_components': 4, 'covariance_type': 'full', 'reg_covar': 0.0001, 'n_init': 5, 'init_params': 'kmeans', 'max_iter': 100}
AIC score: -11119584.00
Best GMM result by Silhouette:
Parameters: {'n_components': 3, 'covariance_type': 'diag', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
Silhouette score: 0.3761
Visualization saved as 'gmm_clustering_results.png'
Final clustering results (bic) saved to: gmm_final_results_bic_20250801_015247.json
Traceback (most recent call last):
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 646, in <module>
main()
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 640, in main
clustering.save_clustering_results(results)
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 614, in save_clustering_results
json.dump({
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/__init__.py", line 179, in dump
for chunk in iterable:
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type float32 is not JSON serializable

View File

@@ -0,0 +1,140 @@
nohup: ignoring input
Loading embeddings from /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json...
Loaded 2800 samples with embedding dimension 2048
======================================================================
RUNNING GAUSSIAN MIXTURE MODEL CLUSTERING WITH OPTIMIZED GRID SEARCH
======================================================================
Optimized parameter combinations:
- n_components: 21 values [2, 3, 4, 5, 6, 8, 10, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50]
- covariance_types: 2 options ['tied', 'spherical']
- reg_covar: 3 values [1e-05, 0.0001, 0.001]
- n_init: 2 values [1, 5]
- init_params: 2 options ['kmeans', 'k-means++']
- max_iter: 2 values [100, 300]
Total combinations: 1008 (optimized for speed)
Estimated runtime: 8.4 minutes
This should be much faster...
n_components=2, cov=tied, init=kmeans: BIC=6521812.14, AIC=-5960170.38, silhouette=0.3692
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
Progress: 50/1008 (5.0%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
Progress: 100/1008 (9.9%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 150/1008 (14.9%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 200/1008 (19.8%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 250/1008 (24.8%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 300/1008 (29.8%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 350/1008 (34.7%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 400/1008 (39.7%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 450/1008 (44.6%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 500/1008 (49.6%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 550/1008 (54.6%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 600/1008 (59.5%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 650/1008 (64.5%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 700/1008 (69.4%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 750/1008 (74.4%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 800/1008 (79.4%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 850/1008 (84.3%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 900/1008 (89.3%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 950/1008 (94.2%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 1000/1008 (99.2%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 1008/1008 (100.0%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
======================================================================
GAUSSIAN MIXTURE MODEL GRID SEARCH ANALYSIS
======================================================================
Total parameter combinations tested: 1008
Combinations with valid clustering: 1008
Model Selection Metrics:
Best BIC score: 6511443.85
Best AIC score: -6295231.48
Best Log-Likelihood: 1910.09
Clustering Quality Metrics:
Best silhouette score: 0.3757
Mean silhouette score: 0.0287
Best Calinski-Harabasz score: 1331.69
Best Davies-Bouldin score: 0.6762
Top 5 results by BIC (lower is better):
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=4, cov=tied: BIC=6514783.32, AIC=-5991530.55
Top 5 results by AIC (lower is better):
n_comp=50, cov=tied: BIC=6770703.71, AIC=-6295231.48
n_comp=50, cov=tied: BIC=6770703.71, AIC=-6295231.48
n_comp=50, cov=tied: BIC=6779928.76, AIC=-6286006.43
n_comp=50, cov=tied: BIC=6779928.76, AIC=-6286006.43
n_comp=47, cov=tied: BIC=6755535.12, AIC=-6273903.03
Top 5 results by Silhouette Score:
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
Component count analysis (top 10 by BIC):
3.0 components: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3757
4.0 components: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
5.0 components: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
2.0 components: BIC=6521812.14, AIC=-5960170.38, silhouette=0.3693
6.0 components: BIC=6526215.27, AIC=-6004429.97, silhouette=0.2485
8.0 components: BIC=6529704.08, AIC=-6025272.52, silhouette=0.2680
10.0 components: BIC=6538644.29, AIC=-6040663.67, silhouette=0.2706
11.0 components: BIC=6546208.81, AIC=-6045264.84, silhouette=0.2580
14.0 components: BIC=6563001.35, AIC=-6064969.34, silhouette=0.2241
17.0 components: BIC=6580862.17, AIC=-6083605.55, silhouette=0.2109
📁 SAVING DETAILED RESULTS...
==============================
Detailed grid search results saved to: gmm_grid_search_detailed_20250805_150635.json
Grid search summary CSV saved to: gmm_grid_search_summary_20250805_150635.csv
Best GMM result by BIC:
Parameters: {'n_components': 3, 'covariance_type': 'tied', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
BIC score: 6511443.85
Best GMM result by AIC:
Parameters: {'n_components': 50, 'covariance_type': 'tied', 'reg_covar': 1e-05, 'n_init': 5, 'init_params': 'kmeans', 'max_iter': 100}
AIC score: -6295231.48
Best GMM result by Silhouette:
Parameters: {'n_components': 3, 'covariance_type': 'spherical', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
Silhouette score: 0.3757
Visualization saved as 'gmm_clustering_results.png'
Final clustering results (bic) saved to: gmm_final_results_bic_20250805_150636.json
Final clustering results (aic) saved to: gmm_final_results_aic_20250805_150636.json
Traceback (most recent call last):
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 649, in <module>
main()
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 643, in main
clustering.save_clustering_results(results)
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 617, in save_clustering_results
json.dump({
File "/usr/lib/python3.10/json/__init__.py", line 179, in dump
for chunk in iterable:
File "/usr/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/usr/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/usr/lib/python3.10/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/usr/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type float32 is not JSON serializable

22
command.sh Normal file
View File

@@ -0,0 +1,22 @@
python auto_cluster.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/result/embeddings_factures_donut.json --method dbscan
python auto_cluster.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/result/embeddings_factures_ostepoathie_1k.json --method dbscan
python auto_cluster.py --embeddings_path /home/nguyendc/phat-dev/clustering/embeddings_factures_osteopathie_1k.json --method dbscan
python auto_cluster.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json --method dbscan
python auto_cluster.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json --method None
python gmm_extensive.py --embeddings_path embeddings_factures_donut.json
nohup python -u gmm_extensive.py --embeddings_path /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json > log_gmm_extensive.txt 2>&1 &
python auto_cluster.py \
--embeddings_path /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_InternVL3_5-4B-Instruct.json \
--method None

View File

@@ -0,0 +1,874 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"id": "59f8a415",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/home/nguyendc/sonnh/venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n",
" from .autonotebook import tqdm as notebook_tqdm\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-09-02 13:50:30.358544: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
"E0000 00:00:1756821030.369428 3858431 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"E0000 00:00:1756821030.372761 3858431 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"W0000 00:00:1756821030.382108 3858431 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1756821030.382119 3858431 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1756821030.382121 3858431 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1756821030.382123 3858431 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"2025-09-02 13:50:30.385619: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2025-09-02 13:50:35,304] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/bin/ld: cannot find -laio: No such file or directory\n",
"collect2: error: ld returned 1 exit status\n",
"/usr/bin/ld: cannot find -laio: No such file or directory\n",
"collect2: error: ld returned 1 exit status\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using device: cuda\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"A new version of the following files was downloaded from https://huggingface.co/OpenGVLab/InternVL3_5-4B-Instruct:\n",
"- configuration_intern_vit.py\n",
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
"A new version of the following files was downloaded from https://huggingface.co/OpenGVLab/InternVL3_5-4B-Instruct:\n",
"- configuration_internvl_chat.py\n",
"- configuration_intern_vit.py\n",
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
"A new version of the following files was downloaded from https://huggingface.co/OpenGVLab/InternVL3_5-4B-Instruct:\n",
"- conversation.py\n",
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
"A new version of the following files was downloaded from https://huggingface.co/OpenGVLab/InternVL3_5-4B-Instruct:\n",
"- modeling_intern_vit.py\n",
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
"A new version of the following files was downloaded from https://huggingface.co/OpenGVLab/InternVL3_5-4B-Instruct:\n",
"- modeling_internvl_chat.py\n",
"- conversation.py\n",
"- modeling_intern_vit.py\n",
". Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.\n",
"Fetching 2 files: 100%|██████████| 2/2 [00:00<00:00, 7.56it/s]\n",
"Loading checkpoint shards: 100%|██████████| 2/2 [00:02<00:00, 1.03s/it]\n"
]
}
],
"source": [
"import torch\n",
"from transformers import Qwen2_5_VLForConditionalGeneration, AutoProcessor, AutoModel, InternVLChatModel\n",
"# from qwen_vl_utils import process_vision_info\n",
"from PIL import Image\n",
"import os\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"import math\n",
"import torch\n",
"from transformers import AutoTokenizer, AutoModel\n",
"import timm\n",
"\n",
"# --- Configuration ---\n",
"# MODEL_NAME = \"OpenGVLab/InternVL3_5-4B\" # You can choose other model sizes\n",
"MODEL_NAME = \"OpenGVLab/InternVL3_5-4B-Instruct\"\n",
"\n",
"\n",
"IMAGE_DIR = \"/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/\"\n",
"BATCH_SIZE = 4\n",
"# --- End Configuration ---\n",
"\n",
"# Check for GPU availability\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"print(f\"Using device: {device}\")\n",
"\n",
"# Load the model and processor\n",
"\n",
"model = AutoModel.from_pretrained(\n",
" MODEL_NAME,\n",
" torch_dtype=torch.bfloat16,\n",
" use_flash_attn=True,\n",
" attn_implementation=\"flash_attention_2\",\n",
" trust_remote_code=True,\n",
" device_map=\"cuda\").eval()\n",
"\n",
"processor = AutoProcessor.from_pretrained(\n",
" MODEL_NAME, \n",
" trust_remote_code=True\n",
" )\n",
"tokenizer = AutoTokenizer.from_pretrained(\n",
" MODEL_NAME, \n",
" trust_remote_code=True, \n",
" use_fast=False\n",
" )"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6d826d19",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"InternVLChatModel(\n",
" (vision_model): InternVisionModel(\n",
" (embeddings): InternVisionEmbeddings(\n",
" (patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))\n",
" )\n",
" (encoder): InternVisionEncoder(\n",
" (layers): ModuleList(\n",
" (0-23): 24 x InternVisionEncoderLayer(\n",
" (attn): InternAttention(\n",
" (qkv): Linear(in_features=1024, out_features=3072, bias=True)\n",
" (attn_drop): Dropout(p=0.0, inplace=False)\n",
" (proj_drop): Dropout(p=0.0, inplace=False)\n",
" (inner_attn): FlashAttention()\n",
" (proj): Linear(in_features=1024, out_features=1024, bias=True)\n",
" )\n",
" (mlp): InternMLP(\n",
" (act): GELUActivation()\n",
" (fc1): Linear(in_features=1024, out_features=4096, bias=True)\n",
" (fc2): Linear(in_features=4096, out_features=1024, bias=True)\n",
" )\n",
" (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)\n",
" (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)\n",
" (drop_path1): Identity()\n",
" (drop_path2): Identity()\n",
" )\n",
" )\n",
" )\n",
" )\n",
" (language_model): Qwen3ForCausalLM(\n",
" (model): Qwen3Model(\n",
" (embed_tokens): Embedding(151936, 2560)\n",
" (layers): ModuleList(\n",
" (0-35): 36 x Qwen3DecoderLayer(\n",
" (self_attn): Qwen3Attention(\n",
" (q_proj): Linear(in_features=2560, out_features=4096, bias=False)\n",
" (k_proj): Linear(in_features=2560, out_features=1024, bias=False)\n",
" (v_proj): Linear(in_features=2560, out_features=1024, bias=False)\n",
" (o_proj): Linear(in_features=4096, out_features=2560, bias=False)\n",
" (q_norm): Qwen3RMSNorm((128,), eps=1e-06)\n",
" (k_norm): Qwen3RMSNorm((128,), eps=1e-06)\n",
" )\n",
" (mlp): Qwen3MLP(\n",
" (gate_proj): Linear(in_features=2560, out_features=9728, bias=False)\n",
" (up_proj): Linear(in_features=2560, out_features=9728, bias=False)\n",
" (down_proj): Linear(in_features=9728, out_features=2560, bias=False)\n",
" (act_fn): SiLU()\n",
" )\n",
" (input_layernorm): Qwen3RMSNorm((2560,), eps=1e-06)\n",
" (post_attention_layernorm): Qwen3RMSNorm((2560,), eps=1e-06)\n",
" )\n",
" )\n",
" (norm): Qwen3RMSNorm((2560,), eps=1e-06)\n",
" (rotary_emb): Qwen3RotaryEmbedding()\n",
" )\n",
" (lm_head): Linear(in_features=2560, out_features=151936, bias=False)\n",
" )\n",
" (mlp1): Sequential(\n",
" (0): LayerNorm((4096,), eps=1e-05, elementwise_affine=True)\n",
" (1): Linear(in_features=4096, out_features=2560, bias=True)\n",
" (2): GELU(approximate='none')\n",
" (3): Linear(in_features=2560, out_features=2560, bias=True)\n",
" )\n",
")"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "7bbfcf47",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"InternVisionModel(\n",
" (embeddings): InternVisionEmbeddings(\n",
" (patch_embedding): Conv2d(3, 1024, kernel_size=(14, 14), stride=(14, 14))\n",
" )\n",
" (encoder): InternVisionEncoder(\n",
" (layers): ModuleList(\n",
" (0-23): 24 x InternVisionEncoderLayer(\n",
" (attn): InternAttention(\n",
" (qkv): Linear(in_features=1024, out_features=3072, bias=True)\n",
" (attn_drop): Dropout(p=0.0, inplace=False)\n",
" (proj_drop): Dropout(p=0.0, inplace=False)\n",
" (inner_attn): FlashAttention()\n",
" (proj): Linear(in_features=1024, out_features=1024, bias=True)\n",
" )\n",
" (mlp): InternMLP(\n",
" (act): GELUActivation()\n",
" (fc1): Linear(in_features=1024, out_features=4096, bias=True)\n",
" (fc2): Linear(in_features=4096, out_features=1024, bias=True)\n",
" )\n",
" (norm1): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)\n",
" (norm2): LayerNorm((1024,), eps=1e-06, elementwise_affine=True)\n",
" (drop_path1): Identity()\n",
" (drop_path2): Identity()\n",
" )\n",
" )\n",
" )\n",
")"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model.vision_model"
]
},
{
"cell_type": "markdown",
"id": "ae26d6cf",
"metadata": {},
"source": [
"# demo ?"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "817d3ccb",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 4,
"id": "d41f94bd",
"metadata": {},
"outputs": [],
"source": [
"# import math\n",
"import numpy as np\n",
"import torch\n",
"import torchvision.transforms as T\n",
"# from decord import VideoReader, cpu\n",
"from PIL import Image\n",
"from torchvision.transforms.functional import InterpolationMode\n",
"# from modelscope import AutoModel, AutoTokenizer\n",
"\n",
"IMAGENET_MEAN = (0.485, 0.456, 0.406)\n",
"IMAGENET_STD = (0.229, 0.224, 0.225)\n",
"\n",
"def build_transform(input_size):\n",
" MEAN, STD = IMAGENET_MEAN, IMAGENET_STD\n",
" transform = T.Compose([\n",
" T.Lambda(lambda img: img.convert('RGB') if img.mode != 'RGB' else img),\n",
" T.Resize((input_size, input_size), interpolation=InterpolationMode.BICUBIC),\n",
" T.ToTensor(),\n",
" T.Normalize(mean=MEAN, std=STD)\n",
" ])\n",
" return transform\n",
"\n",
"def find_closest_aspect_ratio(aspect_ratio, target_ratios, width, height, image_size):\n",
" best_ratio_diff = float('inf')\n",
" best_ratio = (1, 1)\n",
" area = width * height\n",
" for ratio in target_ratios:\n",
" target_aspect_ratio = ratio[0] / ratio[1]\n",
" ratio_diff = abs(aspect_ratio - target_aspect_ratio)\n",
" if ratio_diff < best_ratio_diff:\n",
" best_ratio_diff = ratio_diff\n",
" best_ratio = ratio\n",
" elif ratio_diff == best_ratio_diff:\n",
" if area > 0.5 * image_size * image_size * ratio[0] * ratio[1]:\n",
" best_ratio = ratio\n",
" return best_ratio\n",
"\n",
"def dynamic_preprocess(image, min_num=1, max_num=12, image_size=448, use_thumbnail=False):\n",
" orig_width, orig_height = image.size\n",
" aspect_ratio = orig_width / orig_height\n",
"\n",
" # enumerate every (columns, rows) tile grid whose tile count is within [min_num, max_num]\n",
" target_ratios = set(\n",
" (i, j) for n in range(min_num, max_num + 1) for i in range(1, n + 1) for j in range(1, n + 1) if\n",
" i * j <= max_num and i * j >= min_num)\n",
" target_ratios = sorted(target_ratios, key=lambda x: x[0] * x[1])\n",
"\n",
" # pick the candidate grid whose aspect ratio is closest to the image's\n",
" target_aspect_ratio = find_closest_aspect_ratio(\n",
" aspect_ratio, target_ratios, orig_width, orig_height, image_size)\n",
"\n",
" # calculate the target width and height\n",
" target_width = image_size * target_aspect_ratio[0]\n",
" target_height = image_size * target_aspect_ratio[1]\n",
" blocks = target_aspect_ratio[0] * target_aspect_ratio[1]\n",
"\n",
" # resize the image\n",
" resized_img = image.resize((target_width, target_height))\n",
" processed_images = []\n",
" for i in range(blocks):\n",
" box = (\n",
" (i % (target_width // image_size)) * image_size,\n",
" (i // (target_width // image_size)) * image_size,\n",
" ((i % (target_width // image_size)) + 1) * image_size,\n",
" ((i // (target_width // image_size)) + 1) * image_size\n",
" )\n",
" # split the image\n",
" split_img = resized_img.crop(box)\n",
" processed_images.append(split_img)\n",
" assert len(processed_images) == blocks\n",
" if use_thumbnail and len(processed_images) != 1:\n",
" thumbnail_img = image.resize((image_size, image_size))\n",
" processed_images.append(thumbnail_img)\n",
" return processed_images\n",
"\n",
"def load_image(image_file, input_size=448, max_num=12):\n",
" image = Image.open(image_file).convert('RGB')\n",
" transform = build_transform(input_size=input_size)\n",
" images = dynamic_preprocess(image, image_size=input_size, use_thumbnail=True, max_num=max_num)\n",
" pixel_values = [transform(image) for image in images]\n",
" pixel_values = torch.stack(pixel_values)\n",
" return pixel_values"
]
},
{
"cell_type": "markdown",
"id": "f2ec71a4",
"metadata": {},
"source": [
"# Attention pooling\n"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "a404fa19",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn.functional as F\n",
"\n",
"def gem_pool(x, p: float = 3.0, eps: float = 1e-6):\n",
" # x: [B, N, D]\n",
" return (x.clamp(min=eps).pow(p).mean(dim=1)).pow(1.0/p)\n",
"\n",
"@torch.no_grad()\n",
"def image_embedding(pixel_values, model, use_tiling=True):\n",
" # pixel_values: nếu dùng processor của InternVL, có thể là [T,3,H,W]; nếu bạn tự resize = [1,3,H,W]\n",
" out = model.vision_model(pixel_values=pixel_values, output_hidden_states=True)\n",
" tok = out.last_hidden_state # [T, N, 1024] hoặc [1, N, 1024]\n",
" if tok.dim() == 2: # phòng trường hợp model trả [N, D]\n",
" tok = tok.unsqueeze(0)\n",
"\n",
" # 1) Attention pooling theo token, trong từng tile\n",
" w_tok = torch.softmax(tok.norm(dim=-1), dim=1).unsqueeze(-1) # [T,N,1]\n",
" attn_tile = (tok * w_tok).sum(dim=1) # [T,1024]\n",
"\n",
" # 2) Các pooling khác theo token\n",
" mean_tile = tok.mean(dim=1) # [T,1024]\n",
" max_tile = tok.max(dim=1).values # [T,1024]\n",
" gem_tile = gem_pool(tok, p=3.0) # [T,1024]\n",
"\n",
" # 3) Attention across-tiles (giữ multi-scale nhưng gọn)\n",
" tile_scores = attn_tile.norm(dim=-1) # [T]\n",
" w_tile = torch.softmax(tile_scores, dim=0).unsqueeze(-1) # [T,1]\n",
"\n",
" mean_vec = (mean_tile * w_tile).sum(dim=0)\n",
" max_vec = (max_tile * w_tile).sum(dim=0)\n",
" gem_vec = (gem_tile * w_tile).sum(dim=0)\n",
" attn_vec = (attn_tile * w_tile).sum(dim=0)\n",
"\n",
" # 4) Hợp nhất nhiều “góc nhìn” → 1 vector giàu thông tin\n",
" one_vec = torch.cat([mean_vec, max_vec, gem_vec, attn_vec], dim=0) # [4*1024]\n",
" one_vec = F.normalize(one_vec, dim=-1).unsqueeze(0) # [1, 4096]\n",
" return one_vec.half() # FP16 để tiết kiệm bộ nhớ"
]
},
{
"cell_type": "markdown",
"id": "ed35a4ce",
"metadata": {},
"source": [
"# pool"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "3edf8b67",
"metadata": {},
"outputs": [],
"source": [
"import torch\n",
"import torch.nn.functional as F\n",
"\n",
"# --- Pooling theo token (trong 1 tile) ---\n",
"def _pool_tokens(tokens: torch.Tensor, how: str = \"mean\") -> torch.Tensor:\n",
" \"\"\"\n",
" tokens: [1, N, D] hoặc [N, D]\n",
" return: [D]\n",
" \"\"\"\n",
" if tokens.dim() == 3: # [1, N, D] -> [N, D]\n",
" tokens = tokens.squeeze(0)\n",
"\n",
" if how == \"mean\":\n",
" v = tokens.mean(dim=0)\n",
" elif how == \"max\":\n",
" v = tokens.max(dim=0).values\n",
" elif how == \"gem\":\n",
" p = 3.0\n",
" v = (tokens.clamp(min=1e-6).pow(p).mean(dim=0)).pow(1.0/p)\n",
" elif how == \"cls\":\n",
" # chỉ dùng nếu backbone có CLS token ở vị trí đầu\n",
" v = tokens[0]\n",
" else:\n",
" raise ValueError(f\"Unknown pooling: {how}\")\n",
"\n",
" return v\n",
"\n",
"\n",
"@torch.no_grad()\n",
"def image_embedding_global(model, pixel_values: torch.Tensor,\n",
" pool: str = \"mean\",\n",
" normalize: bool = False,\n",
" global_index: int = 0,\n",
" use_projector: bool = False) -> torch.Tensor:\n",
" \"\"\"\n",
" Trả về 1 vector [1, D] mô tả toàn ảnh, chỉ dùng GLOBAL tile.\n",
" - pixel_values: [T,3,H,W] (ví dụ T=7) hoặc [1,3,H,W]\n",
" - global_index: thường = 0 (tile toàn ảnh nằm đầu)\n",
" - use_projector: CHỈ bật nếu bạn chắc chắn chiều khớp với projector (mlp1)\n",
" \"\"\"\n",
" model.eval()\n",
" device = next(model.parameters()).device\n",
" x = pixel_values.to(device)\n",
"\n",
" out = model.vision_model(pixel_values=x) # last_hidden_state: [T, N, D] hoặc [1, N, D]\n",
" tok = out.last_hidden_state\n",
"\n",
" # chọn global tile\n",
" if tok.size(0) > 1:\n",
" tok = tok[global_index:global_index+1] # [1, N, D]\n",
"\n",
" # (tuỳ chọn) projector sang không gian khác - cẩn thận mismatch chiều!\n",
" if use_projector:\n",
" # CHỈ nên bật khi biết chắc input dim của mlp1 khớp với tok.size(-1)\n",
" in_feat = getattr(model.mlp1[1], \"in_features\", None)\n",
" if in_feat is not None and tok.size(-1) == in_feat:\n",
" tok = model.mlp1(tok) # [1, N, D]\n",
" else:\n",
" raise ValueError(f\"Projector input dim mismatch: tokens={tok.size(-1)} vs mlp1.in={in_feat}\")\n",
"\n",
" v = _pool_tokens(tok, how=pool) # [D]\n",
" if normalize:\n",
" v = F.normalize(v, dim=-1)\n",
" return v.unsqueeze(0) # [1, D]\n",
"\n",
"\n",
"@torch.no_grad()\n",
"def image_embedding_mean(model, pixel_values: torch.Tensor,\n",
" pool: str = \"mean\",\n",
" normalize: bool = True,\n",
" use_projector: bool = False) -> torch.Tensor:\n",
" \"\"\"\n",
" Trả về 1 vector [1, D] mô tả toàn ảnh, bằng cách:\n",
" (1) pool theo token trong từng tile → [T, D]\n",
" (2) lấy mean across-tiles → [D]\n",
" \"\"\"\n",
" model.eval()\n",
" device = next(model.parameters()).device\n",
" x = pixel_values.to(device)\n",
"\n",
" out = model.vision_model(pixel_values=x)\n",
" tok = out.last_hidden_state # [T, N, D] hoặc [1, N, D]\n",
"\n",
" if use_projector:\n",
" in_feat = getattr(model.mlp1[1], \"in_features\", None)\n",
" if in_feat is not None and tok.size(-1) == in_feat:\n",
" tok = model.mlp1(tok)\n",
" else:\n",
" raise ValueError(f\"Projector input dim mismatch: tokens={tok.size(-1)} vs mlp1.in={in_feat}\")\n",
"\n",
" # pool theo token trong từng tile\n",
" T = tok.size(0)\n",
" per_tile = [ _pool_tokens(tok[t:t+1], how=pool) for t in range(T) ] # list of [D]\n",
" per_tile = torch.stack(per_tile, dim=0) # [T, D]\n",
"\n",
" # mean across-tiles\n",
" v = per_tile.mean(dim=0) # [D]\n",
" if normalize:\n",
" v = F.normalize(v, dim=-1)\n",
" return v.unsqueeze(0) # [1, D]\n"
]
},
{
"cell_type": "markdown",
"id": "613cf001",
"metadata": {},
"source": [
"# infer"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "cdfdab0e",
"metadata": {},
"outputs": [],
"source": [
"def get_image_embedding(path):\n",
" \"\"\"\n",
" Processes a single image path and extracts its embedding.\n",
" \"\"\"\n",
" images_pil = []\n",
" valid_paths = []\n",
" if path.lower().endswith(('.png', '.jpg', '.jpeg')):\n",
" try:\n",
" # The processor expects PIL images in RGB format\n",
" # images_pil.append(Image.open(path).convert(\"RGB\"))\n",
" # print(path)\n",
" valid_paths.append(path)\n",
" except Exception as e:\n",
" print(f\"Warning: Could not load image {path}. Skipping. Error: {e}\")\n",
"\n",
" if not valid_paths:\n",
" return np.array([]), []\n",
"\n",
" all_pixel_values = []\n",
" for valid_path in valid_paths:\n",
" pixel_values = load_image(valid_path, max_num=12).to(torch.bfloat16).cuda()\n",
" # print(pixel_values.shape)\n",
" all_pixel_values.append(pixel_values)\n",
" # For pure vision feature extraction, we can provide an empty text prompt.\n",
" # The processor handles tokenizing text and preparing images.\n",
" inputs = torch.cat(all_pixel_values, dim=0).to(device)\n",
" \n",
" # embeddings = image_embedding(inputs, model, use_tiling=True)\n",
" embeddings = image_embedding_mean(model, inputs)\n",
" \n",
" return embeddings.to(torch.float16).cpu().numpy()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "cdaebb7b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 2800/2800 [20:51<00:00, 2.24it/s]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Embeddings extracted and saved.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import json\n",
"\n",
"# --- Process all images in the directory ---\n",
"image_files = [os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]\n",
"all_embeddings = []\n",
"filepaths = []\n",
"BATCH_SIZE = 1\n",
"\n",
"with open(\"embeddings_factures_osteopathie_1k_InternVL3_5-4B-Instruct.json\", \"w\") as f:\n",
"\n",
" f.write(\"[\\n\")\n",
" first = True\n",
" for i in tqdm(range(0, len(image_files), BATCH_SIZE)):\n",
" batch_paths = image_files[i]\n",
" batch_embeddings = get_image_embedding(batch_paths)\n",
" embeddings_list = [emb.tolist() for emb in batch_embeddings]\n",
" for path, emb in zip(batch_paths, embeddings_list):\n",
" if not first:\n",
" f.write(\",\\n\")\n",
" json.dump({\"filepath\": path, \"embedding\": emb}, f)\n",
" first = False\n",
" f.write(\"\\n]\\n\")\n",
"\n",
"print(\"Embeddings extracted and saved.\")"
]
},
{
"cell_type": "markdown",
"id": "f0d0bf0a",
"metadata": {},
"source": [
"# check"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "0772fc89",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded 2800 samples with embedding dimension 1024\n",
"Applied L2 normalization to embeddings\n",
"(2800, 1024)\n",
"(3918600,)\n",
"mean sim: 0.9939966 std: 0.0073577887\n"
]
}
],
"source": [
"from sklearn.cluster import DBSCAN, MeanShift, AffinityPropagation\n",
"from sklearn.preprocessing import normalize\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.neighbors import NearestNeighbors\n",
"from sklearn.decomposition import PCA\n",
"import argparse\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"from datetime import datetime\n",
"\n",
"\n",
"embeddings_path = \"/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_InternVL3_5-4B-Instruct.json\"\n",
"with open(embeddings_path, 'r') as f:\n",
" data = json.load(f)\n",
"\n",
"file_paths = []\n",
"embeddings_list = []\n",
"\n",
"for item in data:\n",
" file_paths.append(item['filepath'])\n",
" embeddings_list.append(item['embedding'])\n",
"\n",
"embeddings = np.array(embeddings_list, dtype=np.float32)\n",
"print(f\"Loaded {len(file_paths)} samples with embedding dimension {embeddings.shape[1]}\")\n",
"\n",
"# Normalize embeddings using L2 normalization for cosine distance\n",
"embeddings_normalized = normalize(embeddings, norm='l2', axis=1)\n",
"print(\"Applied L2 normalization to embeddings\")\n",
"\n",
"sims = cosine_similarity(embeddings)\n",
"print(embeddings.shape)\n",
"# lấy upper triangle exclude diagonal để inspect\n",
"triu_idxs = np.triu_indices_from(sims, k=1)\n",
"dist_vals = sims[triu_idxs]\n",
"print(dist_vals.shape)\n",
"print(\"mean sim:\", dist_vals.mean(), \"std:\", dist_vals.std())"
]
},
{
"cell_type": "markdown",
"id": "cb4ea42b",
"metadata": {},
"source": [
"# temp"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "2c3e6dd0",
"metadata": {},
"outputs": [],
"source": [
"image_path = \"/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c363e486-5d45-425e-aef9-4791cad120f7_20250213_120759_1_scale_1.0.jpg\""
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "29620d93",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n",
"Setting `pad_token_id` to `eos_token_id`:151645 for open-end generation.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"User: <image>\n",
"Please describe the image shortly.\n",
"Assistant: The image shows a receipt for a consultation with Noël Breignaud, an osteopath. It includes his contact information, with the address \"104, cours des fossés, 33210 Langon\" and his phone number. The receipt details a payment of 55€ for a consultation dated 15/06/2020. The receipt number is 1750401922774. There are handwritten details and signatures, with the amount and date written in ink. Noël Breignaud's signature and a circular stamp are also present.\n",
"User: <image>\n",
"Please describe the image in detail.\n",
"Assistant: The image is a handwritten receipt or invoice from a practitioner named Noël Breign aud, who is an osteopath. The text on the left side of the document includes the following details:\n",
"\n",
"- **Name:** Noël Breignaud\n",
"- **Profession:** Ouestopathe (Osteopath)\n",
"- **Address:** 104, cours des fossés, 33210 Lagnon\n",
"- **Phone Number:** Tel. 06 88 70 66 43\n",
"\n",
"On the right side, there are registration and identification numbers:\n",
"\n",
"- **Nº SIRET:** 510 123 631 00010\n",
"- **Nº ADELI:** 330001108\n",
"- **Code APE:** 8690E\n",
"\n",
"The handwritten section of the document is in French and reads:\n",
"\n",
"- \"Déclaire avoir reçu de M. M. (fils) G[obon], Acquitté la somme de 55 €\n",
"Pour 1 consultation en date du 05/04/2024\n",
"N°: 1750460-19212774\"\n",
"\n",
"At the bottom right, there is a signature that appears to be of Noël Breignaud, with a red stamp partially visible, which seems to contain the text \"Noël BREIGNAUD\" and other markings.\n",
"\n",
"The date in the handwritten section is \"05/04/2024,\" indicating the receipt or service provided on that date. The amount mentioned is 55 euros for one consultation.\n"
]
}
],
"source": [
"pixel_values = load_image(image_path, max_num=12).to(torch.bfloat16).cuda()\n",
"generation_config = dict(max_new_tokens=1024, do_sample=True)\n",
"\n",
"\n",
"\n",
"question = '<image>\\nPlease describe the image shortly.'\n",
"response = model.chat(tokenizer, pixel_values, question, generation_config)\n",
"print(f'User: {question}\\nAssistant: {response}')\n",
"\n",
"# single-image multi-round conversation (单图多轮对话)\n",
"question = '<image>\\nPlease describe the image in detail.'\n",
"response, history = model.chat(tokenizer, pixel_values, question, generation_config, history=None, return_history=True)\n",
"print(f'User: {question}\\nAssistant: {response}')"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "35dc90e0",
"metadata": {},
"outputs": [],
"source": [
"vout = model.vision_model(pixel_values=pixel_values, output_hidden_states=True)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "77f3720a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"torch.Size([7, 1025, 1024])\n"
]
}
],
"source": [
"patch_feats = vout.last_hidden_state # [B, N_patches, Dv], Dv ~ 1024 theo kiến trúc của bạn\n",
"print(patch_feats.shape)\n",
"# Nếu backbone có CLS token, bạn có thể dùng patch_feats[:,0]\n",
"# Cách an toàn chung: mean-pool\n",
"# img_vec = patch_feats.mean(dim=1) # [B, Dv]\n",
"# img_vec = torch.nn.functional.normalize(img_vec, dim=-1) # L2 normalize cho retrieval"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "0043634c",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"torch.Size([7, 1024])"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# img_vec.shape"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "92032162",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@@ -0,0 +1,432 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 2,
"id": "59f8a415",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"2025-09-02 15:00:12.976185: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:467] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered\n",
"WARNING: All log messages before absl::InitializeLog() is called are written to STDERR\n",
"E0000 00:00:1756825212.987686 3903757 cuda_dnn.cc:8579] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered\n",
"E0000 00:00:1756825212.991038 3903757 cuda_blas.cc:1407] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered\n",
"W0000 00:00:1756825213.000855 3903757 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1756825213.000880 3903757 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1756825213.000882 3903757 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"W0000 00:00:1756825213.000884 3903757 computation_placer.cc:177] computation placer already registered. Please check linkage and avoid linking the same target more than once.\n",
"2025-09-02 15:00:13.005218: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.\n",
"To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"[2025-09-02 15:00:17,970] [INFO] [real_accelerator.py:222:get_accelerator] Setting ds_accelerator to cuda (auto detect)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/bin/ld: cannot find -laio: No such file or directory\n",
"collect2: error: ld returned 1 exit status\n",
"/usr/bin/ld: cannot find -laio: No such file or directory\n",
"collect2: error: ld returned 1 exit status\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using device: cuda\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"Loading checkpoint shards: 100%|██████████| 2/2 [00:01<00:00, 1.09it/s]\n",
"Using a slow image processor as `use_fast` is unset and a slow processor was saved with this model. `use_fast=True` will be the default behavior in v4.52, even if the model was saved with a slow processor. This will result in minor differences in outputs. You'll still be able to use a slow processor with `use_fast=False`.\n"
]
}
],
"source": [
"import torch\n",
"from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor\n",
"# from qwen_vl_utils import process_vision_info\n",
"from PIL import Image\n",
"import os\n",
"import numpy as np\n",
"from tqdm import tqdm\n",
"\n",
"# --- Configuration ---\n",
"MODEL_NAME = \"Qwen/Qwen2.5-VL-3B-Instruct\" # You can choose other model sizes\n",
"\n",
"IMAGE_DIR = \"/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/\"\n",
"BATCH_SIZE = 4\n",
"# --- End Configuration ---\n",
"\n",
"# Check for GPU availability\n",
"device = \"cuda\" if torch.cuda.is_available() else \"cpu\"\n",
"print(f\"Using device: {device}\")\n",
"\n",
"# Load the model and processor\n",
"model = Qwen2_5_VLForConditionalGeneration.from_pretrained(\n",
" MODEL_NAME, torch_dtype=\"bfloat16\", device_map=\"cuda\", attn_implementation=\"flash_attention_2\",\n",
")\n",
"processor = AutoProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "13479e1a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Qwen2_5_VLProcessor:\n",
"- image_processor: Qwen2VLImageProcessor {\n",
" \"do_convert_rgb\": true,\n",
" \"do_normalize\": true,\n",
" \"do_rescale\": true,\n",
" \"do_resize\": true,\n",
" \"image_mean\": [\n",
" 0.48145466,\n",
" 0.4578275,\n",
" 0.40821073\n",
" ],\n",
" \"image_processor_type\": \"Qwen2VLImageProcessor\",\n",
" \"image_std\": [\n",
" 0.26862954,\n",
" 0.26130258,\n",
" 0.27577711\n",
" ],\n",
" \"max_pixels\": 12845056,\n",
" \"merge_size\": 2,\n",
" \"min_pixels\": 3136,\n",
" \"patch_size\": 14,\n",
" \"processor_class\": \"Qwen2_5_VLProcessor\",\n",
" \"resample\": 3,\n",
" \"rescale_factor\": 0.00392156862745098,\n",
" \"size\": {\n",
" \"longest_edge\": 12845056,\n",
" \"shortest_edge\": 3136\n",
" },\n",
" \"temporal_patch_size\": 2\n",
"}\n",
"\n",
"- tokenizer: Qwen2TokenizerFast(name_or_path='Qwen/Qwen2.5-VL-3B-Instruct', vocab_size=151643, model_max_length=131072, is_fast=True, padding_side='right', truncation_side='right', special_tokens={'eos_token': '<|im_end|>', 'pad_token': '<|endoftext|>', 'additional_special_tokens': ['<|im_start|>', '<|im_end|>', '<|object_ref_start|>', '<|object_ref_end|>', '<|box_start|>', '<|box_end|>', '<|quad_start|>', '<|quad_end|>', '<|vision_start|>', '<|vision_end|>', '<|vision_pad|>', '<|image_pad|>', '<|video_pad|>']}, clean_up_tokenization_spaces=False, added_tokens_decoder={\n",
"\t151643: AddedToken(\"<|endoftext|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151644: AddedToken(\"<|im_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151645: AddedToken(\"<|im_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151646: AddedToken(\"<|object_ref_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151647: AddedToken(\"<|object_ref_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151648: AddedToken(\"<|box_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151649: AddedToken(\"<|box_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151650: AddedToken(\"<|quad_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151651: AddedToken(\"<|quad_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151652: AddedToken(\"<|vision_start|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151653: AddedToken(\"<|vision_end|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151654: AddedToken(\"<|vision_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151655: AddedToken(\"<|image_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151656: AddedToken(\"<|video_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=True),\n",
"\t151657: AddedToken(\"<tool_call>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151658: AddedToken(\"</tool_call>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151659: AddedToken(\"<|fim_prefix|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151660: AddedToken(\"<|fim_middle|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151661: AddedToken(\"<|fim_suffix|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151662: AddedToken(\"<|fim_pad|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151663: AddedToken(\"<|repo_name|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"\t151664: AddedToken(\"<|file_sep|>\", rstrip=False, lstrip=False, single_word=False, normalized=False, special=False),\n",
"}\n",
")\n",
"\n",
"{\n",
" \"processor_class\": \"Qwen2_5_VLProcessor\"\n",
"}"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"processor"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdfdab0e",
"metadata": {},
"outputs": [],
"source": [
"def get_image_embeddings(image_paths):\n",
" \"\"\"\n",
" Processes a batch of images and extracts their embeddings.\n",
" \"\"\"\n",
" images_pil = []\n",
" valid_paths = []\n",
" for path in image_paths:\n",
" if path.lower().endswith(('.png', '.jpg', '.jpeg')):\n",
" try:\n",
" # The processor expects PIL images in RGB format\n",
" images_pil.append(Image.open(path).convert(\"RGB\"))\n",
" valid_paths.append(path)\n",
" except Exception as e:\n",
" print(f\"Warning: Could not load image {path}. Skipping. Error: {e}\")\n",
"\n",
" if not images_pil:\n",
" return np.array([]), []\n",
"\n",
" # For pure vision feature extraction, we can provide an empty text prompt.\n",
" # The processor handles tokenizing text and preparing images.\n",
" inputs = processor(\n",
" text=[\"\"] * len(images_pil),\n",
" images=images_pil,\n",
" padding=True,\n",
" return_tensors=\"pt\"\n",
" ).to(device)\n",
"\n",
" with torch.no_grad():\n",
" # Get the vision embeddings from the model's vision tower\n",
" vision_outputs = model.visual(inputs['pixel_values'].to(dtype=model.dtype), grid_thw=inputs['image_grid_thw'])\n",
" # We'll use the pooled output as the embedding\n",
" embeddings = vision_outputs\n",
"\n",
" return embeddings.to(torch.float16).cpu().numpy()"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "cdaebb7b",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"100%|██████████| 700/700 [22:12<00:00, 1.90s/it]"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Embeddings extracted and saved.\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n"
]
}
],
"source": [
"import json\n",
"\n",
"# --- Process all images in the directory ---\n",
"image_files = [os.path.join(IMAGE_DIR, f) for f in os.listdir(IMAGE_DIR) if f.lower().endswith(('.png', '.jpg', '.jpeg'))]\n",
"all_embeddings = []\n",
"filepaths = []\n",
"\n",
"with open(\"embeddings_factures_osteopathie_1k_qwen.json\", \"w\") as f:\n",
"\n",
" f.write(\"[\\n\")\n",
" first = True\n",
" for i in tqdm(range(0, len(image_files), BATCH_SIZE)):\n",
" batch_paths = image_files[i:i+BATCH_SIZE]\n",
" batch_embeddings = get_image_embeddings(batch_paths)\n",
" embeddings_list = [emb.tolist() for emb in batch_embeddings]\n",
" for path, emb in zip(batch_paths, embeddings_list):\n",
" if not first:\n",
" f.write(\",\\n\")\n",
" json.dump({\"filepath\": path, \"embedding\": emb}, f)\n",
" first = False\n",
" f.write(\"\\n]\\n\")\n",
"\n",
"print(\"Embeddings extracted and saved.\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "2c3e6dd0",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loaded 2800 samples with embedding dimension 2048\n",
"Applied L2 normalization to embeddings\n",
"(2800, 2048)\n",
"(3918600,)\n",
"mean sim: 0.37961555 std: 0.22605234\n"
]
}
],
"source": [
"from sklearn.cluster import DBSCAN, MeanShift, AffinityPropagation\n",
"from sklearn.preprocessing import normalize\n",
"from sklearn.metrics import silhouette_score\n",
"from sklearn.neighbors import NearestNeighbors\n",
"from sklearn.decomposition import PCA\n",
"import argparse\n",
"import warnings\n",
"warnings.filterwarnings('ignore')\n",
"from sklearn.metrics.pairwise import cosine_similarity\n",
"from datetime import datetime\n",
"import json\n",
"\n",
"embeddings_path = \"/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json\"\n",
"with open(embeddings_path, 'r') as f:\n",
" data = json.load(f)\n",
"\n",
"file_paths = []\n",
"embeddings_list = []\n",
"\n",
"for item in data:\n",
" file_paths.append(item['filepath'])\n",
" embeddings_list.append(item['embedding'])\n",
"\n",
"embeddings = np.array(embeddings_list, dtype=np.float32)\n",
"print(f\"Loaded {len(file_paths)} samples with embedding dimension {embeddings.shape[1]}\")\n",
"\n",
"# Normalize embeddings using L2 normalization for cosine distance\n",
"embeddings_normalized = normalize(embeddings, norm='l2', axis=1)\n",
"print(\"Applied L2 normalization to embeddings\")\n",
"\n",
"sims = cosine_similarity(embeddings)\n",
"print(embeddings.shape)\n",
"# lấy upper triangle exclude diagonal để inspect\n",
"triu_idxs = np.triu_indices_from(sims, k=1)\n",
"dist_vals = sims[triu_idxs]\n",
"print(dist_vals.shape)\n",
"print(\"mean sim:\", dist_vals.mean(), \"std:\", dist_vals.std())"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "29620d93",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "27fea4f3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" 100% |███████████████| 1091/1091 [174.1ms elapsed, 0s remaining, 6.3K samples/s] \n"
]
}
],
"source": [
"import fiftyone as fo\n",
"import fiftyone.brain as fob\n",
"import numpy as np\n",
"from sklearn.mixture import GaussianMixture\n",
"import json\n",
"\n",
"DATASET_NAME = \"mock\"\n",
"\n",
"json_path = \"./embeddings_factures_osteopathie_1k_qwen.json\"\n",
"\n",
"with open(json_path, \"r\") as file:\n",
" embedding_data = json.load(file)\n",
"\n",
"file_paths = []\n",
"embeddings = []\n",
"for i, record in enumerate(embedding_data):\n",
" file_paths.append(record.get(\"filepath\"))\n",
" embeddings.append(record.get(\"embedding\"))\n",
"\n",
"if DATASET_NAME in fo.list_datasets():\n",
" dataset = fo.load_dataset(DATASET_NAME)\n",
" dataset.delete()\n",
"dataset = fo.Dataset(DATASET_NAME)\n",
"\n",
"# Add samples to the dataset\n",
"samples = [fo.Sample(filepath=p) for p in file_paths]\n",
"dataset.add_samples(samples)\n",
"\n",
"# Building Gaussian mixture model (GMM)\n",
"n_gaussians = 50\n",
"gmm = GaussianMixture(n_components=n_gaussians, random_state=42)\n",
"gmm.fit(embeddings)\n",
"cluster_labels = gmm.predict(embeddings)\n",
"\n",
"# Adding labeled embeddings to visualization\n",
"dataset.add_sample_field(\"gmm_cluster\", fo.IntField)\n",
"for sample, label in zip(dataset, cluster_labels):\n",
" sample[\"gmm_cluster_50_gaussians\"] = int(label)\n",
" sample.save()\n",
"\n",
"n_gaussians = 200\n",
"gmm = GaussianMixture(n_components=n_gaussians, random_state=42)\n",
"gmm.fit(embeddings)\n",
"cluster_labels = gmm.predict(embeddings)\n",
"\n",
"# Adding labeled embeddings to visualization\n",
"dataset.add_sample_field(\"gmm_cluster\", fo.IntField)\n",
"for sample, label in zip(dataset, cluster_labels):\n",
" sample[\"gmm_cluster_200_gaussians\"] = int(label)\n",
" sample.save()\n",
"\n",
"# --- Visualize the Embeddings with t-SNE (the method passed below) ---\n",
"# This will compute a 2D representation of your embeddings\n",
"# for visualization.\n",
"res = fob.compute_visualization(\n",
" dataset,\n",
" embeddings=embeddings,\n",
" brain_key=\"qwen_vision_viz\",\n",
" method=\"tsne\",\n",
" verbose=True\n",
")\n",
"dataset.set_values(\"qwen_umap\", res.current_points)\n",
"\n",
"print(\"UMAP visualization computed. Launch the app to see the plot.\")\n",
"session = fo.launch_app(dataset)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because it is too large Load Diff

90
extract/extract.py Normal file
View File

@@ -0,0 +1,90 @@
import torch
from transformers import Qwen2_5_VLForConditionalGeneration, AutoTokenizer, AutoProcessor, AutoModel
# from qwen_vl_utils import process_vision_info
from PIL import Image
import os
import numpy as np
import json
from tqdm import tqdm
from transformers import LayoutLMv3ImageProcessor, LayoutLMv3Model
# --- Configuration ---
MODEL_NAME = "microsoft/layoutlmv3-base"  # You can choose other model sizes
IMAGE_DIR = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/"
BATCH_SIZE = 8
# --- End Configuration ---

# Check for GPU availability; everything below follows this choice so the
# script also runs on a CPU-only machine.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the model and processor
# model = AutoModel.from_pretrained(
# MODEL_NAME, torch_dtype="bfloat16", device_map="cuda" # , attn_implementation="flash_attention_2",
# )
# Move the model to the detected device instead of hard-coding "cuda", which
# made the CPU fallback above unreachable.
model = LayoutLMv3Model.from_pretrained(MODEL_NAME).to(device)
# Only `pixel_values` is consumed downstream, so the processor's built-in OCR
# step (on by default) is switched off: it is slow and its words/boxes output
# is never read.
processor = LayoutLMv3ImageProcessor.from_pretrained(
    MODEL_NAME,
    apply_ocr=False,
    trust_remote_code=True,
)
def get_image_embeddings(image_paths):
    """
    Load a batch of images and extract one embedding per readable image.

    Args:
        image_paths: Iterable of file paths. Paths that do not end in
            .png/.jpg/.jpeg, or that cannot be opened, are skipped.

    Returns:
        A ``(embeddings, valid_paths)`` tuple. ``embeddings`` is a float16
        NumPy array with one row per entry of ``valid_paths`` (an empty array
        when nothing could be loaded). ``valid_paths`` lists, in order, the
        paths that were actually embedded, so callers can pair each embedding
        with the right file even when some images were skipped.
    """
    images_pil = []
    valid_paths = []
    for path in image_paths:
        if not path.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        try:
            # The processor expects PIL images in RGB format. convert()
            # returns an independent copy, so the file handle can be closed.
            with Image.open(path) as img:
                rgb_image = img.convert("RGB")
        except Exception as e:
            print(f"Warning: Could not load image {path}. Skipping. Error: {e}")
            continue
        images_pil.append(rgb_image)
        valid_paths.append(path)

    if not images_pil:
        return np.array([]), []

    # Resize every image to 224x224 before handing it to the model.
    inputs = processor(
        images=images_pil,
        size={"height": 224, "width": 224},
        return_tensors="pt",
    ).to(device)

    with torch.no_grad():
        outputs = model(pixel_values=inputs["pixel_values"].to(dtype=model.dtype))

    # First element of the model output, first position of each sequence:
    # one vector per image.
    embeddings = outputs[0][:, 0, :]
    return embeddings.to(torch.float16).cpu().numpy(), valid_paths


# --- Process all images in the directory ---
# Sorted so that the output order is reproducible between runs.
image_files = sorted(
    os.path.join(IMAGE_DIR, f)
    for f in os.listdir(IMAGE_DIR)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)

# Stream the results to disk as a JSON array, one entry per image, so that
# the full set of embeddings never has to be held in memory.
with open("embeddings_factures_ostepoathie_1k.json", "w") as f:
    f.write("[\n")
    first = True
    for i in tqdm(range(0, len(image_files), BATCH_SIZE)):
        batch_paths = image_files[i:i + BATCH_SIZE]
        batch_embeddings, valid_paths = get_image_embeddings(batch_paths)
        # Pair embeddings with the paths that were actually embedded, not
        # with the requested batch, so a skipped image cannot shift the pairs.
        for path, emb in zip(valid_paths, batch_embeddings):
            if not first:
                f.write(",\n")
            json.dump({"filepath": path, "embedding": emb.tolist()}, f)
            first = False
    f.write("\n]\n")

print("Embeddings extracted and saved.")

201
extract/extract_donut.py Normal file
View File

@@ -0,0 +1,201 @@
import torch
from transformers import DonutProcessor, VisionEncoderDecoderModel
from PIL import Image
import os
import numpy as np
import json
from tqdm import tqdm
# --- Configuration ---
MODEL_NAME = "naver-clova-ix/donut-base-finetuned-docvqa"  # Donut checkpoint fine-tuned for document VQA
IMAGE_DIR = "/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/"
BATCH_SIZE = 4  # Kept small because Donut is memory intensive
# --- End Configuration ---

# Run on the GPU when torch can see one, otherwise on the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {device}")

# Load the processor and the encoder-decoder model, move the model to the
# selected device and put it in inference mode.
print("Loading Donut model and processor...")
processor = DonutProcessor.from_pretrained(MODEL_NAME)
model = VisionEncoderDecoderModel.from_pretrained(MODEL_NAME)
model.to(device)
model.eval()

# Half precision is only applied on the GPU.
if device == "cuda":
    model = model.half()
def get_document_embeddings(image_paths):
    """
    Extract one embedding per readable document image using the Donut encoder.

    Images are encoded one at a time; each embedding is the mean of the
    encoder's last hidden state over the sequence dimension.

    Args:
        image_paths: Iterable of file paths. Paths that do not end in
            .png/.jpg/.jpeg, or that cannot be opened, are skipped.

    Returns:
        A ``(embeddings, valid_paths)`` tuple: a NumPy array with one row per
        entry of ``valid_paths``, and the list of paths that were loaded.
        Both are empty when no image could be loaded. An image that loads but
        fails during encoding keeps its slot and receives an all-zero vector.
    """
    images_pil = []
    valid_paths = []
    for path in image_paths:
        if not path.lower().endswith(('.png', '.jpg', '.jpeg')):
            continue
        try:
            # convert() returns an independent RGB copy, so the underlying
            # file handle can be released right away.
            with Image.open(path) as img:
                image = img.convert("RGB")
        except Exception as e:
            print(f"Warning: Could not load image {path}. Skipping. Error: {e}")
            continue
        images_pil.append(image)
        valid_paths.append(path)

    if not images_pil:
        return np.array([]), []

    embeddings_list = []
    # Process images one by one to avoid memory issues.
    for path, image in zip(valid_paths, images_pil):
        try:
            pixel_values = processor(image, return_tensors="pt").pixel_values
            pixel_values = pixel_values.to(device)
            if device == "cuda":
                # Match the half-precision model weights used on the GPU.
                pixel_values = pixel_values.half()

            with torch.no_grad():
                encoder_outputs = model.encoder(pixel_values=pixel_values)

            # Average over the sequence dimension to get a fixed-size vector,
            # then drop the leading batch dimension of size one.
            embedding = torch.mean(encoder_outputs.last_hidden_state, dim=1)
            embeddings_list.append(embedding.squeeze(0).cpu().float().numpy())
        except Exception as e:
            print(f"Warning: Could not process image {path}. Error: {e}")
            # Keep embeddings aligned with valid_paths by inserting a zero
            # vector of the encoder's hidden size for the failed image.
            embeddings_list.append(
                np.zeros(model.config.encoder.hidden_size, dtype=np.float32)
            )

    return np.array(embeddings_list), valid_paths
def extract_document_info(image_path, question="What information is in this document?"):
    """
    Ask Donut a question about a document image and return the generated answer.

    The question is wrapped in the DocVQA task prompt and supplied to the
    decoder as its starting tokens, so generation continues from the
    ``<s_answer>`` tag.

    Args:
        image_path: Path of the document image to read.
        question: Natural-language question to ask about the document.

    Returns:
        The answer text with the task tags and end/pad tokens removed, or an
        empty string when anything fails (the error is printed).
    """
    try:
        # convert() returns an independent copy, so the file can be closed.
        with Image.open(image_path) as img:
            image = img.convert("RGB")

        # Prepare the task prompt for document VQA.
        task_prompt = f"<s_docvqa><s_question>{question}</s_question><s_answer>"

        pixel_values = processor(image, return_tensors="pt").pixel_values.to(device)
        if device == "cuda":
            pixel_values = pixel_values.half()

        # The prompt must be given to the decoder explicitly; without it the
        # question never conditions the generation.
        decoder_input_ids = processor.tokenizer(
            task_prompt, add_special_tokens=False, return_tensors="pt"
        ).input_ids.to(device)

        # Never generate past the number of positions the decoder supports.
        max_length = min(512, model.decoder.config.max_position_embeddings)

        with torch.no_grad():
            outputs = model.generate(
                pixel_values,
                decoder_input_ids=decoder_input_ids,
                max_length=max_length,
                pad_token_id=processor.tokenizer.pad_token_id,
                eos_token_id=processor.tokenizer.eos_token_id,
                use_cache=True,
                num_beams=1,
                bad_words_ids=[[processor.tokenizer.unk_token_id]],
                return_dict_in_generate=True,
            )

        decoded_text = processor.batch_decode(outputs.sequences)[0]
        # Remove end-of-sequence and padding markers left in by decoding.
        decoded_text = decoded_text.replace(processor.tokenizer.eos_token, "")
        decoded_text = decoded_text.replace(processor.tokenizer.pad_token, "")

        # Keep only what follows the answer tag.
        answer = decoded_text.split("<s_answer>")[-1].replace("</s_answer>", "").strip()
        return answer
    except Exception as e:
        print(f"Error extracting info from {image_path}: {e}")
        return ""
# --- Process all images in the directory ---
print("Scanning for image files...")
# Sorted so that both the output order and the sample selection below are
# reproducible between runs.
image_files = sorted(
    os.path.join(IMAGE_DIR, f)
    for f in os.listdir(IMAGE_DIR)
    if f.lower().endswith(('.png', '.jpg', '.jpeg'))
)
print(f"Found {len(image_files)} image files")

# Extract embeddings and stream them to disk as a JSON array, one entry per
# image, so the full set never has to be held in memory.
print("Extracting embeddings using Donut...")
with open("embeddings_factures_donut.json", "w", encoding="utf-8") as f:
    f.write("[\n")
    first = True
    for i in tqdm(range(0, len(image_files), BATCH_SIZE), desc="Processing batches"):
        batch_paths = image_files[i:i + BATCH_SIZE]
        batch_embeddings, valid_paths = get_document_embeddings(batch_paths)
        if len(batch_embeddings) > 0:
            for path, emb in zip(valid_paths, batch_embeddings):
                emb = emb.tolist()
                if not first:
                    f.write(",\n")
                entry = {
                    "filepath": path,
                    "embedding": emb,
                    "model": "donut-base-finetuned-docvqa",
                    "embedding_size": len(emb)
                }
                json.dump(entry, f)
                first = False
    f.write("\n]\n")
print("Embeddings extracted and saved to 'embeddings_factures_donut.json'")

# Optional: Extract some sample document information
print("\nExtracting sample document information...")
sample_images = image_files[:3]  # Process first 3 images as samples
sample_info = []

# The same questions are asked of every sample image.
questions = [
    "What is the total amount?",
    "What is the invoice number?",
    "What is the date?",
    "Who is the vendor?",
    "What are the main items?"
]

for img_path in sample_images:
    print(f"Processing: {os.path.basename(img_path)}")
    info = {"filepath": img_path, "extracted_info": {}}
    for question in questions:
        answer = extract_document_info(img_path, question)
        info["extracted_info"][question] = answer
        print(f" {question}: {answer}")
    sample_info.append(info)

# Save sample extraction results. ensure_ascii=False writes accented
# characters verbatim, so the file encoding is pinned to UTF-8 instead of
# depending on the platform default.
with open("donut_sample_extractions.json", "w", encoding="utf-8") as f:
    json.dump(sample_info, f, indent=2, ensure_ascii=False)

print("Sample document information extracted and saved to 'donut_sample_extractions.json'")
print("Processing completed!")

139
extract/test.ipynb Normal file
View File

@@ -0,0 +1,139 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 10,
"id": "a314a8ac",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([[0. , 0. ],\n",
" [0.26726124, 0.56694671]])"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from sklearn.metrics.pairwise import cosine_similarity\n",
"import numpy as np\n",
"X = [[0, 0, 0], [1, 2, 3]]\n",
"Y = [[1, 0, 0], [1, 1, 0]]\n",
"cosine_similarity(X, Y)"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "4b560c4f",
"metadata": {},
"outputs": [],
"source": [
"sims = cosine_similarity(X)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d8d5d17a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 34,
"id": "a1098a5a",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(3, 3)\n",
"(array([0, 0, 1]), array([1, 2, 2]))\n",
"(3,)\n",
"mean sim: -0.3333333333333334 std: 0.47140452079103173\n"
]
}
],
"source": [
"# X = np.array([\n",
"# [0, 0, 0], \n",
"# [-1, 100, -1000],\n",
"# [-1, -2, -4]\n",
"# ]\n",
"# )\n",
"\n",
"X = np.array([\n",
" [0, 0, 0], \n",
" [1,1,1],\n",
" [-1, -1, -1]\n",
" ]\n",
" )\n",
"print(X.shape)\n",
"sims = cosine_similarity(X)\n",
"\n",
"triu_idxs = np.triu_indices_from(sims, k=1)\n",
"print(triu_idxs)\n",
"dist_vals = sims[triu_idxs]\n",
"print(dist_vals.shape)\n",
"print(\"mean sim:\", dist_vals.mean(), \"std:\", dist_vals.std())"
]
},
{
"cell_type": "code",
"execution_count": 27,
"id": "2dacad18",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([0.])"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dist_vals"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "76d25e07",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

560
filter/analyze_labels.py Normal file
View File

@@ -0,0 +1,560 @@
#!/usr/bin/env python3
"""
Analyze 'label' fields in a JSON dataset and produce summaries.
- Handles entries where 'label' is either an object or a list of objects.
- Computes distributions (is_bill, profession, currency, IDs presence, handwriting/rotation).
- Computes numeric stats (total_billed, amount_paid, remaining_payment, coverages).
- Parses dates and shows temporal distribution.
- Analyzes items: count, sum of amounts and coverages, and mismatches vs total_billed.
- Emits a concise stdout summary and writes CSVs and a Markdown report.
Usage:
python analyze_labels.py --input 008_label_data_sample_seed_1997.json --out-dir .
"""
from __future__ import annotations
import argparse
import csv
import json
import math
import re
from collections import Counter
from datetime import datetime
from pathlib import Path
from statistics import mean, median
from typing import Any, Dict, Iterable, List, Optional, Tuple
# Label keys that are converted with to_float() and summarized numerically.
NUMERIC_FIELDS = [
    "total_billed",
    "amount_paid",
    "remaining_payment",
    "client_part",
    "mandatory_coverage",
    "complementary_coverage",
]
def parse_args() -> argparse.Namespace:
    """Parse the command-line options of the label analysis script.

    Returns:
        The parsed namespace with ``input``, ``out_dir``, ``max_professions``,
        ``no_plots``, ``plot_top_k`` and ``plot_format`` attributes.
    """
    parser = argparse.ArgumentParser(description="Analyze 'label' fields in JSON dataset")

    # Each entry is (flag, keyword arguments for add_argument); the options
    # are registered in this order.
    option_specs = [
        ("--input", {"required": True, "help": "Path to JSON file (list of records)"}),
        (
            "--out-dir",
            {"default": None, "help": "Output directory for reports (default: alongside input)"},
        ),
        (
            "--max-professions",
            {"type": int, "default": 50, "help": "Max professions to list in report"},
        ),
        (
            "--no-plots",
            {
                "action": "store_true",
                "help": "Disable generating plots (PNG) and embedding into report",
            },
        ),
        (
            "--plot-top-k",
            {
                "type": int,
                "default": 20,
                "help": "Top-K categories to visualize for profession/currency",
            },
        ),
        (
            "--plot-format",
            {
                "type": str,
                "default": "png",
                "choices": ["png", "jpg", "jpeg"],
                "help": "Image format for plots",
            },
        ),
    ]
    for flag, options in option_specs:
        parser.add_argument(flag, **options)

    return parser.parse_args()
def load_json(path: Path) -> List[Dict[str, Any]]:
    """Read a UTF-8 JSON file whose top-level value is a list.

    Args:
        path: Location of the JSON file.

    Returns:
        The decoded top-level list.

    Raises:
        ValueError: If the top-level JSON value is not a list.
    """
    with path.open("r", encoding="utf-8") as handle:
        payload = json.load(handle)
    if isinstance(payload, list):
        return payload
    raise ValueError("Top-level JSON must be a list of records")
def to_bool(value: Any) -> Optional[bool]:
    """Interpret a loosely typed value as a boolean.

    Booleans are returned unchanged, other numbers are converted with
    ``bool()``, and strings are matched case-insensitively (after trimming)
    against a fixed set of true/false spellings.

    Returns:
        ``True`` or ``False`` when the value is recognized, otherwise ``None``
        (including for ``None`` itself and for unsupported types).
    """
    if isinstance(value, bool):
        return value
    if isinstance(value, (int, float)):
        return bool(value)
    if not isinstance(value, str):
        # None and every other type are treated as "unknown".
        return None

    token = value.strip().lower()
    if token in {"true", "t", "1", "yes", "y"}:
        return True
    if token in {"false", "f", "0", "no", "n"}:
        return False
    return None
def to_float(value: Any) -> Optional[float]:
    """Convert a value to ``float``, returning ``None`` when that is not possible.

    ``None`` and the empty string are treated as missing. Anything that
    ``float()`` rejects with ``TypeError`` or ``ValueError`` also yields
    ``None``.
    """
    if value is None or value == "":
        return None
    try:
        number = float(value)
    except (TypeError, ValueError):
        return None
    return number
def parse_date(value: Any) -> Optional[datetime]:
    """Parse a date string, trying day-first formats before ISO order.

    The whole (trimmed) string is tried against each known format first. If
    none fits, the first date-like token found in the text
    (``dd-mm-yyyy``, ``dd/mm/yyyy`` or ``yyyy-mm-dd``) is tried the same way.

    Returns:
        The parsed ``datetime``, or ``None`` when the value is empty, is not
        a string, or contains no parseable date.
    """
    if not value or not isinstance(value, str):
        return None
    text = value.strip()
    if not text:
        return None

    # Common patterns (day-first), tried in this order.
    formats = (
        "%d-%m-%Y",
        "%d/%m/%Y",
        "%Y-%m-%d",
        "%d-%m-%y",
        "%d/%m/%y",
    )

    def first_match(candidate: str) -> Optional[datetime]:
        """Return the result of the first format that parses *candidate*."""
        for fmt in formats:
            try:
                return datetime.strptime(candidate, fmt)
            except ValueError:
                continue
        return None

    parsed = first_match(text)
    if parsed is not None:
        return parsed

    # Fall back to a date-like token embedded in surrounding text
    # (e.g., 2025-02-07 or 07-02-2025).
    found = re.search(r"(\d{2}[/-]\d{2}[/-]\d{4}|\d{4}-\d{2}-\d{2})", text)
    if found is None:
        return None
    return first_match(found.group(1))
def safe_get(d: Dict[str, Any], key: str, default=None):
    """Return ``d[key]`` when the key is present, otherwise ``default``."""
    if key in d:
        return d[key]
    return default
def flatten_labels(records: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    """Turn each record's ``label`` into a flat list of label dicts.

    A ``label`` may be a single dict or a list of dicts; non-dict list
    entries and records without a usable label are ignored. Every returned
    dict is a shallow copy tagged with ``__source_image__`` (the record's
    ``image``, or its ``image_files`` joined by commas). Labels that came
    from a list also carry ``__multi_index__``, their position in that list.
    """
    flattened: List[Dict[str, Any]] = []

    def tagged(label_obj: Dict[str, Any], source: Any, index: Optional[int] = None) -> Dict[str, Any]:
        """Copy *label_obj* and attach the bookkeeping keys."""
        row = dict(label_obj)
        row["__source_image__"] = source
        if index is not None:
            row["__multi_index__"] = index
        return row

    for record in records:
        source = record.get("image") or ",".join(record.get("image_files", []) or [])
        label = record.get("label")
        if isinstance(label, dict):
            flattened.append(tagged(label, source))
        elif isinstance(label, list):
            flattened.extend(
                tagged(entry, source, position)
                for position, entry in enumerate(label)
                if isinstance(entry, dict)
            )
    return flattened
def presence_counts(labels: List[Dict[str, Any]], fields: Iterable[str]) -> Dict[str, int]:
    """Count, for each field, how many labels hold a non-empty value.

    A value counts as present when it is neither ``None`` nor the empty
    string.

    Returns:
        A mapping from field name to the number of labels where it is present.
    """
    return {
        field: sum(1 for lbl in labels if safe_get(lbl, field) not in (None, ""))
        for field in fields
    }
def numeric_summary(values: List[Optional[float]]) -> Dict[str, Any]:
    """Summarize a list of optional numbers.

    Entries that are not ``int``/``float`` or that are NaN are treated as
    missing.

    Returns:
        A dict that always contains ``count`` (usable values) and ``missing``
        (ignored entries). When at least one usable value exists it also
        contains ``min``, ``p25``, ``median``, ``p75``, ``max``, ``mean`` and
        ``sum``.
    """
    clean = [v for v in values if isinstance(v, (int, float)) and not math.isnan(v)]
    missing = len(values) - len(clean)
    if not clean:
        # Report the missing count here too: it matters most when every
        # value is missing.
        return {"count": 0, "missing": missing}
    return {
        "count": len(clean),
        "min": min(clean),
        "p25": percentile(clean, 25),
        "median": median(clean),
        "p75": percentile(clean, 75),
        "max": max(clean),
        "mean": mean(clean),
        "sum": sum(clean),
        "missing": missing,
    }
def percentile(arr: List[float], p: float) -> float:
    """Return the *p*-th percentile of *arr* using linear interpolation.

    The values are sorted, the fractional rank ``(n - 1) * p / 100`` is
    computed, and the result is interpolated between the two neighbouring
    values. An empty input yields NaN.
    """
    if not arr:
        return float("nan")

    ordered = sorted(arr)
    rank = (len(ordered) - 1) * (p / 100.0)
    lower = math.floor(rank)
    upper = math.ceil(rank)
    if lower == upper:
        # The rank falls exactly on an element; no interpolation needed.
        return ordered[int(rank)]

    lower_part = ordered[lower] * (upper - rank)
    upper_part = ordered[upper] * (rank - lower)
    return lower_part + upper_part
def write_csv(path: Path, headers: List[str], rows: Iterable[Iterable[Any]]) -> None:
    """Write a header line followed by *rows* to a UTF-8 CSV file.

    Missing parent directories are created and an existing file is
    overwritten.
    """
    path.parent.mkdir(parents=True, exist_ok=True)
    with path.open("w", encoding="utf-8", newline="") as handle:
        writer = csv.writer(handle)
        writer.writerow(headers)
        writer.writerows(rows)
def try_import_matplotlib():
    """Import ``matplotlib.pyplot`` with the headless "Agg" backend.

    Returns:
        The ``pyplot`` module, or ``None`` when importing or configuring
        matplotlib fails for any reason.
    """
    try:
        import matplotlib  # type: ignore[import-not-found]

        # Select the non-interactive backend before pyplot is imported.
        matplotlib.use("Agg")  # headless backend
        import matplotlib.pyplot as pyplot  # type: ignore[import-not-found]
    except Exception:
        return None
    return pyplot
def save_bar_plot(plt, x_labels: List[str], values: List[float], title: str, out_path: Path, rotation: int = 0):
    """Draw a bar chart of *values* and save it to *out_path*.

    Args:
        plt: The ``matplotlib.pyplot`` module to draw with.
        x_labels: Tick labels, one per bar.
        values: Bar heights.
        title: Chart title.
        out_path: Destination image file; parent directories are created.
        rotation: Tick-label rotation in degrees. Rotated labels are
            right-aligned, unrotated ones are centered.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    # The figure widens with the number of labels, clamped to 6..14 inches.
    width = max(6, min(14, 0.4 * len(x_labels) + 3))
    fig, ax = plt.subplots(figsize=(width, 4))

    ax.bar(range(len(values)), values, color="#4C78A8")
    ax.set_title(title)
    ax.set_ylabel("count")

    alignment = "right" if rotation else "center"
    ax.set_xticks(range(len(x_labels)))
    ax.set_xticklabels(x_labels, rotation=rotation, ha=alignment)

    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
def save_hist_plot(plt, values: List[float], title: str, out_path: Path, bins: int = 30):
    """Draw a histogram of *values* and save it to *out_path*.

    Args:
        plt: The ``matplotlib.pyplot`` module to draw with.
        values: Data to bin.
        title: Chart title.
        out_path: Destination image file; parent directories are created.
        bins: Number of histogram bins.
    """
    out_path.parent.mkdir(parents=True, exist_ok=True)

    fig, ax = plt.subplots(figsize=(7, 4))
    ax.hist(values, bins=bins, color="#72B7B2", edgecolor="white")

    ax.set_title(title)
    ax.set_xlabel("value")
    ax.set_ylabel("count")

    fig.tight_layout()
    fig.savefig(out_path, dpi=150)
    plt.close(fig)
def produce_plots(
    out_dir: Path,
    args: argparse.Namespace,
    is_bill_counter: Counter,
    bill_paid_counter: Counter,
    handwriting_counter: Counter,
    rotation_counter: Counter,
    profession_counter: Counter,
    currency_counter: Counter,
    year_month_counter: Counter,
    numeric_data: Dict[str, List[Optional[float]]],
    items_per_label: List[int],
) -> List[Path]:
    """Generate plots and return list of created file paths.

    Plots are written to ``out_dir / "plots"`` using ``args.plot_format`` as
    the file extension. Nothing is generated (and an empty list is returned)
    when ``args.no_plots`` is set or matplotlib cannot be imported. Empty
    counters and numeric fields without usable values are skipped.
    """
    if args.no_plots:
        return []
    plt = try_import_matplotlib()
    if plt is None:
        # matplotlib not available; skip plotting gracefully
        return []

    created: List[Path] = []
    plots_dir = out_dir / "plots"
    ext = args.plot_format

    # Plain distributions all render the same way: one bar per observed value.
    simple_counters = [
        (is_bill_counter, "is_bill", "is_bill distribution"),
        (bill_paid_counter, "bill_paid", "bill_paid distribution"),
        (handwriting_counter, "is_handwriting", "is_handwriting"),
        (rotation_counter, "is_rotated", "is_rotated"),
    ]
    for counter, stem, title in simple_counters:
        if not counter:
            continue
        p = plots_dir / f"{stem}.{ext}"
        save_bar_plot(plt, [str(k) for k in counter], list(counter.values()), title, p)
        created.append(p)

    # Professions (top-K)
    if profession_counter:
        top = profession_counter.most_common(max(1, min(args.plot_top_k, len(profession_counter))))
        labels = []
        for k, _ in top:
            text = str(k)
            # Cap tick labels at 20 characters and mark truncation visibly.
            labels.append(text if len(text) <= 20 else text[:17] + "...")
        vals = [v for _, v in top]
        p = plots_dir / f"professions_top{len(labels)}.{ext}"
        save_bar_plot(plt, labels, vals, f"Top {len(labels)} professions", p, rotation=45)
        created.append(p)

    # Currency
    if currency_counter:
        top = currency_counter.most_common(max(1, min(args.plot_top_k, len(currency_counter))))
        labels = [str(k) for k, _ in top]
        vals = [v for _, v in top]
        p = plots_dir / f"currency.{ext}"
        save_bar_plot(plt, labels, vals, "Currency distribution", p)
        created.append(p)

    # Year-month, in chronological order
    if year_month_counter:
        items = sorted(year_month_counter.items(), key=lambda x: (x[0][0], x[0][1]))
        labels = [f"{y:04d}-{m:02d}" for (y, m), _ in items]
        vals = [v for _, v in items]
        p = plots_dir / f"invoice_year_month.{ext}"
        save_bar_plot(plt, labels, vals, "Invoices by year-month", p, rotation=45)
        created.append(p)

    # Items per label
    if items_per_label:
        p = plots_dir / f"items_per_label.{ext}"
        # Square-root rule for the bin count, clamped to 5..30.
        bins = min(30, max(5, int(len(items_per_label) ** 0.5)))
        save_hist_plot(plt, items_per_label, "Items per label (histogram)", p, bins=bins)
        created.append(p)

    # Numeric fields histograms
    for k, vals_all in numeric_data.items():
        vals = [float(v) for v in vals_all if isinstance(v, (int, float)) and not math.isnan(v)]
        if not vals:
            continue
        p = plots_dir / f"hist_{k}.{ext}"
        save_hist_plot(plt, vals, f"{k} (histogram)", p)
        created.append(p)

    return created
def main() -> None:
    """Run the label analysis end to end.

    Loads the input JSON, flattens and normalizes the labels, computes
    distributions, numeric summaries, date coverage, item statistics and
    data-quality issues, then writes the CSV files, the Markdown report and
    (unless disabled) the plots into the output directory. A short summary is
    printed to stdout.
    """
    args = parse_args()
    in_path = Path(args.input).resolve()
    out_dir = Path(args.out_dir).resolve() if args.out_dir else in_path.parent
    out_dir.mkdir(parents=True, exist_ok=True)

    records = load_json(in_path)
    labels = flatten_labels(records)
    n_total_rec = len(records)
    n_labels = len(labels)

    # Normalize some fields in place. The raw is_bill value is overwritten
    # here, so remember now which labels carried a value that could not be
    # interpreted; the data-quality pass below reports them.
    unparsed_is_bill: List[bool] = []
    for lbl in labels:
        raw_is_bill = lbl.get("is_bill")
        lbl["is_bill"] = to_bool(raw_is_bill)
        unparsed_is_bill.append(raw_is_bill not in (None, "") and lbl["is_bill"] is None)
        lbl["bill_paid"] = to_bool(lbl.get("bill_paid"))
        # Normalize numeric fields in-place for ease of stats
        for k in NUMERIC_FIELDS:
            lbl[k] = to_float(lbl.get(k))

    # Basic distributions
    is_bill_counter = Counter(lbl.get("is_bill") for lbl in labels)
    bill_paid_counter = Counter(lbl.get("bill_paid") for lbl in labels)
    currency_counter = Counter(lbl.get("currency") for lbl in labels if lbl.get("currency"))
    profession_counter = Counter((lbl.get("profession") or "").strip() or "(missing)" for lbl in labels)

    # Presence of identifiers and key fields
    id_presence = presence_counts(labels, [
        "adeli_number",
        "rpps_number",
        "finess_number",
        "prescripteur_finess_number",
        "doctor_name",
        "invoice_issuer",
        "insured_name",
        "beneficiary_name",
        "security_number",
        "currency",
    ])

    # Handwriting/rotation flags
    handwriting_counter = Counter(to_bool(lbl.get("is_handwriting")) for lbl in labels)
    rotation_counter = Counter(to_bool(lbl.get("is_rotated")) for lbl in labels)

    # Numeric stats
    numeric_stats: Dict[str, Dict[str, Any]] = {}
    for k in NUMERIC_FIELDS:
        numeric_stats[k] = numeric_summary([lbl.get(k) for lbl in labels])
    # Keep raw numeric data for histograms
    numeric_raw: Dict[str, List[Optional[float]]] = {k: [lbl.get(k) for lbl in labels] for k in NUMERIC_FIELDS}

    # Dates
    invoice_dates = [parse_date(lbl.get("invoice_date")) for lbl in labels]
    invoice_dates_clean = [d for d in invoice_dates if d is not None]
    year_month_counter = Counter((d.year, d.month) for d in invoice_dates_clean)

    # Items analysis: item count per label and total_billed vs. item sum.
    items_per_label: List[int] = []
    mismatch_records: List[Tuple[str, Optional[float], Optional[float], Optional[float]]] = []
    for lbl in labels:
        items = lbl.get("items") or []
        if not isinstance(items, list):
            items = []
        items_per_label.append(len(items))

        # Stays None when the label has no dict items, so that such labels
        # are not compared against total_billed.
        s_amount = None
        for it in items:
            if not isinstance(it, dict):
                continue
            a = to_float(it.get("amount"))
            s_amount = (s_amount or 0.0) + (a or 0.0)

        total_billed = to_float(lbl.get("total_billed"))
        if total_billed is not None and s_amount is not None:
            diff = total_billed - s_amount
            if abs(diff) > 1e-6:
                mismatch_records.append((
                    str(lbl.get("__source_image__")), total_billed, s_amount, diff
                ))

    # Data quality issues
    issues: List[Dict[str, Any]] = []
    for lbl, bad_is_bill in zip(labels, unparsed_is_bill):
        src = str(lbl.get("__source_image__"))
        # is_bill must be True/False or missing; flag values that could not be parsed
        if bad_is_bill:
            issues.append({"source": src, "issue": "is_bill not boolean"})
        # bill_paid True but amount_paid missing
        if lbl.get("bill_paid") is True and to_float(lbl.get("amount_paid")) is None:
            issues.append({"source": src, "issue": "bill_paid True but amount_paid missing"})
        # remaining_payment > 0 but bill_paid True
        rp = to_float(lbl.get("remaining_payment"))
        if lbl.get("bill_paid") is True and (rp or 0) > 0:
            issues.append({"source": src, "issue": "bill_paid True but remaining_payment > 0"})
        # Negative amounts and missing quantities on items
        items = lbl.get("items") or []
        if isinstance(items, list):
            for idx, it in enumerate(items):
                if not isinstance(it, dict):
                    continue
                a = to_float(it.get("amount"))
                if a is not None and a < 0:
                    issues.append({"source": src, "issue": f"item[{idx}].amount negative: {a}"})
                q = to_float(it.get("quantity"))
                if q is None:
                    # Not strictly an issue, but mark for completeness
                    issues.append({"source": src, "issue": f"item[{idx}].quantity missing"})
        # Missing currency on bill
        if lbl.get("is_bill") is True and not lbl.get("currency"):
            issues.append({"source": src, "issue": "currency missing for bill"})

    # Outputs
    # 1) CSVs
    write_csv(out_dir / "professions_counts.csv", ["profession", "count"], profession_counter.most_common())
    write_csv(out_dir / "currency_counts.csv", ["currency", "count"], currency_counter.most_common())
    write_csv(out_dir / "is_bill_counts.csv", ["is_bill", "count"], is_bill_counter.items())
    write_csv(out_dir / "bill_paid_counts.csv", ["bill_paid", "count"], bill_paid_counter.items())
    write_csv(out_dir / "id_presence.csv", ["field", "present_count"], id_presence.items())
    write_csv(out_dir / "item_total_billed_mismatches.csv", ["source_image", "total_billed", "sum_item_amount", "diff"], mismatch_records)
    write_csv(out_dir / "issues.csv", ["source", "issue"], ((i["source"], i["issue"]) for i in issues))

    # 2) Markdown report
    md = []
    md.append("# Label Analysis Report\n")
    md.append(f"Input: `{in_path.name}`\n")
    md.append("")
    md.append("## Overview\n")
    md.append(f"- Total records: {n_total_rec}")
    md.append(f"- Total labels (flattened): {n_labels}")
    md.append(f"- is_bill distribution: {dict(is_bill_counter)}")
    md.append(f"- bill_paid distribution: {dict(bill_paid_counter)}")
    if invoice_dates_clean:
        md.append(
            f"- Invoice dates span: {min(invoice_dates_clean).date()} .. {max(invoice_dates_clean).date()}"
        )
        md.append(f"- Unique year-month pairs: {len(year_month_counter)}")
    else:
        md.append("- Invoice dates: none parseable")

    md.append("\n## Professions (top)\n")
    for prof, cnt in profession_counter.most_common(args.max_professions):
        md.append(f"- {prof}: {cnt}")

    md.append("\n## Currency distribution\n")
    for cur, cnt in currency_counter.most_common():
        md.append(f"- {cur}: {cnt}")

    md.append("\n## Identifier and key field presence\n")
    for k, v in id_presence.items():
        md.append(f"- {k}: {v} present")

    md.append("\n## Flags\n")
    md.append(f"- is_handwriting: {dict(handwriting_counter)}")
    md.append(f"- is_rotated: {dict(rotation_counter)}")

    md.append("\n## Numeric summaries\n")
    for k, stats in numeric_stats.items():
        md.append(f"- {k}: {stats}")

    if items_per_label:
        md.append("\n## Items analysis\n")
        md.append(f"- Items per label: count={len(items_per_label)}, min={min(items_per_label)}, max={max(items_per_label)}, mean={mean(items_per_label):.2f}")
        n_mismatch = len(mismatch_records)
        md.append(f"- total_billed vs sum(items.amount) mismatches: {n_mismatch}")

    if issues:
        md.append("\n## Data quality issues (sample)\n")
        # Only the first 50 issues go into the report; issues.csv has all of them.
        for row in issues[:50]:
            md.append(f"- {row['source']}: {row['issue']}")

    # 3) Plots (if enabled)
    created_plots = produce_plots(
        out_dir=out_dir,
        args=args,
        is_bill_counter=is_bill_counter,
        bill_paid_counter=bill_paid_counter,
        handwriting_counter=handwriting_counter,
        rotation_counter=rotation_counter,
        profession_counter=profession_counter,
        currency_counter=currency_counter,
        year_month_counter=year_month_counter,
        numeric_data=numeric_raw,
        items_per_label=items_per_label,
    )
    if created_plots:
        md.append("\n## Plots\n")
        for p in created_plots:
            # Link images relative to the report so the folder can be moved.
            rel = p.relative_to(out_dir)
            md.append(f"- {p.stem}")
            md.append(f"![]({rel.as_posix()})\n")
    elif not args.no_plots:
        md.append("\n## Plots\n")
        md.append("- matplotlib not available or no data to plot.")

    report_path = out_dir / "label_analysis_report.md"
    report_path.write_text("\n".join(md), encoding="utf-8")

    # Console summary
    print("Label analysis complete.")
    print(f"- Records: {n_total_rec}, Labels: {n_labels}")
    print(f"- is_bill: {dict(is_bill_counter)} | bill_paid: {dict(bill_paid_counter)}")
    print(f"- Professions (top 10): {profession_counter.most_common(10)}")
    print(f"- Currency: {dict(currency_counter)}")
    print(f"Report written to: {report_path}")
    if created_plots:
        print(f"- Plots saved under: {(out_dir / 'plots').as_posix()} ({len(created_plots)} files)")
# Run the analysis only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,4 @@
bill_paid,count
False,68
True,1015
,180
1 bill_paid count
2 False 68
3 True 1015
4 180

View File

@@ -0,0 +1,3 @@
currency,count
EUR,1135
F,2
1 currency count
2 EUR 1135
3 F 2

View File

@@ -0,0 +1,11 @@
field,present_count
adeli_number,898
rpps_number,182
finess_number,139
prescripteur_finess_number,41
doctor_name,1040
invoice_issuer,1120
insured_name,208
beneficiary_name,1093
security_number,492
currency,1137
1 field present_count
2 adeli_number 898
3 rpps_number 182
4 finess_number 139
5 prescripteur_finess_number 41
6 doctor_name 1040
7 invoice_issuer 1120
8 insured_name 208
9 beneficiary_name 1093
10 security_number 492
11 currency 1137

View File

@@ -0,0 +1,3 @@
is_bill,count
True,1124
False,139
1 is_bill count
2 True 1124
3 False 139

279
filter/check_008/issues.csv Normal file
View File

@@ -0,0 +1,279 @@
source,issue
Facture médecine douce-27776417,item[0].quantity missing
551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024,item[0].quantity missing
551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024,item[1].quantity missing
551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024,item[2].quantity missing
551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024,item[3].quantity missing
551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024,item[4].quantity missing
551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024,item[5].quantity missing
Facture médecine douce-27640153,item[0].quantity missing
Facture médecine douce-27640153,item[1].quantity missing
1724397366229Facture_RIVALLANDChrystelle_20240413ICQO,bill_paid True but amount_paid missing
Facture ostéopathe-25417095,bill_paid True but remaining_payment > 0
Facture ostéopathe-GED N-R-2323015,item[0].quantity missing
74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp,item[0].quantity missing
74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp,currency missing for bill
74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp,currency missing for bill
1724658126141JACOB_Francois_2024-111,item[0].quantity missing
Facture des lentilles de contact-27559701,item[5].amount negative: -6.67
91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette,item[0].quantity missing
0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115,item[0].quantity missing
0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115,item[1].quantity missing
7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791,item[0].quantity missing
7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791,item[1].quantity missing
70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo,item[0].quantity missing
70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo,item[1].quantity missing
70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo,item[2].quantity missing
Facture médecine douce-27776613,item[0].quantity missing
6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214,item[0].quantity missing
6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214,item[1].quantity missing
1724399050339FACTURE_132120424_MME_RONDIN_MAHEVA,item[0].quantity missing
ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony,item[0].quantity missing
ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony,item[1].quantity missing
Facture ostéopathe-25979892,item[0].quantity missing
bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136,item[0].quantity missing
bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136,item[1].quantity missing
d4f310c8-3bcf-4bb5-bf53-27ad74d66c1e_FACTURE_OPTICIEN_OPHTALMO,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26740934,item[0].quantity missing
45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred_,item[0].quantity missing
45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred_,item[1].quantity missing
Facture ostéopathe-26626551,item[0].quantity missing
27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001,item[0].quantity missing
27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001,item[1].quantity missing
27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001,item[2].quantity missing
27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001,item[3].quantity missing
8eceb56a-b571-4ab6-a64e-668db6bf4ad5_chiropracteur_fevrier_25,item[0].quantity missing
Facture des lentilles de contact-27576495,item[2].quantity missing
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,item[0].quantity missing
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,item[1].quantity missing
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,item[2].quantity missing
a5bd8b97-a2a8-49ae-900d-0673d2f96637_quittance_optique_,item[0].quantity missing
e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001,item[0].quantity missing
Facture ostéopathe-26300731,item[0].quantity missing
Facture ostéopathe-26300731,item[1].quantity missing
Facture ostéopathe-26300731,item[2].quantity missing
64fa4c19-efa4-48f1-acf8-8c2e8f573b8b_Facture_osteopathe_,item[0].quantity missing
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,item[0].quantity missing
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,item[1].quantity missing
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,item[0].quantity missing
1724658376419facture_ostheo_avril_24,item[0].quantity missing
66de7232-78a6-46a0-8392-47ec4630ad31_image,item[0].quantity missing
66de7232-78a6-46a0-8392-47ec4630ad31_image,item[1].quantity missing
66de7232-78a6-46a0-8392-47ec4630ad31_image,item[2].quantity missing
DDE prestation Facture des lentilles de contact-26772554,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26772554,item[1].quantity missing
DDE prestation Facture des lentilles de contact-26705902,item[3].amount negative: -3.0
c2009b55-58d4-411f-a25a-3dc1e32e4a89_facture_lentille,item[3].amount negative: -3.0
Facture ostéopathe-26112640,item[0].quantity missing
915b97eb-e7d5-415f-ae89-823f08ae3bc0_ACfacture_opthalmo,item[0].quantity missing
915b97eb-e7d5-415f-ae89-823f08ae3bc0_ACfacture_opthalmo,item[1].quantity missing
fa25423e-b79c-48f9-89ae-32ffd95e3101_Facture_Ophtalmologie,item[0].quantity missing
Facture ostéopathe-25073562,item[0].quantity missing
14a6fe3e-c49c-4544-b960-43def15eaf83_doc00563220250401124926,item[0].quantity missing
1724399050374facture_podologue,item[0].quantity missing
Facture des lentilles de contact-27680116,item[0].quantity missing
Facture des lentilles de contact-27680116,item[1].quantity missing
Facture des lentilles de contact-27551187,item[0].quantity missing
46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre,item[0].quantity missing
46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre,item[1].quantity missing
cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures,item[0].quantity missing
cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures,item[1].quantity missing
1724658298461facture_podologue_alicia,item[0].quantity missing
1724397366268IMG_20240417_143900,item[0].quantity missing
b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE,item[0].quantity missing
Facture ostéopathe-26972680,item[0].quantity missing
Facture ostéopathe-25936311,item[0].quantity missing
b96f6ca3-3175-4cd8-a827-e886b5799867_Facture_ophtalmologue_24022025,item[0].quantity missing
9494858a-65e3-425d-a452-17d4300f0115_demande_de_remboursement_mgp_mutuelle_001,bill_paid True but amount_paid missing
895777f3-4478-4e8d-a616-d498850d1523_factures-lunettes_Tess,bill_paid True but remaining_payment > 0
Facture ostéopathe-25152747,bill_paid True but amount_paid missing
Facture ostéopathe-25332784,item[0].quantity missing
Facture ostéopathe-1216961,bill_paid True but amount_paid missing
Facture ostéopathe-1216961,item[0].quantity missing
6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1,item[0].quantity missing
6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1,item[1].quantity missing
9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo,item[0].quantity missing
9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo,item[1].quantity missing
141f7ccc-37ab-4da5-9788-80a27e819b8b_Feuilles_de_soins_Psychologue_x2,bill_paid True but remaining_payment > 0
598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo,item[0].quantity missing
598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo,item[1].quantity missing
70e53730-c41c-402b-a227-2262cdd9fa7b_osteo_Laurette,item[0].quantity missing
39384a4b-4117-4f56-8cca-55fd51e5b062_image,item[0].quantity missing
39384a4b-4117-4f56-8cca-55fd51e5b062_image,item[1].quantity missing
39384a4b-4117-4f56-8cca-55fd51e5b062_image,item[2].quantity missing
d9e1cb2a-b44b-4cc8-b07a-030ddf3acd28_Ordonnance_lentilles_Mary,bill_paid True but amount_paid missing
Facture ostéopathe-25417096,item[0].quantity missing
Facture ostéopathe-26585743,item[0].quantity missing
05c2ef83-54cb-419e-949f-74f898b459bd_osteo_13112024,item[0].quantity missing
Facture ostéopathe-26077624,item[0].quantity missing
e405dba0-fd0b-48a0-9e3f-576398fa3318_Scan2025-02-28_143544_1,item[0].quantity missing
Facture ostéopathe-25847017,bill_paid True but remaining_payment > 0
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[0].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[1].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[2].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[3].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[4].quantity missing
47399c82-030f-4904-b8f4-2c3dc7c46723_image,item[0].quantity missing
47399c82-030f-4904-b8f4-2c3dc7c46723_image,item[1].quantity missing
Facture des lentilles de contact-27708923,item[0].quantity missing
Facture-quittance toutes spécialités-27235184,item[0].quantity missing
Facture-quittance toutes spécialités-27235184,item[1].quantity missing
Facture-quittance toutes spécialités-27235184,item[2].quantity missing
Facture-quittance toutes spécialités-27235184,item[3].quantity missing
1724399050377psy_rachel_4,item[0].quantity missing
e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24,item[0].quantity missing
e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24,item[1].quantity missing
172439712563420240417_105010,item[0].quantity missing
172439712563420240417_105010,item[1].quantity missing
Facture ostéopathe-26750142,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26723200,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26736761,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26736761,item[1].quantity missing
28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034,item[3].amount negative: -4.08
28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034,item[0].quantity missing
28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034,item[1].quantity missing
0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625,item[0].quantity missing
0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625,item[1].quantity missing
b442261a-d504-4cda-98cd-e22a6875bfd0_M._ZIMMER_2401,item[0].quantity missing
b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE,item[0].quantity missing
b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE,item[1].quantity missing
b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE,item[2].quantity missing
bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544,item[0].quantity missing
bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544,item[1].quantity missing
8f8adcc8-9b8f-48e0-94d5-d8cfb9d84a1f_Scanned_20250212_154615,item[0].quantity missing
Facture des lentilles de contact-27647140,item[4].amount negative: -2.9
Facture des lentilles de contact-27647140,item[5].amount negative: -3.0
8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001,item[0].quantity missing
8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001,item[1].quantity missing
8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001,item[2].quantity missing
8de6df48-c40d-4e21-ad21-edc9a95e3bfb_image,item[0].quantity missing
8de6df48-c40d-4e21-ad21-edc9a95e3bfb_image,item[1].quantity missing
8de6df48-c40d-4e21-ad21-edc9a95e3bfb_image,item[2].quantity missing
DDE prestation Facture des lentilles de contact-26542194,item[0].quantity missing
Facture des lentilles de contact-27654407,bill_paid True but amount_paid missing
fd8187e1-b9eb-4727-8041-9fc9e6f1affb_0556_001,item[0].quantity missing
Facture des lentilles de contact-27572543,item[0].quantity missing
c6013936-5850-4027-9f61-32c30486e6d5_optique,item[0].quantity missing
c6013936-5850-4027-9f61-32c30486e6d5_optique,item[1].quantity missing
2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024,item[4].amount negative: -2.9
2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024,item[5].amount negative: -3.0
Facture des lentilles de contact-27601451,item[0].quantity missing
Facture des lentilles de contact-27601451,item[1].quantity missing
Facture ostéopathe-26660518,item[0].quantity missing
Facture ostéopathe-26660518,item[0].quantity missing
Facture ostéopathe-26479073,item[0].quantity missing
c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes_,item[0].quantity missing
c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes_,item[1].quantity missing
c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes_,item[2].quantity missing
511ba153-1eae-4682-ade0-01217ff522bf_Facture_consultation_130125,item[0].quantity missing
8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn,item[0].quantity missing
8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn,item[1].quantity missing
Facture ostéopathe-1119174,item[0].quantity missing
17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo,item[0].quantity missing
17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo,item[1].quantity missing
17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo,item[0].quantity missing
eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358,item[0].quantity missing
eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358,item[1].quantity missing
Facture des lentilles de contact-27646193,item[4].amount negative: -4.9
Facture des lentilles de contact-27646193,item[5].amount negative: -13.8
f339c5ad-7df8-4f3e-a5fa-9dc7616abc3e_IMG_20250327_165906,item[0].quantity missing
f339c5ad-7df8-4f3e-a5fa-9dc7616abc3e_IMG_20250327_165906,item[1].quantity missing
4f09f855-c993-42ea-97b1-3dd728b53e16_devis_ophtalmo,item[0].quantity missing
Facture des lentilles de contact-27665791,bill_paid True but amount_paid missing
Facture des lentilles de contact-27665791,item[0].quantity missing
Facture des lentilles de contact-27665791,item[1].quantity missing
Facture des lentilles de contact-27665791,item[2].quantity missing
252c4ad4-8fa3-4ed1-9508-ee77f6dce6fe_img20250206_20040851,item[0].quantity missing
Facture ostéopathe-25145711,item[0].quantity missing
Facture ostéopathe-26626836,item[0].quantity missing
Facture ostéopathe-26626836,item[0].quantity missing
Facture ostéopathe-26991829,item[0].quantity missing
Facture ostéopathe-26991829,item[1].quantity missing
f6db258d-f400-41b5-a5ca-7878010f0fd7_doc00945920250206091811,item[0].quantity missing
Facture médecine douce-27784833,item[0].quantity missing
abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044,item[0].quantity missing
abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044,item[1].quantity missing
1724397366294irm,item[0].quantity missing
Facture ostéopathe-25233929,item[0].quantity missing
8b6539db-21b4-41dc-a27f-db2cd351fe5f_Doc_34,item[0].quantity missing
819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO,item[0].quantity missing
819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO,item[1].quantity missing
209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue,item[0].quantity missing
209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue,item[1].quantity missing
719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes,item[0].quantity missing
719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes,item[1].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[0].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[1].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[2].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[3].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[4].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[5].quantity missing
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,item[6].quantity missing
DDE prestation Facture des lentilles de contact-26772555,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26772555,item[1].quantity missing
5ff72751-5d92-463c-a272-97fda77c34f8_2025-01-31-13187-Luc_Lisa,item[0].quantity missing
Facture ostéopathe-26744045,item[0].quantity missing
Facture ostéopathe-26699212,item[0].quantity missing
Facture médecine douce-27781814,item[0].quantity missing
Facture ostéopathe-26699279,item[0].quantity missing
1724399050308image,item[0].quantity missing
44e87f9e-a4e2-4851-b383-e1401b36e853_image,item[0].quantity missing
44e87f9e-a4e2-4851-b383-e1401b36e853_image,item[1].quantity missing
c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285,item[0].quantity missing
c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285,item[1].quantity missing
8605959d-bf31-4ea3-a820-bde824a1a4de_20241226_080232,item[0].quantity missing
3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta,item[0].quantity missing
3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta,item[1].quantity missing
29d72de2-da83-483a-8702-939fbe4addfa_20250207_112449,item[0].quantity missing
1724425079263JACOB_Francois_2024-111,item[0].quantity missing
1724425079184Facture_Osteopathe_Brousseau_sader_17avr24,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26685394,item[0].quantity missing
Facture ostéopathe-26447822,bill_paid True but amount_paid missing
DDE prestation Facture des lentilles de contact-26705131,item[2].amount negative: -5.0
3fe044b5-fd6a-4f5c-accc-ce7b987575a8_IMG_6817,item[0].quantity missing
3fe044b5-fd6a-4f5c-accc-ce7b987575a8_IMG_6817,item[1].quantity missing
Facture ostéopathe-25187936,item[0].quantity missing
Facture des lentilles de contact-27545715,item[1].quantity missing
82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN,item[0].quantity missing
Facture des lentilles de contact-27653858,item[4].amount negative: -2.9
Facture des lentilles de contact-27653858,item[5].amount negative: -3.0
3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797,item[0].quantity missing
3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797,item[1].quantity missing
3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797,item[2].quantity missing
3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797,item[3].quantity missing
3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797,item[4].quantity missing
3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797,item[5].quantity missing
Facture ostéopathe-26675471,item[0].quantity missing
38b89b50-a6b6-4816-a495-ce2d9e71082f_Kine-28-11-2024,item[0].quantity missing
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,item[0].quantity missing
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,item[1].quantity missing
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,item[2].quantity missing
9c1cc24a-5f7e-42ef-998d-c813a686a7b5_549D8459-524A-4A5C-9612-09EEA1ADDB58,item[0].quantity missing
9c1cc24a-5f7e-42ef-998d-c813a686a7b5_549D8459-524A-4A5C-9612-09EEA1ADDB58,item[1].quantity missing
c052b9e0-c0d4-473f-93a9-a5dc396a0d86_Facture_ophtalmologique_,item[0].quantity missing
c052b9e0-c0d4-473f-93a9-a5dc396a0d86_Facture_ophtalmologique_,item[1].quantity missing
ec6baa8c-a189-4850-b310-c90bab372cc2_Facture_Krys,bill_paid True but remaining_payment > 0
Facture ostéopathe-26539711,item[0].quantity missing
0bfb5041-2751-421d-b5d5-60d696c361c1_17391960081576654684571490136238,item[0].quantity missing
Facture ostéopathe-26605349,item[0].quantity missing
de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery,item[0].quantity missing
de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery,item[1].quantity missing
1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants,item[0].quantity missing
1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants,item[0].quantity missing
1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants,item[1].quantity missing
Facture ostéopathe-25358482,item[0].quantity missing
34f7e622-ffd8-493d-b955-10a692290a75_image,item[0].quantity missing
Facture ostéopathe-26595219,item[0].quantity missing
Facture des lentilles de contact-27716093,item[0].quantity missing
6d9a0fdf-c334-43b6-946b-45b1b1ab83d7_Facture_osteo_FEV_25,item[0].quantity missing
24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette,item[0].quantity missing
24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette,item[1].quantity missing
24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette,item[2].quantity missing
24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette,item[3].quantity missing
1724399050380osteo_Lina,item[0].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[0].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[1].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[2].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[3].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[4].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[5].quantity missing
1 source issue
2 Facture médecine douce-27776417 item[0].quantity missing
3 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024 item[0].quantity missing
4 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024 item[1].quantity missing
5 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024 item[2].quantity missing
6 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024 item[3].quantity missing
7 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024 item[4].quantity missing
8 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024 item[5].quantity missing
9 Facture médecine douce-27640153 item[0].quantity missing
10 Facture médecine douce-27640153 item[1].quantity missing
11 1724397366229Facture_RIVALLANDChrystelle_20240413ICQO bill_paid True but amount_paid missing
12 Facture ostéopathe-25417095 bill_paid True but remaining_payment > 0
13 Facture ostéopathe-GED N-R-2323015 item[0].quantity missing
14 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp item[0].quantity missing
15 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp currency missing for bill
16 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp currency missing for bill
17 1724658126141JACOB_Francois_2024-111 item[0].quantity missing
18 Facture des lentilles de contact-27559701 item[5].amount negative: -6.67
19 91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette item[0].quantity missing
20 0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115 item[0].quantity missing
21 0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115 item[1].quantity missing
22 7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791 item[0].quantity missing
23 7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791 item[1].quantity missing
24 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo item[0].quantity missing
25 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo item[1].quantity missing
26 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo item[2].quantity missing
27 Facture médecine douce-27776613 item[0].quantity missing
28 6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214 item[0].quantity missing
29 6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214 item[1].quantity missing
30 1724399050339FACTURE_132120424_MME_RONDIN_MAHEVA item[0].quantity missing
31 ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony item[0].quantity missing
32 ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony item[1].quantity missing
33 Facture ostéopathe-25979892 item[0].quantity missing
34 bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136 item[0].quantity missing
35 bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136 item[1].quantity missing
36 d4f310c8-3bcf-4bb5-bf53-27ad74d66c1e_FACTURE_OPTICIEN_OPHTALMO item[0].quantity missing
37 DDE prestation Facture des lentilles de contact-26740934 item[0].quantity missing
38 45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred_ item[0].quantity missing
39 45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred_ item[1].quantity missing
40 Facture ostéopathe-26626551 item[0].quantity missing
41 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001 item[0].quantity missing
42 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001 item[1].quantity missing
43 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001 item[2].quantity missing
44 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001 item[3].quantity missing
45 8eceb56a-b571-4ab6-a64e-668db6bf4ad5_chiropracteur_fevrier_25 item[0].quantity missing
46 Facture des lentilles de contact-27576495 item[2].quantity missing
47 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 item[0].quantity missing
48 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 item[1].quantity missing
49 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 item[2].quantity missing
50 a5bd8b97-a2a8-49ae-900d-0673d2f96637_quittance_optique_ item[0].quantity missing
51 e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001 item[0].quantity missing
52 Facture ostéopathe-26300731 item[0].quantity missing
53 Facture ostéopathe-26300731 item[1].quantity missing
54 Facture ostéopathe-26300731 item[2].quantity missing
55 64fa4c19-efa4-48f1-acf8-8c2e8f573b8b_Facture_osteopathe_ item[0].quantity missing
56 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo item[0].quantity missing
57 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo item[1].quantity missing
58 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo item[0].quantity missing
59 1724658376419facture_ostheo_avril_24 item[0].quantity missing
60 66de7232-78a6-46a0-8392-47ec4630ad31_image item[0].quantity missing
61 66de7232-78a6-46a0-8392-47ec4630ad31_image item[1].quantity missing
62 66de7232-78a6-46a0-8392-47ec4630ad31_image item[2].quantity missing
63 DDE prestation Facture des lentilles de contact-26772554 item[0].quantity missing
64 DDE prestation Facture des lentilles de contact-26772554 item[1].quantity missing
65 DDE prestation Facture des lentilles de contact-26705902 item[3].amount negative: -3.0
66 c2009b55-58d4-411f-a25a-3dc1e32e4a89_facture_lentille item[3].amount negative: -3.0
67 Facture ostéopathe-26112640 item[0].quantity missing
68 915b97eb-e7d5-415f-ae89-823f08ae3bc0_ACfacture_opthalmo item[0].quantity missing
69 915b97eb-e7d5-415f-ae89-823f08ae3bc0_ACfacture_opthalmo item[1].quantity missing
70 fa25423e-b79c-48f9-89ae-32ffd95e3101_Facture_Ophtalmologie item[0].quantity missing
71 Facture ostéopathe-25073562 item[0].quantity missing
72 14a6fe3e-c49c-4544-b960-43def15eaf83_doc00563220250401124926 item[0].quantity missing
73 1724399050374facture_podologue item[0].quantity missing
74 Facture des lentilles de contact-27680116 item[0].quantity missing
75 Facture des lentilles de contact-27680116 item[1].quantity missing
76 Facture des lentilles de contact-27551187 item[0].quantity missing
77 46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre item[0].quantity missing
78 46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre item[1].quantity missing
79 cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures item[0].quantity missing
80 cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures item[1].quantity missing
81 1724658298461facture_podologue_alicia item[0].quantity missing
82 1724397366268IMG_20240417_143900 item[0].quantity missing
83 b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE item[0].quantity missing
84 Facture ostéopathe-26972680 item[0].quantity missing
85 Facture ostéopathe-25936311 item[0].quantity missing
86 b96f6ca3-3175-4cd8-a827-e886b5799867_Facture_ophtalmologue_24022025 item[0].quantity missing
87 9494858a-65e3-425d-a452-17d4300f0115_demande_de_remboursement_mgp_mutuelle_001 bill_paid True but amount_paid missing
88 895777f3-4478-4e8d-a616-d498850d1523_factures-lunettes_Tess bill_paid True but remaining_payment > 0
89 Facture ostéopathe-25152747 bill_paid True but amount_paid missing
90 Facture ostéopathe-25332784 item[0].quantity missing
91 Facture ostéopathe-1216961 bill_paid True but amount_paid missing
92 Facture ostéopathe-1216961 item[0].quantity missing
93 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1 item[0].quantity missing
94 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1 item[1].quantity missing
95 9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo item[0].quantity missing
96 9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo item[1].quantity missing
97 141f7ccc-37ab-4da5-9788-80a27e819b8b_Feuilles_de_soins_Psychologue_x2 bill_paid True but remaining_payment > 0
98 598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo item[0].quantity missing
99 598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo item[1].quantity missing
100 70e53730-c41c-402b-a227-2262cdd9fa7b_osteo_Laurette item[0].quantity missing
101 39384a4b-4117-4f56-8cca-55fd51e5b062_image item[0].quantity missing
102 39384a4b-4117-4f56-8cca-55fd51e5b062_image item[1].quantity missing
103 39384a4b-4117-4f56-8cca-55fd51e5b062_image item[2].quantity missing
104 d9e1cb2a-b44b-4cc8-b07a-030ddf3acd28_Ordonnance_lentilles_Mary bill_paid True but amount_paid missing
105 Facture ostéopathe-25417096 item[0].quantity missing
106 Facture ostéopathe-26585743 item[0].quantity missing
107 05c2ef83-54cb-419e-949f-74f898b459bd_osteo_13112024 item[0].quantity missing
108 Facture ostéopathe-26077624 item[0].quantity missing
109 e405dba0-fd0b-48a0-9e3f-576398fa3318_Scan2025-02-28_143544_1 item[0].quantity missing
110 Facture ostéopathe-25847017 bill_paid True but remaining_payment > 0
111 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[0].quantity missing
112 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[1].quantity missing
113 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[2].quantity missing
114 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[3].quantity missing
115 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[4].quantity missing
116 47399c82-030f-4904-b8f4-2c3dc7c46723_image item[0].quantity missing
117 47399c82-030f-4904-b8f4-2c3dc7c46723_image item[1].quantity missing
118 Facture des lentilles de contact-27708923 item[0].quantity missing
119 Facture-quittance toutes spécialités-27235184 item[0].quantity missing
120 Facture-quittance toutes spécialités-27235184 item[1].quantity missing
121 Facture-quittance toutes spécialités-27235184 item[2].quantity missing
122 Facture-quittance toutes spécialités-27235184 item[3].quantity missing
123 1724399050377psy_rachel_4 item[0].quantity missing
124 e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24 item[0].quantity missing
125 e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24 item[1].quantity missing
126 172439712563420240417_105010 item[0].quantity missing
127 172439712563420240417_105010 item[1].quantity missing
128 Facture ostéopathe-26750142 item[0].quantity missing
129 DDE prestation Facture des lentilles de contact-26723200 item[0].quantity missing
130 DDE prestation Facture des lentilles de contact-26736761 item[0].quantity missing
131 DDE prestation Facture des lentilles de contact-26736761 item[1].quantity missing
132 28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034 item[3].amount negative: -4.08
133 28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034 item[0].quantity missing
134 28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034 item[1].quantity missing
135 0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625 item[0].quantity missing
136 0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625 item[1].quantity missing
137 b442261a-d504-4cda-98cd-e22a6875bfd0_M._ZIMMER_2401 item[0].quantity missing
138 b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE item[0].quantity missing
139 b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE item[1].quantity missing
140 b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE item[2].quantity missing
141 bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544 item[0].quantity missing
142 bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544 item[1].quantity missing
143 8f8adcc8-9b8f-48e0-94d5-d8cfb9d84a1f_Scanned_20250212_154615 item[0].quantity missing
144 Facture des lentilles de contact-27647140 item[4].amount negative: -2.9
145 Facture des lentilles de contact-27647140 item[5].amount negative: -3.0
146 8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001 item[0].quantity missing
147 8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001 item[1].quantity missing
148 8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001 item[2].quantity missing
149 8de6df48-c40d-4e21-ad21-edc9a95e3bfb_image item[0].quantity missing
150 8de6df48-c40d-4e21-ad21-edc9a95e3bfb_image item[1].quantity missing
151 8de6df48-c40d-4e21-ad21-edc9a95e3bfb_image item[2].quantity missing
152 DDE prestation Facture des lentilles de contact-26542194 item[0].quantity missing
153 Facture des lentilles de contact-27654407 bill_paid True but amount_paid missing
154 fd8187e1-b9eb-4727-8041-9fc9e6f1affb_0556_001 item[0].quantity missing
155 Facture des lentilles de contact-27572543 item[0].quantity missing
156 c6013936-5850-4027-9f61-32c30486e6d5_optique item[0].quantity missing
157 c6013936-5850-4027-9f61-32c30486e6d5_optique item[1].quantity missing
158 2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024 item[4].amount negative: -2.9
159 2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024 item[5].amount negative: -3.0
160 Facture des lentilles de contact-27601451 item[0].quantity missing
161 Facture des lentilles de contact-27601451 item[1].quantity missing
162 Facture ostéopathe-26660518 item[0].quantity missing
163 Facture ostéopathe-26660518 item[0].quantity missing
164 Facture ostéopathe-26479073 item[0].quantity missing
165 c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes_ item[0].quantity missing
166 c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes_ item[1].quantity missing
167 c4930995-bbbe-45dd-9afe-800005fb5890_Facture_lunettes_ item[2].quantity missing
168 511ba153-1eae-4682-ade0-01217ff522bf_Facture_consultation_130125 item[0].quantity missing
169 8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn item[0].quantity missing
170 8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn item[1].quantity missing
171 Facture ostéopathe-1119174 item[0].quantity missing
172 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo item[0].quantity missing
173 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo item[1].quantity missing
174 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo item[0].quantity missing
175 eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358 item[0].quantity missing
176 eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358 item[1].quantity missing
177 Facture des lentilles de contact-27646193 item[4].amount negative: -4.9
178 Facture des lentilles de contact-27646193 item[5].amount negative: -13.8
179 f339c5ad-7df8-4f3e-a5fa-9dc7616abc3e_IMG_20250327_165906 item[0].quantity missing
180 f339c5ad-7df8-4f3e-a5fa-9dc7616abc3e_IMG_20250327_165906 item[1].quantity missing
181 4f09f855-c993-42ea-97b1-3dd728b53e16_devis_ophtalmo item[0].quantity missing
182 Facture des lentilles de contact-27665791 bill_paid True but amount_paid missing
183 Facture des lentilles de contact-27665791 item[0].quantity missing
184 Facture des lentilles de contact-27665791 item[1].quantity missing
185 Facture des lentilles de contact-27665791 item[2].quantity missing
186 252c4ad4-8fa3-4ed1-9508-ee77f6dce6fe_img20250206_20040851 item[0].quantity missing
187 Facture ostéopathe-25145711 item[0].quantity missing
188 Facture ostéopathe-26626836 item[0].quantity missing
189 Facture ostéopathe-26626836 item[0].quantity missing
190 Facture ostéopathe-26991829 item[0].quantity missing
191 Facture ostéopathe-26991829 item[1].quantity missing
192 f6db258d-f400-41b5-a5ca-7878010f0fd7_doc00945920250206091811 item[0].quantity missing
193 Facture médecine douce-27784833 item[0].quantity missing
194 abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044 item[0].quantity missing
195 abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044 item[1].quantity missing
196 1724397366294irm item[0].quantity missing
197 Facture ostéopathe-25233929 item[0].quantity missing
198 8b6539db-21b4-41dc-a27f-db2cd351fe5f_Doc_34 item[0].quantity missing
199 819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO item[0].quantity missing
200 819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO item[1].quantity missing
201 209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue item[0].quantity missing
202 209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue item[1].quantity missing
203 719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes item[0].quantity missing
204 719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes item[1].quantity missing
205 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[0].quantity missing
206 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[1].quantity missing
207 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[2].quantity missing
208 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[3].quantity missing
209 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[4].quantity missing
210 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[5].quantity missing
211 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 item[6].quantity missing
212 DDE prestation Facture des lentilles de contact-26772555 item[0].quantity missing
213 DDE prestation Facture des lentilles de contact-26772555 item[1].quantity missing
214 5ff72751-5d92-463c-a272-97fda77c34f8_2025-01-31-13187-Luc_Lisa item[0].quantity missing
215 Facture ostéopathe-26744045 item[0].quantity missing
216 Facture ostéopathe-26699212 item[0].quantity missing
217 Facture médecine douce-27781814 item[0].quantity missing
218 Facture ostéopathe-26699279 item[0].quantity missing
219 1724399050308image item[0].quantity missing
220 44e87f9e-a4e2-4851-b383-e1401b36e853_image item[0].quantity missing
221 44e87f9e-a4e2-4851-b383-e1401b36e853_image item[1].quantity missing
222 c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285 item[0].quantity missing
223 c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285 item[1].quantity missing
224 8605959d-bf31-4ea3-a820-bde824a1a4de_20241226_080232 item[0].quantity missing
225 3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta item[0].quantity missing
226 3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta item[1].quantity missing
227 29d72de2-da83-483a-8702-939fbe4addfa_20250207_112449 item[0].quantity missing
228 1724425079263JACOB_Francois_2024-111 item[0].quantity missing
229 1724425079184Facture_Osteopathe_Brousseau_sader_17avr24 item[0].quantity missing
230 DDE prestation Facture des lentilles de contact-26685394 item[0].quantity missing
231 Facture ostéopathe-26447822 bill_paid True but amount_paid missing
232 DDE prestation Facture des lentilles de contact-26705131 item[2].amount negative: -5.0
233 3fe044b5-fd6a-4f5c-accc-ce7b987575a8_IMG_6817 item[0].quantity missing
234 3fe044b5-fd6a-4f5c-accc-ce7b987575a8_IMG_6817 item[1].quantity missing
235 Facture ostéopathe-25187936 item[0].quantity missing
236 Facture des lentilles de contact-27545715 item[1].quantity missing
237 82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN item[0].quantity missing
238 Facture des lentilles de contact-27653858 item[4].amount negative: -2.9
239 Facture des lentilles de contact-27653858 item[5].amount negative: -3.0
240 3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797 item[0].quantity missing
241 3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797 item[1].quantity missing
242 3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797 item[2].quantity missing
243 3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797 item[3].quantity missing
244 3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797 item[4].quantity missing
245 3d4c57ed-eecc-4557-81c9-9e46e46b5a82_IMG_8797 item[5].quantity missing
246 Facture ostéopathe-26675471 item[0].quantity missing
247 38b89b50-a6b6-4816-a495-ce2d9e71082f_Kine-28-11-2024 item[0].quantity missing
248 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures item[0].quantity missing
249 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures item[1].quantity missing
250 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures item[2].quantity missing
251 9c1cc24a-5f7e-42ef-998d-c813a686a7b5_549D8459-524A-4A5C-9612-09EEA1ADDB58 item[0].quantity missing
252 9c1cc24a-5f7e-42ef-998d-c813a686a7b5_549D8459-524A-4A5C-9612-09EEA1ADDB58 item[1].quantity missing
253 c052b9e0-c0d4-473f-93a9-a5dc396a0d86_Facture_ophtalmologique_ item[0].quantity missing
254 c052b9e0-c0d4-473f-93a9-a5dc396a0d86_Facture_ophtalmologique_ item[1].quantity missing
255 ec6baa8c-a189-4850-b310-c90bab372cc2_Facture_Krys bill_paid True but remaining_payment > 0
256 Facture ostéopathe-26539711 item[0].quantity missing
257 0bfb5041-2751-421d-b5d5-60d696c361c1_17391960081576654684571490136238 item[0].quantity missing
258 Facture ostéopathe-26605349 item[0].quantity missing
259 de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery item[0].quantity missing
260 de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery item[1].quantity missing
261 1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants item[0].quantity missing
262 1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants item[0].quantity missing
263 1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants item[1].quantity missing
264 Facture ostéopathe-25358482 item[0].quantity missing
265 34f7e622-ffd8-493d-b955-10a692290a75_image item[0].quantity missing
266 Facture ostéopathe-26595219 item[0].quantity missing
267 Facture des lentilles de contact-27716093 item[0].quantity missing
268 6d9a0fdf-c334-43b6-946b-45b1b1ab83d7_Facture_osteo_FEV_25 item[0].quantity missing
269 24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette item[0].quantity missing
270 24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette item[1].quantity missing
271 24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette item[2].quantity missing
272 24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette item[3].quantity missing
273 1724399050380osteo_Lina item[0].quantity missing
274 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[0].quantity missing
275 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[1].quantity missing
276 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[2].quantity missing
277 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[3].quantity missing
278 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[4].quantity missing
279 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[5].quantity missing

View File

@@ -0,0 +1,54 @@
source_image,total_billed,sum_item_amount,diff
74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp,71.9,0.0,71.9
Facture des lentilles de contact-27718628,201.52,223.92,-22.399999999999977
7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791,78.43,0.0,78.43
6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214,65.5,65.05000000000001,0.44999999999998863
Facture ostéopathe-1066603,45.0,65.0,-20.0
fa95b732-7a88-4fea-a8e7-9975dbbc69a8_Facture_0122202407740700218,238.03,218.03000000000003,19.99999999999997
cc0c0b33-1a94-469b-bd88-ca534e257d33_Osteo,60.0,80.0,-20.0
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,92.4,75.72,16.680000000000007
e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001,119.48,0.0,119.48
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,120.0,0.0,120.0
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,20.8,0.0,20.8
edb77e5c-ea68-4bad-a177-1256a3bafc9c_CF_1686176,64.48,59.88,4.600000000000001
Facture ostéopathe-26112640,66.0,65.0,1.0
1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659,50.0,41.41,8.590000000000003
Facture ostéopathe-25449739,70.0,60.0,10.0
4fa21bd9-393e-41d4-9af2-bf42004f4e94_image,15.93,14.48,1.4499999999999993
895777f3-4478-4e8d-a616-d498850d1523_factures-lunettes_Tess,41.0,79.0,-38.0
6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1,24.98,41.41,-16.429999999999996
9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo,70.69,72.69,-2.0
5c4353b0-55a4-4687-9744-d1a6da183abd_facture_lentilles_de_contact,78.98,65.82,13.16000000000001
598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo,44.0,88.0,-44.0
803f61ed-759c-4773-90f2-d8737ef911ab_image,115.0,0.0,115.0
8c8c6ff8-8e9c-49fc-b131-1dce75bcdfc2_Facture_FA1947_04_02_2025,70.0,58.33,11.670000000000002
e6272522-cc2a-44b0-af14-9f54d1a76a59_Facture_lunettes_,691.62,1253.62,-561.9999999999999
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,262.0,257.0,5.0
47399c82-030f-4904-b8f4-2c3dc7c46723_image,77.61,0.0,77.61
e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24,76.0,0.0,76.0
DDE prestation Facture des lentilles de contact-26736761,78.0,76.0,2.0
8ed22fd8-18ae-4c54-93ae-2f5cd4ec2af6_17375530503755104210735675998866,73.43,0.0,73.43
7b830a57-2131-479c-bf31-1673e7308903_invoice_2025_1_31,75.95,59.96,15.990000000000002
0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625,74.61,0.0,74.61
bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544,95.29,0.0,95.29
ee2bacc2-7a66-4adf-b4ff-95fc5c13a2da_20250213_112821,10.26,9.629999999999999,0.6300000000000008
172442503141724-04-08_recu_kine_Suz,66.12,0.0,66.12
8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn,127.77,0.0,127.77
Facture des lentilles de contact-27700403,41.2,41.8,-0.5999999999999943
9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident,551.58,620.5799999999999,-68.99999999999989
1724397366294irm,252.72,0.0,252.72
209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue,72.63,0.0,72.63
e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152,167.0,174.65,-7.650000000000006
fecefa42-3451-496b-bcb5-d5ebd4cdd8da_invoice_2024_10_14,195.92,163.28,32.639999999999986
44e87f9e-a4e2-4851-b383-e1401b36e853_image,118.14,0.0,118.14
96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai,154.04,535.1099999999999,-381.06999999999994
a13a6613-da2d-48b7-96ef-412ba5a88af0_Factures_Clemence_,71.34,39.0,32.34
78ec53bb-a4c5-469f-a4d0-f42db4cf9b11_CF_1697735,64.48,59.88,4.600000000000001
2dbb4025-f570-49f9-83d1-08aedb2b122d_lunettes_Lou-Ann,213.4,253.0,-39.599999999999994
3fe044b5-fd6a-4f5c-accc-ce7b987575a8_IMG_6817,98.61,0.0,98.61
82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN,40.0,60.0,-20.0
605bfa50-15ce-48a0-87ec-201c3aa0557a_lunette_auregane_daden_001,627.9,557.7,70.19999999999993
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,147.0,105.0,42.0
9c1cc24a-5f7e-42ef-998d-c813a686a7b5_549D8459-524A-4A5C-9612-09EEA1ADDB58,77.61,0.0,77.61
Facture des lentilles de contact-27693842,130.0,150.0,-20.0
626c99a4-986b-401f-b91e-9b6c0f15cce6_Commande_n100691223,39.51,43.9,-4.390000000000001
1 source_image total_billed sum_item_amount diff
2 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp 71.9 0.0 71.9
3 Facture des lentilles de contact-27718628 201.52 223.92 -22.399999999999977
4 7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791 78.43 0.0 78.43
5 6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214 65.5 65.05000000000001 0.44999999999998863
6 Facture ostéopathe-1066603 45.0 65.0 -20.0
7 fa95b732-7a88-4fea-a8e7-9975dbbc69a8_Facture_0122202407740700218 238.03 218.03000000000003 19.99999999999997
8 cc0c0b33-1a94-469b-bd88-ca534e257d33_Osteo 60.0 80.0 -20.0
9 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 92.4 75.72 16.680000000000007
10 e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001 119.48 0.0 119.48
11 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo 120.0 0.0 120.0
12 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo 20.8 0.0 20.8
13 edb77e5c-ea68-4bad-a177-1256a3bafc9c_CF_1686176 64.48 59.88 4.600000000000001
14 Facture ostéopathe-26112640 66.0 65.0 1.0
15 1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659 50.0 41.41 8.590000000000003
16 Facture ostéopathe-25449739 70.0 60.0 10.0
17 4fa21bd9-393e-41d4-9af2-bf42004f4e94_image 15.93 14.48 1.4499999999999993
18 895777f3-4478-4e8d-a616-d498850d1523_factures-lunettes_Tess 41.0 79.0 -38.0
19 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1 24.98 41.41 -16.429999999999996
20 9901adc0-96d4-4cfd-a028-8286fd1f7841_Fre_Ophtalmo 70.69 72.69 -2.0
21 5c4353b0-55a4-4687-9744-d1a6da183abd_facture_lentilles_de_contact 78.98 65.82 13.16000000000001
22 598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo 44.0 88.0 -44.0
23 803f61ed-759c-4773-90f2-d8737ef911ab_image 115.0 0.0 115.0
24 8c8c6ff8-8e9c-49fc-b131-1dce75bcdfc2_Facture_FA1947_04_02_2025 70.0 58.33 11.670000000000002
25 e6272522-cc2a-44b0-af14-9f54d1a76a59_Facture_lunettes_ 691.62 1253.62 -561.9999999999999
26 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes 262.0 257.0 5.0
27 47399c82-030f-4904-b8f4-2c3dc7c46723_image 77.61 0.0 77.61
28 e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24 76.0 0.0 76.0
29 DDE prestation Facture des lentilles de contact-26736761 78.0 76.0 2.0
30 8ed22fd8-18ae-4c54-93ae-2f5cd4ec2af6_17375530503755104210735675998866 73.43 0.0 73.43
31 7b830a57-2131-479c-bf31-1673e7308903_invoice_2025_1_31 75.95 59.96 15.990000000000002
32 0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625 74.61 0.0 74.61
33 bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544 95.29 0.0 95.29
34 ee2bacc2-7a66-4adf-b4ff-95fc5c13a2da_20250213_112821 10.26 9.629999999999999 0.6300000000000008
35 172442503141724-04-08_recu_kine_Suz 66.12 0.0 66.12
36 8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn 127.77 0.0 127.77
37 Facture des lentilles de contact-27700403 41.2 41.8 -0.5999999999999943
38 9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident 551.58 620.5799999999999 -68.99999999999989
39 1724397366294irm 252.72 0.0 252.72
40 209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue 72.63 0.0 72.63
41 e07571f8-3ba4-46fc-a37e-53bd6dd965c7_IMG20250123190152 167.0 174.65 -7.650000000000006
42 fecefa42-3451-496b-bcb5-d5ebd4cdd8da_invoice_2024_10_14 195.92 163.28 32.639999999999986
43 44e87f9e-a4e2-4851-b383-e1401b36e853_image 118.14 0.0 118.14
44 96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai 154.04 535.1099999999999 -381.06999999999994
45 a13a6613-da2d-48b7-96ef-412ba5a88af0_Factures_Clemence_ 71.34 39.0 32.34
46 78ec53bb-a4c5-469f-a4d0-f42db4cf9b11_CF_1697735 64.48 59.88 4.600000000000001
47 2dbb4025-f570-49f9-83d1-08aedb2b122d_lunettes_Lou-Ann 213.4 253.0 -39.599999999999994
48 3fe044b5-fd6a-4f5c-accc-ce7b987575a8_IMG_6817 98.61 0.0 98.61
49 82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN 40.0 60.0 -20.0
50 605bfa50-15ce-48a0-87ec-201c3aa0557a_lunette_auregane_daden_001 627.9 557.7 70.19999999999993
51 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures 147.0 105.0 42.0
52 9c1cc24a-5f7e-42ef-998d-c813a686a7b5_549D8459-524A-4A5C-9612-09EEA1ADDB58 77.61 0.0 77.61
53 Facture des lentilles de contact-27693842 130.0 150.0 -20.0
54 626c99a4-986b-401f-b91e-9b6c0f15cce6_Commande_n100691223 39.51 43.9 -4.390000000000001

View File

@@ -0,0 +1,188 @@
# Label Analysis Report
Input: `008_label_data_sample_seed_1997.json`
## Overview
- Total records: 1232
- Total labels (flattened): 1263
- is_bill distribution: {True: 1124, False: 139}
- bill_paid distribution: {False: 68, True: 1015, None: 180}
- Invoice dates span: 2012-04-17 .. 2025-06-12
- Unique year-month pairs: 63
## Professions (top)
- Ostéopathe: 371
- Ostéopathie: 295
- Optique: 228
- (missing): 119
- Psychologue: 67
- Chiropractie: 57
- Unknown: 20
- Kinésithérapie: 16
- Sophrologie: 8
- Podologue: 8
- Hypnothérapie: 7
- Pharmacie: 7
- Sophrologue: 6
- Ophtalmologie: 6
- Réflexologie: 5
- Radiologie: 5
- Étiopathie: 4
- Sage-femme: 3
- Psychologie: 3
- Soins hospitalier: 3
- Naturopathie: 2
- Addictologie: 2
- Etiopathie: 2
- Kinésiologie: 2
- Diététicienne: 2
- Étiopathe: 2
- Etiopathe: 2
- Psychomotricité: 1
- Homéopathie: 1
- Kinésiologue: 1
- Optique hay Ophtalmologue???: 1
- Médecine du sport: 1
- Otique: 1
- Biologie: 1
- Chiropratie: 1
- Chirurgie dentaire: 1
- Diététique: 1
- Audiopathe: 1
## Currency distribution
- EUR: 1135
- F: 2
## Identifier and key field presence
- adeli_number: 898 present
- rpps_number: 182 present
- finess_number: 139 present
- prescripteur_finess_number: 41 present
- doctor_name: 1040 present
- invoice_issuer: 1120 present
- insured_name: 208 present
- beneficiary_name: 1093 present
- security_number: 492 present
- currency: 1137 present
## Flags
- is_handwriting: {False: 327, None: 880, True: 56}
- is_rotated: {False: 358, None: 879, True: 26}
## Numeric summaries
- total_billed: {'count': 1130, 'min': 9.9, 'p25': 55.0, 'median': 60.0, 'p75': 70.0, 'max': 8000.0, 'mean': 105.40392920353982, 'sum': 119106.44, 'missing': 133}
- amount_paid: {'count': 1011, 'min': 0.0, 'p25': 50.0, 'median': 60.0, 'p75': 66.5, 'max': 8000.0, 'mean': 98.68897131552917, 'sum': 99774.55, 'missing': 252}
- remaining_payment: {'count': 247, 'min': 0.0, 'p25': 0.0, 'median': 0.0, 'p75': 0.0, 'max': 204.06, 'mean': 2.793765182186235, 'sum': 690.06, 'missing': 1016}
- client_part: {'count': 145, 'min': 0.0, 'p25': 26.44, 'median': 48.77, 'p75': 90.0, 'max': 1420.0, 'mean': 99.44468965517241, 'sum': 14419.480000000003, 'missing': 1118}
- mandatory_coverage: {'count': 137, 'min': 0.0, 'p25': 0.09, 'median': 27.63, 'p75': 48.55, 'max': 1232.47, 'mean': 43.21014598540146, 'sum': 5919.790000000002, 'missing': 1126}
- complementary_coverage: {'count': 93, 'min': 0.0, 'p25': 0.0, 'median': 0.0, 'p75': 90.0, 'max': 499.91, 'mean': 63.8452688172043, 'sum': 5937.61, 'missing': 1170}
## Items analysis
- Items per label: count=1263, min=0, max=10, mean=1.25
- total_billed vs sum(items.amount) mismatches: 53
## Data quality issues (sample)
- Facture médecine douce-27776417: item[0].quantity missing
- 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024: item[0].quantity missing
- 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024: item[1].quantity missing
- 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024: item[2].quantity missing
- 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024: item[3].quantity missing
- 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024: item[4].quantity missing
- 551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024: item[5].quantity missing
- Facture médecine douce-27640153: item[0].quantity missing
- Facture médecine douce-27640153: item[1].quantity missing
- 1724397366229Facture_RIVALLANDChrystelle_20240413ICQO: bill_paid True but amount_paid missing
- Facture ostéopathe-25417095: bill_paid True but remaining_payment > 0
- Facture ostéopathe-GED N-R-2323015: item[0].quantity missing
- 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp: item[0].quantity missing
- 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp: currency missing for bill
- 74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp: currency missing for bill
- 1724658126141JACOB_Francois_2024-111: item[0].quantity missing
- Facture des lentilles de contact-27559701: item[5].amount negative: -6.67
- 91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette: item[0].quantity missing
- 0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115: item[0].quantity missing
- 0252b5fb-bc84-4fca-bfa7-483ac611885a_Scan_0115: item[1].quantity missing
- 7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791: item[0].quantity missing
- 7f084bf4-357d-452d-bb4c-76bff332ea0c_IMG_7791: item[1].quantity missing
- 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo: item[0].quantity missing
- 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo: item[1].quantity missing
- 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo: item[2].quantity missing
- Facture médecine douce-27776613: item[0].quantity missing
- 6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214: item[0].quantity missing
- 6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214: item[1].quantity missing
- 1724399050339FACTURE_132120424_MME_RONDIN_MAHEVA: item[0].quantity missing
- ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony: item[0].quantity missing
- ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony: item[1].quantity missing
- Facture ostéopathe-25979892: item[0].quantity missing
- bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136: item[0].quantity missing
- bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136: item[1].quantity missing
- d4f310c8-3bcf-4bb5-bf53-27ad74d66c1e_FACTURE_OPTICIEN_OPHTALMO: item[0].quantity missing
- DDE prestation Facture des lentilles de contact-26740934: item[0].quantity missing
- 45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred_: item[0].quantity missing
- 45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred_: item[1].quantity missing
- Facture ostéopathe-26626551: item[0].quantity missing
- 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001: item[0].quantity missing
- 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001: item[1].quantity missing
- 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001: item[2].quantity missing
- 27ddc814-9239-4944-951a-fdd8a48c5926_IMG_20240922_0001: item[3].quantity missing
- 8eceb56a-b571-4ab6-a64e-668db6bf4ad5_chiropracteur_fevrier_25: item[0].quantity missing
- Facture des lentilles de contact-27576495: item[2].quantity missing
- 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337: item[0].quantity missing
- 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337: item[1].quantity missing
- 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337: item[2].quantity missing
- a5bd8b97-a2a8-49ae-900d-0673d2f96637_quittance_optique_: item[0].quantity missing
- e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001: item[0].quantity missing
## Plots
- is_bill
![](plots/is_bill.png)
- bill_paid
![](plots/bill_paid.png)
- is_handwriting
![](plots/is_handwriting.png)
- is_rotated
![](plots/is_rotated.png)
- professions_top20
![](plots/professions_top20.png)
- currency
![](plots/currency.png)
- invoice_year_month
![](plots/invoice_year_month.png)
- items_per_label
![](plots/items_per_label.png)
- hist_total_billed
![](plots/hist_total_billed.png)
- hist_amount_paid
![](plots/hist_amount_paid.png)
- hist_remaining_payment
![](plots/hist_remaining_payment.png)
- hist_client_part
![](plots/hist_client_part.png)
- hist_mandatory_coverage
![](plots/hist_mandatory_coverage.png)
- hist_complementary_coverage
![](plots/hist_complementary_coverage.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 25 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 39 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 17 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 89 KiB

View File

@@ -0,0 +1,39 @@
profession,count
Ostéopathe,371
Ostéopathie,295
Optique,228
(missing),119
Psychologue,67
Chiropractie,57
Unknown,20
Kinésithérapie,16
Sophrologie,8
Podologue,8
Hypnothérapie,7
Pharmacie,7
Sophrologue,6
Ophtalmologie,6
Réflexologie,5
Radiologie,5
Étiopathie,4
Sage-femme,3
Psychologie,3
Soins hospitalier,3
Naturopathie,2
Addictologie,2
Etiopathie,2
Kinésiologie,2
Diététicienne,2
Étiopathe,2
Etiopathe,2
Psychomotricité,1
Homéopathie,1
Kinésiologue,1
Optique hay Ophtalmologue???,1
Médecine du sport,1
Otique,1
Biologie,1
Chiropratie,1
Chirurgie dentaire,1
Diététique,1
Audiopathe,1
1 profession count
2 Ostéopathe 371
3 Ostéopathie 295
4 Optique 228
5 (missing) 119
6 Psychologue 67
7 Chiropractie 57
8 Unknown 20
9 Kinésithérapie 16
10 Sophrologie 8
11 Podologue 8
12 Hypnothérapie 7
13 Pharmacie 7
14 Sophrologue 6
15 Ophtalmologie 6
16 Réflexologie 5
17 Radiologie 5
18 Étiopathie 4
19 Sage-femme 3
20 Psychologie 3
21 Soins hospitalier 3
22 Naturopathie 2
23 Addictologie 2
24 Etiopathie 2
25 Kinésiologie 2
26 Diététicienne 2
27 Étiopathe 2
28 Etiopathe 2
29 Psychomotricité 1
30 Homéopathie 1
31 Kinésiologue 1
32 Optique hay Ophtalmologue??? 1
33 Médecine du sport 1
34 Otique 1
35 Biologie 1
36 Chiropratie 1
37 Chirurgie dentaire 1
38 Diététique 1
39 Audiopathe 1

View File

@@ -0,0 +1,4 @@
bill_paid,count
False,74
True,1011
,174
1 bill_paid count
2 False 74
3 True 1011
4 174

View File

@@ -0,0 +1,3 @@
currency,count
EUR,1137
F,2
1 currency count
2 EUR 1137
3 F 2

View File

@@ -0,0 +1,11 @@
field,present_count
adeli_number,902
rpps_number,179
finess_number,133
prescripteur_finess_number,42
doctor_name,1051
invoice_issuer,1122
insured_name,203
beneficiary_name,1098
security_number,494
currency,1139
1 field present_count
2 adeli_number 902
3 rpps_number 179
4 finess_number 133
5 prescripteur_finess_number 42
6 doctor_name 1051
7 invoice_issuer 1122
8 insured_name 203
9 beneficiary_name 1098
10 security_number 494
11 currency 1139

View File

@@ -0,0 +1,3 @@
is_bill,count
True,1126
False,133
1 is_bill count
2 True 1126
3 False 133

277
filter/check_009/issues.csv Normal file
View File

@@ -0,0 +1,277 @@
source,issue
eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358,item[0].quantity missing
eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358,item[1].quantity missing
e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste,item[0].quantity missing
e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste,item[0].quantity missing
Facture ostéopathe-25417096,item[0].quantity missing
eb5561dd-937b-4c0b-b6c6-56da8b528c94_facture_vision_claire_ophtalmo_1_001,item[0].quantity missing
6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1,item[0].quantity missing
6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1,item[1].quantity missing
1150ac6a-abdc-409d-b7ca-296e7b35769c_Facture_BOLZERJean_Francois_20250131GMAM,item[0].quantity missing
Facture ostéopathe-26738638,item[0].quantity missing
34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09,bill_paid True but remaining_payment > 0
34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09,item[0].quantity missing
34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09,item[1].quantity missing
abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044,item[0].quantity missing
abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044,item[1].quantity missing
2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024,item[4].amount negative: -2.9
2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024,item[5].amount negative: -3.0
Facture ostéopathe-26605349,item[0].quantity missing
Facture médecine douce-27702698,item[0].quantity missing
6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann,item[0].quantity missing
f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue,item[0].quantity missing
f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue,item[0].quantity missing
62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783,item[0].quantity missing
62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783,item[1].quantity missing
Facture ostéopathe-25847017,bill_paid True but remaining_payment > 0
02358c4c-820a-409f-9489-a75f22af3f32_image,item[0].quantity missing
02358c4c-820a-409f-9489-a75f22af3f32_image,item[1].quantity missing
7ef7306d-400e-4a56-b31c-02237212c470_Scan,item[0].quantity missing
7ef7306d-400e-4a56-b31c-02237212c470_Scan,item[1].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[0].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[1].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[3].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[4].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[6].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[7].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[8].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[9].quantity missing
f482ebef-e885-4627-a927-96ad54ab5aa0_Facture,item[11].quantity missing
Facture ostéopathe-26610949,item[0].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[0].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[1].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[2].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[3].quantity missing
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,item[4].quantity missing
bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136,item[0].quantity missing
bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136,item[1].quantity missing
17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo,item[0].quantity missing
17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo,item[1].quantity missing
17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo,item[0].quantity missing
e566a3b0-cc99-4c4d-8715-9f3be841a066_noname,item[0].quantity missing
e566a3b0-cc99-4c4d-8715-9f3be841a066_noname,item[1].quantity missing
c6013936-5850-4027-9f61-32c30486e6d5_optique,item[0].quantity missing
c6013936-5850-4027-9f61-32c30486e6d5_optique,item[1].quantity missing
47399c82-030f-4904-b8f4-2c3dc7c46723_image,item[0].quantity missing
47399c82-030f-4904-b8f4-2c3dc7c46723_image,item[1].quantity missing
57188e7d-280a-4f94-9d7d-6af62ede30ed_lunettes,item[0].quantity missing
209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue,item[0].quantity missing
209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue,item[1].quantity missing
85120394-0ea3-4a2c-b0d4-11b0fe31a4e7_20250213_145033,item[0].quantity missing
85120394-0ea3-4a2c-b0d4-11b0fe31a4e7_20250213_145033,item[1].quantity missing
Facture ostéopathe-26972680,item[0].quantity missing
Facture médecine douce-27784971,item[0].quantity missing
8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001,item[0].quantity missing
8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001,item[1].quantity missing
8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001,item[2].quantity missing
Facture ostéopathe-26479073,item[0].quantity missing
b5cfbf34-f15b-4f2e-bbe9-e83c86549d98_FACTURE_OPHTALMO,item[0].quantity missing
b5cfbf34-f15b-4f2e-bbe9-e83c86549d98_FACTURE_OPHTALMO,item[1].quantity missing
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,item[0].quantity missing
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,item[1].quantity missing
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,item[2].quantity missing
70e53730-c41c-402b-a227-2262cdd9fa7b_osteo_Laurette,item[0].quantity missing
1724226151795facture_du_30_mars_2024,item[0].quantity missing
Facture médecine douce-27688568,bill_paid True but amount_paid missing
Facture ostéopathe-26626551,item[0].quantity missing
Facture des lentilles de contact-27559701,item[5].amount negative: -6.67
Facture des lentilles de contact-27601451,item[0].quantity missing
Facture des lentilles de contact-27601451,item[1].quantity missing
Facture des lentilles de contact-27680116,item[0].quantity missing
Facture des lentilles de contact-27680116,item[1].quantity missing
Facture médecine douce-27640153,item[0].quantity missing
Facture médecine douce-27640153,item[1].quantity missing
Facture des lentilles de contact-27653858,item[4].amount negative: -2.9
Facture des lentilles de contact-27653858,item[5].amount negative: -3.0
Facture des lentilles de contact-27545715,item[1].quantity missing
ed44bada-dfae-4ab1-b810-e47cec83e3d9_PXL_20250206_222803759.MP,item[0].quantity missing
f6fc955d-e941-4a94-892b-ffebcfbb949a_image,item[0].quantity missing
f6fc955d-e941-4a94-892b-ffebcfbb949a_image,item[1].quantity missing
f6fc955d-e941-4a94-892b-ffebcfbb949a_image,item[2].quantity missing
252c4ad4-8fa3-4ed1-9508-ee77f6dce6fe_img20250206_20040851,item[0].quantity missing
c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285,item[0].quantity missing
c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285,item[1].quantity missing
d8203a60-049f-4f78-befd-556987e766d7_Facture_Mme_GLACON_Adeline,item[0].quantity missing
d8203a60-049f-4f78-befd-556987e766d7_Facture_Mme_GLACON_Adeline,item[1].quantity missing
821b6297-8c00-4f88-89a7-a9213c38c3e8_JEAN_PIERRE_001,item[0].quantity missing
821b6297-8c00-4f88-89a7-a9213c38c3e8_JEAN_PIERRE_001,item[1].quantity missing
b442261a-d504-4cda-98cd-e22a6875bfd0_M._ZIMMER_2401,item[0].quantity missing
819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO,item[0].quantity missing
819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO,item[1].quantity missing
Facture ostéopathe-25152747,bill_paid True but amount_paid missing
DDE prestation Facture des lentilles de contact-26705902,item[3].amount negative: -3.0
Facture ostéopathe-26750142,item[0].quantity missing
9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah,item[0].quantity missing
9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah,item[1].quantity missing
9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah,item[2].quantity missing
9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah,item[3].quantity missing
9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah,item[4].quantity missing
9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah,item[5].quantity missing
e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001,item[0].quantity missing
fa25423e-b79c-48f9-89ae-32ffd95e3101_Facture_Ophtalmologie,item[0].quantity missing
91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26736761,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26736761,item[1].quantity missing
Facture ostéopathe-26077624,item[0].quantity missing
Facture médecine douce-27701986,item[0].quantity missing
801fa72b-9dba-4852-9378-3b69ea925679_17354854480549213702943309634816,item[0].quantity missing
Facture médecine douce-27784881,item[0].quantity missing
d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue,item[0].quantity missing
d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue,item[0].quantity missing
Facture des lentilles de contact-27654482,item[3].amount negative: -5.9
Facture des lentilles de contact-27654482,item[4].amount negative: -17.38
Facture médecine douce-27784857,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26729570,item[0].quantity missing
1724156677847Facture_osteophate_13_08_2024_POIGNANT_Thierry,item[0].quantity missing
5cf26866-c1ae-4dfd-8793-54ef5148224c_FACTURE_OSTEO_SILLON_DU_22-11-2024,item[0].quantity missing
Facture ostéopathe-26300731,item[0].quantity missing
Facture ostéopathe-26300731,item[1].quantity missing
Facture ostéopathe-26300731,item[2].quantity missing
dc4e7302-925d-438f-82d5-e78510d858c5_7447113,item[3].amount negative: -5.9
dc4e7302-925d-438f-82d5-e78510d858c5_7447113,item[4].amount negative: -5.0
Facture ostéopathe-25089174,item[0].quantity missing
d9e1cb2a-b44b-4cc8-b07a-030ddf3acd28_Ordonnance_lentilles_Mary,bill_paid True but amount_paid missing
1724658376437factosteo,item[0].quantity missing
Facture des lentilles de contact-27708923,item[0].quantity missing
b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE,item[0].quantity missing
b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE,item[1].quantity missing
b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE,item[2].quantity missing
27c16ad0-3309-4eb1-9432-ab8a546fee7c_osteo,currency missing for bill
c2009b55-58d4-411f-a25a-3dc1e32e4a89_facture_lentille,item[3].amount negative: -3.0
25f19f18-b19d-4e1e-b105-349dd1584c81_75109795868__1FEA46AA-A04E-44AF-9748-541DE3F7B438,item[0].quantity missing
25f19f18-b19d-4e1e-b105-349dd1584c81_75109795868__1FEA46AA-A04E-44AF-9748-541DE3F7B438,item[1].quantity missing
1724399050339FACTURE_132120424_MME_RONDIN_MAHEVA,item[0].quantity missing
64fa4c19-efa4-48f1-acf8-8c2e8f573b8b_Facture_osteopathe_,item[0].quantity missing
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,item[0].quantity missing
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,item[1].quantity missing
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,item[0].quantity missing
bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140,item[0].quantity missing
bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140,item[1].quantity missing
e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24,item[0].quantity missing
e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24,item[1].quantity missing
Facture des lentilles de contact-27647325,item[0].quantity missing
1724399050380osteo_Lina,item[0].quantity missing
70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo,item[0].quantity missing
70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo,item[1].quantity missing
70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo,item[2].quantity missing
66de7232-78a6-46a0-8392-47ec4630ad31_image,item[0].quantity missing
66de7232-78a6-46a0-8392-47ec4630ad31_image,item[1].quantity missing
66de7232-78a6-46a0-8392-47ec4630ad31_image,item[2].quantity missing
0be67d5b-5af6-49ba-8851-d3976f9920a0_Facture_optalmo_Emma_PINSARD,item[0].quantity missing
0be67d5b-5af6-49ba-8851-d3976f9920a0_Facture_optalmo_Emma_PINSARD,item[1].quantity missing
b79c0ed4-4126-4ff5-86a8-0348cd1c308c_IMG_0048,item[0].quantity missing
b79c0ed4-4126-4ff5-86a8-0348cd1c308c_IMG_0048,item[1].quantity missing
b79c0ed4-4126-4ff5-86a8-0348cd1c308c_IMG_0048,item[2].quantity missing
Facture ostéopathe-26252579,item[0].quantity missing
Facture ostéopathe-26252579,item[1].quantity missing
Facture ostéopathe-26252579,item[2].quantity missing
Facture ostéopathe-26252579,item[3].quantity missing
a5bd8b97-a2a8-49ae-900d-0673d2f96637_quittance_optique_,item[0].quantity missing
8b6539db-21b4-41dc-a27f-db2cd351fe5f_Doc_34,item[0].quantity missing
172465822585720240416133632216,item[0].quantity missing
Facture médecine douce-27781814,item[0].quantity missing
Facture ostéopathe-26349654,item[0].quantity missing
4950b870-6662-4fcd-a7e9-1fc3abd0fdc9_IMG_20250321_182918,item[0].quantity missing
4950b870-6662-4fcd-a7e9-1fc3abd0fdc9_IMG_20250321_182918,currency missing for bill
cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures,item[0].quantity missing
cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures,item[1].quantity missing
91efe623-7d6b-4a96-8b03-87199c56566a_image,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26705131,item[2].amount negative: -5.0
Facture des lentilles de contact-27620389,item[0].quantity missing
Facture des lentilles de contact-27620389,item[1].quantity missing
2da98e03-19d0-430e-90da-a09706de143b_Facture_Lentilles_Stephanie_Marchenay_,item[0].quantity missing
Facture-quittance toutes spécialités-27648502,item[0].quantity missing
Facture médecine douce-27776613,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26685394,item[0].quantity missing
ec6baa8c-a189-4850-b310-c90bab372cc2_Facture_Krys,bill_paid True but remaining_payment > 0
DDE prestation Facture des lentilles de contact-26772555,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26772555,item[1].quantity missing
Facture des lentilles de contact-27700117,item[0].quantity missing
1724399050308image,item[0].quantity missing
179673c3-7b80-47cb-9467-3425e010905a_ordonnance08.10.2024,item[0].quantity missing
179673c3-7b80-47cb-9467-3425e010905a_ordonnance08.10.2024,item[1].quantity missing
cdea6c95-be16-4d2b-ba78-95ef182d8e63_IMG_20250523_084341,item[0].quantity missing
Facture ostéopathe-25073562,item[0].quantity missing
499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy,item[0].quantity missing
499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy,item[1].quantity missing
Facture ostéopathe-26189649,item[0].quantity missing
Facture psychologie - 27581557,item[0].quantity missing
Facture psychologie - 27581557,item[0].quantity missing
fdaa0d42-8e79-4506-b222-373c88891ff5_notehonoraire__20250212_143947,item[0].quantity missing
92340ad4-27cf-476b-a842-051d0b6a2c9f_IMG_20250619_231842,item[0].quantity missing
Facture ostéopathe-1142513,item[0].quantity missing
Facture ostéopathe-26447822,bill_paid True but amount_paid missing
1724156469365facture_osteo08.24,item[0].quantity missing
1eea4521-e98f-4f41-911b-07ffaf3b52a0_Facture_MGP,item[0].quantity missing
1eea4521-e98f-4f41-911b-07ffaf3b52a0_Facture_MGP,item[1].quantity missing
Facture ostéopathe-26595219,item[0].quantity missing
0aa44c3d-2dc8-47fc-9307-d1485df454c1_20250207152016_001,item[0].quantity missing
Facture ostéopathe-25416514,item[0].quantity missing
Sans titre,item[0].quantity missing
707771f6-bb3b-4e5e-8d7d-acd584a23009_IMG_20240726_142929,item[0].quantity missing
707771f6-bb3b-4e5e-8d7d-acd584a23009_IMG_20240726_142929,item[1].quantity missing
d35e4ea0-6678-4dd1-b0d5-fe905d262430_FACTURE_GAFFE_Thierry,item[0].quantity missing
Facture ostéopathe-25921103,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26764274,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26764274,item[1].quantity missing
1724658376419facture_ostheo_avril_24,item[0].quantity missing
Facture des lentilles de contact-27654407,bill_paid True but amount_paid missing
Facture ostéopathe-26610950,item[0].quantity missing
Facture des lentilles de contact-27668729,item[4].amount negative: -4.9
Facture des lentilles de contact-27668729,item[5].amount negative: -12.8
a05800f6-c1f1-4d50-9598-98e0f90b83f0_17476639221036811513370155179329,item[0].quantity missing
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,item[0].quantity missing
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,item[1].quantity missing
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,item[2].quantity missing
Facture ostéopathe-1119174,item[0].quantity missing
Facture ostéopathe-26298895,item[0].quantity missing
82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN,item[0].quantity missing
Facture ostéopathe-26585743,item[0].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[0].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[1].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[2].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[3].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[4].quantity missing
106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie,item[5].quantity missing
284cb22c-17ad-409b-8d5f-79e43a6dd51b_Facture_,item[0].quantity missing
fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA,item[0].quantity missing
fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA,item[1].quantity missing
Facture des lentilles de contact-27576127,item[2].amount negative: -53.0
1724425079263JACOB_Francois_2024-111,item[0].quantity missing
46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre,item[0].quantity missing
46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre,item[1].quantity missing
0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625,item[0].quantity missing
0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625,item[1].quantity missing
Facture ostéopathe-26699279,item[0].quantity missing
Facture ostéopathe-1216961,bill_paid True but amount_paid missing
Facture ostéopathe-1216961,item[0].quantity missing
Facture ostéopathe-26935993,item[0].quantity missing
1724425079207douilly_6,item[0].quantity missing
Facture ostéopathe-26298310,item[0].quantity missing
8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn,item[0].quantity missing
8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn,item[1].quantity missing
379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin_,item[0].quantity missing
379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin_,item[1].quantity missing
b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE,item[0].quantity missing
0268b186-6926-46af-b75f-ba01c02cb647_IMG_20240514_152542,item[0].quantity missing
0bfb5041-2751-421d-b5d5-60d696c361c1_17391960081576654684571490136238,item[0].quantity missing
598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo,item[0].quantity missing
598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo,item[1].quantity missing
1724226151831IMG_8005,item[0].quantity missing
c8fa8d11-c2df-4f39-9909-28790acc5957_Factures_opthalmo,item[0].quantity missing
50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique,item[0].quantity missing
50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique,item[1].quantity missing
50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique,item[2].quantity missing
Facture ostéopathe-26744045,item[0].quantity missing
1724425031419Facture_Osteo_20240417,item[0].quantity missing
Facture ostéopathe-25187936,item[0].quantity missing
ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony,item[0].quantity missing
ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony,item[1].quantity missing
Facture ostéopathe-26052732,item[0].quantity missing
de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery,item[0].quantity missing
de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery,item[1].quantity missing
Facture médecine douce-27709304,item[0].quantity missing
DDE prestation Facture des lentilles de contact-26715605,item[0].quantity missing
Facture médecine douce-27702699,item[0].quantity missing
70546a38-b6d7-4151-b08a-82b01c8e4a98_480755849_2183001178819517_1003204107786000285_n,item[0].quantity missing
1724658298461facture_podologue_alicia,item[0].quantity missing
1 source issue
2 eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358 item[0].quantity missing
3 eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358 item[1].quantity missing
4 e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste item[0].quantity missing
5 e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste item[0].quantity missing
6 Facture ostéopathe-25417096 item[0].quantity missing
7 eb5561dd-937b-4c0b-b6c6-56da8b528c94_facture_vision_claire_ophtalmo_1_001 item[0].quantity missing
8 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1 item[0].quantity missing
9 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1 item[1].quantity missing
10 1150ac6a-abdc-409d-b7ca-296e7b35769c_Facture_BOLZERJean_Francois_20250131GMAM item[0].quantity missing
11 Facture ostéopathe-26738638 item[0].quantity missing
12 34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09 bill_paid True but remaining_payment > 0
13 34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09 item[0].quantity missing
14 34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09 item[1].quantity missing
15 abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044 item[0].quantity missing
16 abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044 item[1].quantity missing
17 2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024 item[4].amount negative: -2.9
18 2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024 item[5].amount negative: -3.0
19 Facture ostéopathe-26605349 item[0].quantity missing
20 Facture médecine douce-27702698 item[0].quantity missing
21 6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann item[0].quantity missing
22 f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue item[0].quantity missing
23 f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue item[0].quantity missing
24 62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783 item[0].quantity missing
25 62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783 item[1].quantity missing
26 Facture ostéopathe-25847017 bill_paid True but remaining_payment > 0
27 02358c4c-820a-409f-9489-a75f22af3f32_image item[0].quantity missing
28 02358c4c-820a-409f-9489-a75f22af3f32_image item[1].quantity missing
29 7ef7306d-400e-4a56-b31c-02237212c470_Scan item[0].quantity missing
30 7ef7306d-400e-4a56-b31c-02237212c470_Scan item[1].quantity missing
31 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[0].quantity missing
32 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[1].quantity missing
33 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[3].quantity missing
34 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[4].quantity missing
35 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[6].quantity missing
36 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[7].quantity missing
37 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[8].quantity missing
38 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[9].quantity missing
39 f482ebef-e885-4627-a927-96ad54ab5aa0_Facture item[11].quantity missing
40 Facture ostéopathe-26610949 item[0].quantity missing
41 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[0].quantity missing
42 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[1].quantity missing
43 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[2].quantity missing
44 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[3].quantity missing
45 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes item[4].quantity missing
46 bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136 item[0].quantity missing
47 bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136 item[1].quantity missing
48 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo item[0].quantity missing
49 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo item[1].quantity missing
50 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo item[0].quantity missing
51 e566a3b0-cc99-4c4d-8715-9f3be841a066_noname item[0].quantity missing
52 e566a3b0-cc99-4c4d-8715-9f3be841a066_noname item[1].quantity missing
53 c6013936-5850-4027-9f61-32c30486e6d5_optique item[0].quantity missing
54 c6013936-5850-4027-9f61-32c30486e6d5_optique item[1].quantity missing
55 47399c82-030f-4904-b8f4-2c3dc7c46723_image item[0].quantity missing
56 47399c82-030f-4904-b8f4-2c3dc7c46723_image item[1].quantity missing
57 57188e7d-280a-4f94-9d7d-6af62ede30ed_lunettes item[0].quantity missing
58 209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue item[0].quantity missing
59 209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue item[1].quantity missing
60 85120394-0ea3-4a2c-b0d4-11b0fe31a4e7_20250213_145033 item[0].quantity missing
61 85120394-0ea3-4a2c-b0d4-11b0fe31a4e7_20250213_145033 item[1].quantity missing
62 Facture ostéopathe-26972680 item[0].quantity missing
63 Facture médecine douce-27784971 item[0].quantity missing
64 8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001 item[0].quantity missing
65 8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001 item[1].quantity missing
66 8de5aacb-e142-4321-aaaf-c4179f031d7a_20240810152224_001 item[2].quantity missing
67 Facture ostéopathe-26479073 item[0].quantity missing
68 b5cfbf34-f15b-4f2e-bbe9-e83c86549d98_FACTURE_OPHTALMO item[0].quantity missing
69 b5cfbf34-f15b-4f2e-bbe9-e83c86549d98_FACTURE_OPHTALMO item[1].quantity missing
70 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures item[0].quantity missing
71 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures item[1].quantity missing
72 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures item[2].quantity missing
73 70e53730-c41c-402b-a227-2262cdd9fa7b_osteo_Laurette item[0].quantity missing
74 1724226151795facture_du_30_mars_2024 item[0].quantity missing
75 Facture médecine douce-27688568 bill_paid True but amount_paid missing
76 Facture ostéopathe-26626551 item[0].quantity missing
77 Facture des lentilles de contact-27559701 item[5].amount negative: -6.67
78 Facture des lentilles de contact-27601451 item[0].quantity missing
79 Facture des lentilles de contact-27601451 item[1].quantity missing
80 Facture des lentilles de contact-27680116 item[0].quantity missing
81 Facture des lentilles de contact-27680116 item[1].quantity missing
82 Facture médecine douce-27640153 item[0].quantity missing
83 Facture médecine douce-27640153 item[1].quantity missing
84 Facture des lentilles de contact-27653858 item[4].amount negative: -2.9
85 Facture des lentilles de contact-27653858 item[5].amount negative: -3.0
86 Facture des lentilles de contact-27545715 item[1].quantity missing
87 ed44bada-dfae-4ab1-b810-e47cec83e3d9_PXL_20250206_222803759.MP item[0].quantity missing
88 f6fc955d-e941-4a94-892b-ffebcfbb949a_image item[0].quantity missing
89 f6fc955d-e941-4a94-892b-ffebcfbb949a_image item[1].quantity missing
90 f6fc955d-e941-4a94-892b-ffebcfbb949a_image item[2].quantity missing
91 252c4ad4-8fa3-4ed1-9508-ee77f6dce6fe_img20250206_20040851 item[0].quantity missing
92 c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285 item[0].quantity missing
93 c53ffdcc-0ee6-4bf2-8c43-923c5471310f_IMG_7285 item[1].quantity missing
94 d8203a60-049f-4f78-befd-556987e766d7_Facture_Mme_GLACON_Adeline item[0].quantity missing
95 d8203a60-049f-4f78-befd-556987e766d7_Facture_Mme_GLACON_Adeline item[1].quantity missing
96 821b6297-8c00-4f88-89a7-a9213c38c3e8_JEAN_PIERRE_001 item[0].quantity missing
97 821b6297-8c00-4f88-89a7-a9213c38c3e8_JEAN_PIERRE_001 item[1].quantity missing
98 b442261a-d504-4cda-98cd-e22a6875bfd0_M._ZIMMER_2401 item[0].quantity missing
99 819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO item[0].quantity missing
100 819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO item[1].quantity missing
101 Facture ostéopathe-25152747 bill_paid True but amount_paid missing
102 DDE prestation Facture des lentilles de contact-26705902 item[3].amount negative: -3.0
103 Facture ostéopathe-26750142 item[0].quantity missing
104 9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah item[0].quantity missing
105 9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah item[1].quantity missing
106 9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah item[2].quantity missing
107 9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah item[3].quantity missing
108 9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah item[4].quantity missing
109 9934f5be-de66-495a-a478-f34e4ced1fa5_Facture_lunettes_Emnah item[5].quantity missing
110 e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001 item[0].quantity missing
111 fa25423e-b79c-48f9-89ae-32ffd95e3101_Facture_Ophtalmologie item[0].quantity missing
112 91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette item[0].quantity missing
113 DDE prestation Facture des lentilles de contact-26736761 item[0].quantity missing
114 DDE prestation Facture des lentilles de contact-26736761 item[1].quantity missing
115 Facture ostéopathe-26077624 item[0].quantity missing
116 Facture médecine douce-27701986 item[0].quantity missing
117 801fa72b-9dba-4852-9378-3b69ea925679_17354854480549213702943309634816 item[0].quantity missing
118 Facture médecine douce-27784881 item[0].quantity missing
119 d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue item[0].quantity missing
120 d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue item[0].quantity missing
121 Facture des lentilles de contact-27654482 item[3].amount negative: -5.9
122 Facture des lentilles de contact-27654482 item[4].amount negative: -17.38
123 Facture médecine douce-27784857 item[0].quantity missing
124 DDE prestation Facture des lentilles de contact-26729570 item[0].quantity missing
125 1724156677847Facture_osteophate_13_08_2024_POIGNANT_Thierry item[0].quantity missing
126 5cf26866-c1ae-4dfd-8793-54ef5148224c_FACTURE_OSTEO_SILLON_DU_22-11-2024 item[0].quantity missing
127 Facture ostéopathe-26300731 item[0].quantity missing
128 Facture ostéopathe-26300731 item[1].quantity missing
129 Facture ostéopathe-26300731 item[2].quantity missing
130 dc4e7302-925d-438f-82d5-e78510d858c5_7447113 item[3].amount negative: -5.9
131 dc4e7302-925d-438f-82d5-e78510d858c5_7447113 item[4].amount negative: -5.0
132 Facture ostéopathe-25089174 item[0].quantity missing
133 d9e1cb2a-b44b-4cc8-b07a-030ddf3acd28_Ordonnance_lentilles_Mary bill_paid True but amount_paid missing
134 1724658376437factosteo item[0].quantity missing
135 Facture des lentilles de contact-27708923 item[0].quantity missing
136 b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE item[0].quantity missing
137 b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE item[1].quantity missing
138 b46e8411-29d3-44b6-ae65-55f11bf360bd_FACTURE_OCULISTE_JANE item[2].quantity missing
139 27c16ad0-3309-4eb1-9432-ab8a546fee7c_osteo currency missing for bill
140 c2009b55-58d4-411f-a25a-3dc1e32e4a89_facture_lentille item[3].amount negative: -3.0
141 25f19f18-b19d-4e1e-b105-349dd1584c81_75109795868__1FEA46AA-A04E-44AF-9748-541DE3F7B438 item[0].quantity missing
142 25f19f18-b19d-4e1e-b105-349dd1584c81_75109795868__1FEA46AA-A04E-44AF-9748-541DE3F7B438 item[1].quantity missing
143 1724399050339FACTURE_132120424_MME_RONDIN_MAHEVA item[0].quantity missing
144 64fa4c19-efa4-48f1-acf8-8c2e8f573b8b_Facture_osteopathe_ item[0].quantity missing
145 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo item[0].quantity missing
146 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo item[1].quantity missing
147 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo item[0].quantity missing
148 bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140 item[0].quantity missing
149 bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140 item[1].quantity missing
150 e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24 item[0].quantity missing
151 e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24 item[1].quantity missing
152 Facture des lentilles de contact-27647325 item[0].quantity missing
153 1724399050380osteo_Lina item[0].quantity missing
154 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo item[0].quantity missing
155 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo item[1].quantity missing
156 70c3e7f5-405b-49d8-a73c-dae83beafb59_Facture_ophtalmo item[2].quantity missing
157 66de7232-78a6-46a0-8392-47ec4630ad31_image item[0].quantity missing
158 66de7232-78a6-46a0-8392-47ec4630ad31_image item[1].quantity missing
159 66de7232-78a6-46a0-8392-47ec4630ad31_image item[2].quantity missing
160 0be67d5b-5af6-49ba-8851-d3976f9920a0_Facture_optalmo_Emma_PINSARD item[0].quantity missing
161 0be67d5b-5af6-49ba-8851-d3976f9920a0_Facture_optalmo_Emma_PINSARD item[1].quantity missing
162 b79c0ed4-4126-4ff5-86a8-0348cd1c308c_IMG_0048 item[0].quantity missing
163 b79c0ed4-4126-4ff5-86a8-0348cd1c308c_IMG_0048 item[1].quantity missing
164 b79c0ed4-4126-4ff5-86a8-0348cd1c308c_IMG_0048 item[2].quantity missing
165 Facture ostéopathe-26252579 item[0].quantity missing
166 Facture ostéopathe-26252579 item[1].quantity missing
167 Facture ostéopathe-26252579 item[2].quantity missing
168 Facture ostéopathe-26252579 item[3].quantity missing
169 a5bd8b97-a2a8-49ae-900d-0673d2f96637_quittance_optique_ item[0].quantity missing
170 8b6539db-21b4-41dc-a27f-db2cd351fe5f_Doc_34 item[0].quantity missing
171 172465822585720240416133632216 item[0].quantity missing
172 Facture médecine douce-27781814 item[0].quantity missing
173 Facture ostéopathe-26349654 item[0].quantity missing
174 4950b870-6662-4fcd-a7e9-1fc3abd0fdc9_IMG_20250321_182918 item[0].quantity missing
175 4950b870-6662-4fcd-a7e9-1fc3abd0fdc9_IMG_20250321_182918 currency missing for bill
176 cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures item[0].quantity missing
177 cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures item[1].quantity missing
178 91efe623-7d6b-4a96-8b03-87199c56566a_image item[0].quantity missing
179 DDE prestation Facture des lentilles de contact-26705131 item[2].amount negative: -5.0
180 Facture des lentilles de contact-27620389 item[0].quantity missing
181 Facture des lentilles de contact-27620389 item[1].quantity missing
182 2da98e03-19d0-430e-90da-a09706de143b_Facture_Lentilles_Stephanie_Marchenay_ item[0].quantity missing
183 Facture-quittance toutes spécialités-27648502 item[0].quantity missing
184 Facture médecine douce-27776613 item[0].quantity missing
185 DDE prestation Facture des lentilles de contact-26685394 item[0].quantity missing
186 ec6baa8c-a189-4850-b310-c90bab372cc2_Facture_Krys bill_paid True but remaining_payment > 0
187 DDE prestation Facture des lentilles de contact-26772555 item[0].quantity missing
188 DDE prestation Facture des lentilles de contact-26772555 item[1].quantity missing
189 Facture des lentilles de contact-27700117 item[0].quantity missing
190 1724399050308image item[0].quantity missing
191 179673c3-7b80-47cb-9467-3425e010905a_ordonnance08.10.2024 item[0].quantity missing
192 179673c3-7b80-47cb-9467-3425e010905a_ordonnance08.10.2024 item[1].quantity missing
193 cdea6c95-be16-4d2b-ba78-95ef182d8e63_IMG_20250523_084341 item[0].quantity missing
194 Facture ostéopathe-25073562 item[0].quantity missing
195 499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy item[0].quantity missing
196 499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy item[1].quantity missing
197 Facture ostéopathe-26189649 item[0].quantity missing
198 Facture psychologie - 27581557 item[0].quantity missing
199 Facture psychologie - 27581557 item[0].quantity missing
200 fdaa0d42-8e79-4506-b222-373c88891ff5_notehonoraire__20250212_143947 item[0].quantity missing
201 92340ad4-27cf-476b-a842-051d0b6a2c9f_IMG_20250619_231842 item[0].quantity missing
202 Facture ostéopathe-1142513 item[0].quantity missing
203 Facture ostéopathe-26447822 bill_paid True but amount_paid missing
204 1724156469365facture_osteo08.24 item[0].quantity missing
205 1eea4521-e98f-4f41-911b-07ffaf3b52a0_Facture_MGP item[0].quantity missing
206 1eea4521-e98f-4f41-911b-07ffaf3b52a0_Facture_MGP item[1].quantity missing
207 Facture ostéopathe-26595219 item[0].quantity missing
208 0aa44c3d-2dc8-47fc-9307-d1485df454c1_20250207152016_001 item[0].quantity missing
209 Facture ostéopathe-25416514 item[0].quantity missing
210 Sans titre item[0].quantity missing
211 707771f6-bb3b-4e5e-8d7d-acd584a23009_IMG_20240726_142929 item[0].quantity missing
212 707771f6-bb3b-4e5e-8d7d-acd584a23009_IMG_20240726_142929 item[1].quantity missing
213 d35e4ea0-6678-4dd1-b0d5-fe905d262430_FACTURE_GAFFE_Thierry item[0].quantity missing
214 Facture ostéopathe-25921103 item[0].quantity missing
215 DDE prestation Facture des lentilles de contact-26764274 item[0].quantity missing
216 DDE prestation Facture des lentilles de contact-26764274 item[1].quantity missing
217 1724658376419facture_ostheo_avril_24 item[0].quantity missing
218 Facture des lentilles de contact-27654407 bill_paid True but amount_paid missing
219 Facture ostéopathe-26610950 item[0].quantity missing
220 Facture des lentilles de contact-27668729 item[4].amount negative: -4.9
221 Facture des lentilles de contact-27668729 item[5].amount negative: -12.8
222 a05800f6-c1f1-4d50-9598-98e0f90b83f0_17476639221036811513370155179329 item[0].quantity missing
223 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 item[0].quantity missing
224 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 item[1].quantity missing
225 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 item[2].quantity missing
226 Facture ostéopathe-1119174 item[0].quantity missing
227 Facture ostéopathe-26298895 item[0].quantity missing
228 82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN item[0].quantity missing
229 Facture ostéopathe-26585743 item[0].quantity missing
230 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[0].quantity missing
231 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[1].quantity missing
232 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[2].quantity missing
233 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[3].quantity missing
234 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[4].quantity missing
235 106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie item[5].quantity missing
236 284cb22c-17ad-409b-8d5f-79e43a6dd51b_Facture_ item[0].quantity missing
237 fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA item[0].quantity missing
238 fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA item[1].quantity missing
239 Facture des lentilles de contact-27576127 item[2].amount negative: -53.0
240 1724425079263JACOB_Francois_2024-111 item[0].quantity missing
241 46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre item[0].quantity missing
242 46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre item[1].quantity missing
243 0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625 item[0].quantity missing
244 0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625 item[1].quantity missing
245 Facture ostéopathe-26699279 item[0].quantity missing
246 Facture ostéopathe-1216961 bill_paid True but amount_paid missing
247 Facture ostéopathe-1216961 item[0].quantity missing
248 Facture ostéopathe-26935993 item[0].quantity missing
249 1724425079207douilly_6 item[0].quantity missing
250 Facture ostéopathe-26298310 item[0].quantity missing
251 8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn item[0].quantity missing
252 8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn item[1].quantity missing
253 379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin_ item[0].quantity missing
254 379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin_ item[1].quantity missing
255 b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE item[0].quantity missing
256 0268b186-6926-46af-b75f-ba01c02cb647_IMG_20240514_152542 item[0].quantity missing
257 0bfb5041-2751-421d-b5d5-60d696c361c1_17391960081576654684571490136238 item[0].quantity missing
258 598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo item[0].quantity missing
259 598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo item[1].quantity missing
260 1724226151831IMG_8005 item[0].quantity missing
261 c8fa8d11-c2df-4f39-9909-28790acc5957_Factures_opthalmo item[0].quantity missing
262 50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique item[0].quantity missing
263 50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique item[1].quantity missing
264 50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique item[2].quantity missing
265 Facture ostéopathe-26744045 item[0].quantity missing
266 1724425031419Facture_Osteo_20240417 item[0].quantity missing
267 Facture ostéopathe-25187936 item[0].quantity missing
268 ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony item[0].quantity missing
269 ec117b21-9348-4a76-9a22-2bae87639392_facture_du_14.02_Tony item[1].quantity missing
270 Facture ostéopathe-26052732 item[0].quantity missing
271 de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery item[0].quantity missing
272 de6a66cd-d729-479c-a119-8b9e442702db_Screenshot_20250227_112521_Gallery item[1].quantity missing
273 Facture médecine douce-27709304 item[0].quantity missing
274 DDE prestation Facture des lentilles de contact-26715605 item[0].quantity missing
275 Facture médecine douce-27702699 item[0].quantity missing
276 70546a38-b6d7-4151-b08a-82b01c8e4a98_480755849_2183001178819517_1003204107786000285_n item[0].quantity missing
277 1724658298461facture_podologue_alicia item[0].quantity missing

View File

@@ -0,0 +1,61 @@
source_image,total_billed,sum_item_amount,diff
6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1,24.98,41.41,-16.429999999999996
ea64ca35-29f5-49ba-894b-13aa3421f1d6_doc00681020250513143142,138.96,115.78,23.180000000000007
b7df45bd-a142-4cb1-bb00-69122c5b405a_IMG20250520184626,594.8,798.0,-203.20000000000005
2dbb4025-f570-49f9-83d1-08aedb2b122d_lunettes_Lou-Ann,213.4,253.0,-39.599999999999994
Facture des lentilles de contact-27693842,130.0,150.0,-20.0
6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann,20.8,20.86,-0.05999999999999872
0a639214-b39e-4971-8d51-d83b7d2d2c2e_facture_lentilles_17_01_2025,540.8,550.8,-10.0
8f0e72ca-64e5-4797-87bc-9378b842433a_CF_1676107,64.48,59.88,4.600000000000001
Facture ostéopathe-26610949,60.0,100.0,-40.0
8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes,262.0,257.0,5.0
e566a3b0-cc99-4c4d-8715-9f3be841a066_noname,44.0,43.28,0.7199999999999989
Facture ostéopathe-25449739,70.0,60.0,10.0
47399c82-030f-4904-b8f4-2c3dc7c46723_image,77.61,0.0,77.61
209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue,72.63,0.0,72.63
85120394-0ea3-4a2c-b0d4-11b0fe31a4e7_20250213_145033,73.33,84.33,-11.0
43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures,147.0,105.0,42.0
a3e845ff-e89c-4eb0-bf8d-164979dfee2b_Recu_Osteo_LIGNAC_Sebastien,65.0,60.0,5.0
060cd8e5-ac6f-47e5-b8f5-fe9d3920c522_CHIRO_FEVRIER_2025,40.0,90.0,-50.0
1724397121050Adobe_Scan_17_avr._2024,45.96,43.00999999999999,2.95000000000001
8f2b09a0-d20d-4a94-a70a-fda31024af49_Facture_22_04_2025,340.0,290.0,50.0
06e1588f-71a7-43fc-bf99-d066db847189_Opticien_,375.6,613.6,-238.0
72cb658b-3239-4479-918d-aac53bc4a552_Facture_ophtalmo_14-01-2025,130.0,0.0,130.0
e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001,119.48,0.0,119.48
Facture ostéopathe-1066603,45.0,65.0,-20.0
DDE prestation Facture des lentilles de contact-26736761,78.0,76.0,2.0
Facture ostéopathe-26555547,220.65,0.0,220.65
96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai,154.04,535.1099999999999,-381.06999999999994
ba1b499f-199f-4106-bb2c-aac62a67c235_doc00448420250313084147,644.16,643.96,0.1999999999999318
1724397121048facture_kine_15-04-24,88.4,17.68,70.72
Facture ostéopathe-26401996,205.56,0.0,205.56
27c16ad0-3309-4eb1-9432-ab8a546fee7c_osteo,60.0,0.0,60.0
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,120.0,0.0,120.0
c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo,20.8,0.0,20.8
bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140,110.48,0.0,110.48
e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24,76.0,0.0,76.0
670b97e0-61da-4341-843d-a02b4ae76ef5_image,352.0,468.67,-116.67000000000002
e6c39a83-a53c-48f5-bffe-6bdb57337b33_image,560.06,560.0,0.05999999999994543
172442503141724-04-08_recu_kine_Suz,66.12,0.0,66.12
499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy,97.63,97.43,0.19999999999998863
5dec7b4c-0160-443c-a498-6e94875222aa_FEVRIER_HYPNO,90.0,72.0,18.0
ff6211dc-b681-4709-9451-d36fb961476f_MICHELIZZA_QuittanceFSE_1952,280.0,245.0,35.0
8ed22fd8-18ae-4c54-93ae-2f5cd4ec2af6_17375530503755104210735675998866,73.43,0.0,73.43
78ec53bb-a4c5-469f-a4d0-f42db4cf9b11_CF_1697735,64.48,59.88,4.600000000000001
4fa21bd9-393e-41d4-9af2-bf42004f4e94_image,15.93,14.48,1.4499999999999993
DDE prestation Facture des lentilles de contact-26764274,78.0,76.0,2.0
ffc129de-c82b-4c0b-8ab8-2941fd28cda9_Facture_opticien,525.6,828.3,-302.69999999999993
edb77e5c-ea68-4bad-a177-1256a3bafc9c_CF_1686176,64.48,59.88,4.600000000000001
32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337,92.4,75.72,16.680000000000007
82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN,40.0,60.0,-20.0
Facture des lentilles de contact-27603799,73.7,67.8,5.900000000000006
fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA,87.0,0.0,87.0
1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659,50.0,41.41,8.590000000000003
0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625,74.61,0.0,74.61
Facture des lentilles de contact-27687795,63.98,58.0,5.979999999999997
8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn,127.77,0.0,127.77
379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin_,26.5,53.0,-26.5
598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo,44.0,88.0,-44.0
1724226151831IMG_8005,33.0,38.0,-5.0
50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique,85.08,70.61,14.469999999999999
b5ecf45a-00cd-45a8-900d-6a4fc90a79a4_ZOPIE_CORPETTI_CAROLE,113.44,113.34,0.09999999999999432
1 source_image total_billed sum_item_amount diff
2 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1 24.98 41.41 -16.429999999999996
3 ea64ca35-29f5-49ba-894b-13aa3421f1d6_doc00681020250513143142 138.96 115.78 23.180000000000007
4 b7df45bd-a142-4cb1-bb00-69122c5b405a_IMG20250520184626 594.8 798.0 -203.20000000000005
5 2dbb4025-f570-49f9-83d1-08aedb2b122d_lunettes_Lou-Ann 213.4 253.0 -39.599999999999994
6 Facture des lentilles de contact-27693842 130.0 150.0 -20.0
7 6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann 20.8 20.86 -0.05999999999999872
8 0a639214-b39e-4971-8d51-d83b7d2d2c2e_facture_lentilles_17_01_2025 540.8 550.8 -10.0
9 8f0e72ca-64e5-4797-87bc-9378b842433a_CF_1676107 64.48 59.88 4.600000000000001
10 Facture ostéopathe-26610949 60.0 100.0 -40.0
11 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes 262.0 257.0 5.0
12 e566a3b0-cc99-4c4d-8715-9f3be841a066_noname 44.0 43.28 0.7199999999999989
13 Facture ostéopathe-25449739 70.0 60.0 10.0
14 47399c82-030f-4904-b8f4-2c3dc7c46723_image 77.61 0.0 77.61
15 209e4abe-4973-4fd3-83cc-64f940fb7372_facture_ophtalmologue 72.63 0.0 72.63
16 85120394-0ea3-4a2c-b0d4-11b0fe31a4e7_20250213_145033 73.33 84.33 -11.0
17 43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures 147.0 105.0 42.0
18 a3e845ff-e89c-4eb0-bf8d-164979dfee2b_Recu_Osteo_LIGNAC_Sebastien 65.0 60.0 5.0
19 060cd8e5-ac6f-47e5-b8f5-fe9d3920c522_CHIRO_FEVRIER_2025 40.0 90.0 -50.0
20 1724397121050Adobe_Scan_17_avr._2024 45.96 43.00999999999999 2.95000000000001
21 8f2b09a0-d20d-4a94-a70a-fda31024af49_Facture_22_04_2025 340.0 290.0 50.0
22 06e1588f-71a7-43fc-bf99-d066db847189_Opticien_ 375.6 613.6 -238.0
23 72cb658b-3239-4479-918d-aac53bc4a552_Facture_ophtalmo_14-01-2025 130.0 0.0 130.0
24 e997eddb-05a4-49f9-a7fd-82b48c2694b3_IMG_OPHTALMOLOGIE_20250117_0001 119.48 0.0 119.48
25 Facture ostéopathe-1066603 45.0 65.0 -20.0
26 DDE prestation Facture des lentilles de contact-26736761 78.0 76.0 2.0
27 Facture ostéopathe-26555547 220.65 0.0 220.65
28 96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai 154.04 535.1099999999999 -381.06999999999994
29 ba1b499f-199f-4106-bb2c-aac62a67c235_doc00448420250313084147 644.16 643.96 0.1999999999999318
30 1724397121048facture_kine_15-04-24 88.4 17.68 70.72
31 Facture ostéopathe-26401996 205.56 0.0 205.56
32 27c16ad0-3309-4eb1-9432-ab8a546fee7c_osteo 60.0 0.0 60.0
33 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo 120.0 0.0 120.0
34 c7704de1-820a-44bf-aef9-f7b185f94a26_facture_ophtalmo 20.8 0.0 20.8
35 bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140 110.48 0.0 110.48
36 e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24 76.0 0.0 76.0
37 670b97e0-61da-4341-843d-a02b4ae76ef5_image 352.0 468.67 -116.67000000000002
38 e6c39a83-a53c-48f5-bffe-6bdb57337b33_image 560.06 560.0 0.05999999999994543
39 172442503141724-04-08_recu_kine_Suz 66.12 0.0 66.12
40 499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy 97.63 97.43 0.19999999999998863
41 5dec7b4c-0160-443c-a498-6e94875222aa_FEVRIER_HYPNO 90.0 72.0 18.0
42 ff6211dc-b681-4709-9451-d36fb961476f_MICHELIZZA_QuittanceFSE_1952 280.0 245.0 35.0
43 8ed22fd8-18ae-4c54-93ae-2f5cd4ec2af6_17375530503755104210735675998866 73.43 0.0 73.43
44 78ec53bb-a4c5-469f-a4d0-f42db4cf9b11_CF_1697735 64.48 59.88 4.600000000000001
45 4fa21bd9-393e-41d4-9af2-bf42004f4e94_image 15.93 14.48 1.4499999999999993
46 DDE prestation Facture des lentilles de contact-26764274 78.0 76.0 2.0
47 ffc129de-c82b-4c0b-8ab8-2941fd28cda9_Facture_opticien 525.6 828.3 -302.69999999999993
48 edb77e5c-ea68-4bad-a177-1256a3bafc9c_CF_1686176 64.48 59.88 4.600000000000001
49 32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337 92.4 75.72 16.680000000000007
50 82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN 40.0 60.0 -20.0
51 Facture des lentilles de contact-27603799 73.7 67.8 5.900000000000006
52 fe86f6a8-8360-458e-834c-805ac99350cf_Monsieur_OUEDRAOGO_DAOUDA 87.0 0.0 87.0
53 1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659 50.0 41.41 8.590000000000003
54 0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625 74.61 0.0 74.61
55 Facture des lentilles de contact-27687795 63.98 58.0 5.979999999999997
56 8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn 127.77 0.0 127.77
57 379ed230-4e27-4f61-9039-226972f26cc9_Factures_Dani_medecin_ 26.5 53.0 -26.5
58 598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo 44.0 88.0 -44.0
59 1724226151831IMG_8005 33.0 38.0 -5.0
60 50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique 85.08 70.61 14.469999999999999
61 b5ecf45a-00cd-45a8-900d-6a4fc90a79a4_ZOPIE_CORPETTI_CAROLE 113.44 113.34 0.09999999999999432

View File

@@ -0,0 +1,193 @@
# Label Analysis Report
Input: `009_label_data_sample_seed_42.json`
## Overview
- Total records: 1232
- Total labels (flattened): 1259
- is_bill distribution: {True: 1126, False: 133}
- bill_paid distribution: {False: 74, True: 1011, None: 174}
- Invoice dates span: 2012-04-17 .. 2025-12-20
- Unique year-month pairs: 64
## Professions (top)
- Ostéopathe: 377
- Ostéopathie: 308
- Optique: 221
- (missing): 108
- Chiropractie: 54
- Psychologue: 52
- Unknown: 21
- Kinésithérapie: 15
- Ophtalmologie: 13
- Sophrologie: 12
- Podologue: 9
- Hypnothérapie: 7
- Pharmacie: 5
- Naturopathie: 5
- Etiopathie: 4
- Radiologie: 4
- Étiopathie: 4
- Soins hospitalier: 4
- Sophrologue: 3
- Etiopathe: 3
- Psychologie: 2
- Psychomotricité: 2
- Otique: 2
- Médecine Générale: 2
- Homéopathie: 2
- Kinésiologie: 2
- Pompes funèbres: 2
- Chirurgie dentaire: 1
- Biologie: 1
- Sevrage tabagique: 1
- Acupuncture: 1
- Étiopathe: 1
- unknown: 1
- Orthopédie: 1
- Psychiatrie: 1
- Diététicienne: 1
- Chiropratie: 1
- Medecine Générale: 1
- Sage-femme: 1
- Addictologie: 1
- Ophtalmologue: 1
- Diététique: 1
- Réflexologie: 1
## Currency distribution
- EUR: 1137
- F: 2
## Identifier and key field presence
- adeli_number: 902 present
- rpps_number: 179 present
- finess_number: 133 present
- prescripteur_finess_number: 42 present
- doctor_name: 1051 present
- invoice_issuer: 1122 present
- insured_name: 203 present
- beneficiary_name: 1098 present
- security_number: 494 present
- currency: 1139 present
## Flags
- is_handwriting: {True: 62, None: 858, False: 339}
- is_rotated: {False: 372, None: 856, True: 31}
## Numeric summaries
- total_billed: {'count': 1134, 'min': 1.0, 'p25': 55.0, 'median': 60.0, 'p75': 70.0, 'max': 18600.0, 'mean': 112.98805114638448, 'sum': 128128.45000000001, 'missing': 125}
- amount_paid: {'count': 1012, 'min': 0.0, 'p25': 50.0, 'median': 60.0, 'p75': 65.0, 'max': 18600.0, 'mean': 106.93289525691699, 'sum': 108216.08999999997, 'missing': 247}
- remaining_payment: {'count': 252, 'min': 0.0, 'p25': 0.0, 'median': 0.0, 'p75': 0.0, 'max': 2744.64, 'mean': 14.226587301587301, 'sum': 3585.1, 'missing': 1007}
- client_part: {'count': 153, 'min': 0.0, 'p25': 27.72, 'median': 46.41, 'p75': 81.5, 'max': 1100.0, 'mean': 95.07934640522876, 'sum': 14547.140000000007, 'missing': 1106}
- mandatory_coverage: {'count': 138, 'min': 0.0, 'p25': 0.09, 'median': 27.380000000000003, 'p75': 47.730000000000004, 'max': 1175.72, 'mean': 39.25376811594203, 'sum': 5417.020000000004, 'missing': 1121}
- complementary_coverage: {'count': 89, 'min': 0.0, 'p25': 0.0, 'median': 0.0, 'p75': 60.0, 'max': 370.03, 'mean': 54.395168539325844, 'sum': 4841.169999999999, 'missing': 1170}
## Items analysis
- Items per label: count=1259, min=0, max=14, mean=1.22
- total_billed vs sum(items.amount) mismatches: 60
## Data quality issues (sample)
- eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358: item[0].quantity missing
- eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358: item[1].quantity missing
- e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste: item[0].quantity missing
- e72e49e0-7891-4dc8-b62f-40c7bf6252a4_Nuria_Ravier_Ophtalomologiste: item[0].quantity missing
- Facture ostéopathe-25417096: item[0].quantity missing
- eb5561dd-937b-4c0b-b6c6-56da8b528c94_facture_vision_claire_ophtalmo_1_001: item[0].quantity missing
- 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1: item[0].quantity missing
- 6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1: item[1].quantity missing
- 1150ac6a-abdc-409d-b7ca-296e7b35769c_Facture_BOLZERJean_Francois_20250131GMAM: item[0].quantity missing
- Facture ostéopathe-26738638: item[0].quantity missing
- 34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09: bill_paid True but remaining_payment > 0
- 34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09: item[0].quantity missing
- 34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09: item[1].quantity missing
- abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044: item[0].quantity missing
- abff81b4-d895-47d5-b3d4-93da48f5c1e6_20241212_172044: item[1].quantity missing
- 2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024: item[4].amount negative: -2.9
- 2c75bd51-e61c-452e-b19b-494fa70e9cc9_Facture_lentilles_janvier_2024: item[5].amount negative: -3.0
- Facture ostéopathe-26605349: item[0].quantity missing
- Facture médecine douce-27702698: item[0].quantity missing
- 6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann: item[0].quantity missing
- f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue: item[0].quantity missing
- f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue: item[0].quantity missing
- 62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783: item[0].quantity missing
- 62b45277-a437-48c6-af52-d1814f23434f_17460042180346042803920585280783: item[1].quantity missing
- Facture ostéopathe-25847017: bill_paid True but remaining_payment > 0
- 02358c4c-820a-409f-9489-a75f22af3f32_image: item[0].quantity missing
- 02358c4c-820a-409f-9489-a75f22af3f32_image: item[1].quantity missing
- 7ef7306d-400e-4a56-b31c-02237212c470_Scan: item[0].quantity missing
- 7ef7306d-400e-4a56-b31c-02237212c470_Scan: item[1].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[0].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[1].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[3].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[4].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[6].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[7].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[8].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[9].quantity missing
- f482ebef-e885-4627-a927-96ad54ab5aa0_Facture: item[11].quantity missing
- Facture ostéopathe-26610949: item[0].quantity missing
- 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes: item[0].quantity missing
- 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes: item[1].quantity missing
- 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes: item[2].quantity missing
- 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes: item[3].quantity missing
- 8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes: item[4].quantity missing
- bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136: item[0].quantity missing
- bf09a877-0705-4c79-b908-6e8da44e44c7_17490641879424403676372514736136: item[1].quantity missing
- 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo: item[0].quantity missing
- 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo: item[1].quantity missing
- 17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo: item[0].quantity missing
- e566a3b0-cc99-4c4d-8715-9f3be841a066_noname: item[0].quantity missing
## Plots
- is_bill
![](plots/is_bill.png)
- bill_paid
![](plots/bill_paid.png)
- is_handwriting
![](plots/is_handwriting.png)
- is_rotated
![](plots/is_rotated.png)
- professions_top20
![](plots/professions_top20.png)
- currency
![](plots/currency.png)
- invoice_year_month
![](plots/invoice_year_month.png)
- items_per_label
![](plots/items_per_label.png)
- hist_total_billed
![](plots/hist_total_billed.png)
- hist_amount_paid
![](plots/hist_amount_paid.png)
- hist_remaining_payment
![](plots/hist_remaining_payment.png)
- hist_client_part
![](plots/hist_client_part.png)
- hist_mandatory_coverage
![](plots/hist_mandatory_coverage.png)
- hist_complementary_coverage
![](plots/hist_complementary_coverage.png)

Binary file not shown.

After

Width:  |  Height:  |  Size: 20 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 22 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 24 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 38 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 18 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 21 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 23 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 84 KiB

View File

@@ -0,0 +1,44 @@
profession,count
Ostéopathe,377
Ostéopathie,308
Optique,221
(missing),108
Chiropractie,54
Psychologue,52
Unknown,21
Kinésithérapie,15
Ophtalmologie,13
Sophrologie,12
Podologue,9
Hypnothérapie,7
Pharmacie,5
Naturopathie,5
Etiopathie,4
Radiologie,4
Étiopathie,4
Soins hospitalier,4
Sophrologue,3
Etiopathe,3
Psychologie,2
Psychomotricité,2
Otique,2
Médecine Générale,2
Homéopathie,2
Kinésiologie,2
Pompes funèbres,2
Chirurgie dentaire,1
Biologie,1
Sevrage tabagique,1
Acupuncture,1
Étiopathe,1
unknown,1
Orthopédie,1
Psychiatrie,1
Diététicienne,1
Chiropratie,1
Medecine Générale,1
Sage-femme,1
Addictologie,1
Ophtalmologue,1
Diététique,1
Réflexologie,1
1 profession count
2 Ostéopathe 377
3 Ostéopathie 308
4 Optique 221
5 (missing) 108
6 Chiropractie 54
7 Psychologue 52
8 Unknown 21
9 Kinésithérapie 15
10 Ophtalmologie 13
11 Sophrologie 12
12 Podologue 9
13 Hypnothérapie 7
14 Pharmacie 5
15 Naturopathie 5
16 Etiopathie 4
17 Radiologie 4
18 Étiopathie 4
19 Soins hospitalier 4
20 Sophrologue 3
21 Etiopathe 3
22 Psychologie 2
23 Psychomotricité 2
24 Otique 2
25 Médecine Générale 2
26 Homéopathie 2
27 Kinésiologie 2
28 Pompes funèbres 2
29 Chirurgie dentaire 1
30 Biologie 1
31 Sevrage tabagique 1
32 Acupuncture 1
33 Étiopathe 1
34 unknown 1
35 Orthopédie 1
36 Psychiatrie 1
37 Diététicienne 1
38 Chiropratie 1
39 Medecine Générale 1
40 Sage-femme 1
41 Addictologie 1
42 Ophtalmologue 1
43 Diététique 1
44 Réflexologie 1

163
filter/create_label_data.py Normal file
View File

@@ -0,0 +1,163 @@
#!/usr/bin/env python3
"""
Filter label_data.json using the file paths listed in dbscan_filtered_results.json.
This script keeps only the samples from label_data.json whose 'image' field
matches a file name found in the 'results' section of dbscan_filtered_results.json.
"""
import json
import os
from pathlib import Path
from typing import List, Dict, Set
def extract_image_names_from_filepath(filepath: str) -> str:
    """
    Reduce a path to the stem of its final component, for use as a comparison key.

    Args:
        filepath: Path string, e.g. a 'filepath' value from the dbscan results

    Returns:
        The final path component with its last suffix removed
    """
    path_obj = Path(filepath)
    return path_obj.stem
def load_dbscan_filtered_results(filepath: str) -> Set[str]:
    """
    Load dbscan filtered results and collect the file stems they reference.

    Args:
        filepath: Path to dbscan_filtered_results.json

    Returns:
        Set of file stems, one for each non-empty 'filepath' value found in
        the entries of the top-level 'results' list. The set is empty when
        'results' is absent, null or has no usable entries.
    """
    with open(filepath, 'r', encoding='utf-8') as f:
        data = json.load(f)

    # Path(p).stem and Path(Path(p).name).stem are always the same string, so
    # a single stem per entry is sufficient. Entries whose 'filepath' is
    # missing or empty are skipped.
    return {
        extract_image_names_from_filepath(entry.get('filepath', ''))
        for entry in (data.get('results') or [])
        if entry.get('filepath', '')
    }
def load_label_data(filepath: str) -> List[Dict]:
    """
    Read the label data JSON file and return its decoded content.

    Args:
        filepath: Location of label_data.json (read as UTF-8 text)

    Returns:
        The decoded JSON document, annotated as a list of sample dicts
    """
    with open(filepath, mode='r', encoding='utf-8') as handle:
        raw_text = handle.read()
    return json.loads(raw_text)
def filter_label_data(label_data: List[Dict], valid_image_names: Set[str]) -> List[Dict]:
    """
    Filter label data to keep only samples with matching image names.

    A sample matches when its 'image' value equals a valid name, contains a
    valid name as a substring, or is itself a substring of a valid name.
    Samples whose 'image' value is missing, empty or not a string never
    match, and empty valid names are ignored: the empty string is a substring
    of every string, so it would otherwise make everything match.

    Args:
        label_data: List of label data samples
        valid_image_names: Set of valid image names from dbscan results

    Returns:
        Filtered list of label data samples, in their original order
    """
    # Drop empty names once, up front, instead of re-checking per sample.
    candidate_names = {name for name in valid_image_names if name}

    def _matches(image_name) -> bool:
        """Return True when image_name is a usable string matching a candidate."""
        if not isinstance(image_name, str) or not image_name:
            return False
        if image_name in candidate_names:  # cheap exact match first
            return True
        # Substring containment, checked in both directions in one pass.
        return any(
            name in image_name or image_name in name
            for name in candidate_names
        )

    return [sample for sample in label_data if _matches(sample.get('image', ''))]
def save_filtered_data(filtered_data: List[Dict], output_filepath: str):
    """
    Write the filtered samples to disk as JSON.

    The file is UTF-8 encoded, indented by two spaces, and non-ASCII
    characters are written verbatim rather than escaped.

    Args:
        filtered_data: Filtered label data
        output_filepath: Destination path of the JSON file
    """
    with open(output_filepath, mode='w', encoding='utf-8') as sink:
        sink.write(json.dumps(filtered_data, ensure_ascii=False, indent=2))
def main(
    dbscan_results_path='/home/nguyendc/sonnh/embedding-clustering/filter/029_dbscan_v3_filtered_results_temp_30.json',
    label_data_path='/home/nguyendc/sonnh/embedding-clustering/filter/label_data.json',
    output_path='030_dbscan_v3_retrain_014.json',
):
    """Main function to execute the filtering process.

    Loads the image names kept by the DBSCAN filter, keeps only the label
    samples that match one of them, writes the result and prints a summary.
    If either input file is missing an error is printed and the function
    returns without writing anything.

    Args:
        dbscan_results_path: Path to the DBSCAN filtered results JSON
        label_data_path: Path to the label data JSON
        output_path: Path the filtered label data is written to
    """
    print(dbscan_results_path)

    # Check if input files exist
    for required_path in (dbscan_results_path, label_data_path):
        if not os.path.exists(required_path):
            print(f"Error: {required_path} not found!")
            return

    print("Loading dbscan filtered results...")
    valid_image_names = load_dbscan_filtered_results(dbscan_results_path)
    print(f"Found {len(valid_image_names)} unique image names in dbscan results")

    print("Loading label data...")
    label_data = load_label_data(label_data_path)
    print(f"Loaded {len(label_data)} samples from label data")

    print("Filtering label data...")
    filtered_data = filter_label_data(label_data, valid_image_names)
    print(f"Filtered to {len(filtered_data)} samples")

    print(f"Saving filtered data to {output_path}...")
    save_filtered_data(filtered_data, output_path)

    print("Filtering completed successfully!")
    print(f"Original samples: {len(label_data)}")
    print(f"Filtered samples: {len(filtered_data)}")
    print(f"Reduction: {len(label_data) - len(filtered_data)} samples removed")
    # An empty label file would otherwise raise ZeroDivisionError here,
    # after the output has already been written.
    if label_data:
        print(f"Retention rate: {len(filtered_data)/len(label_data)*100:.2f}%")
    else:
        print("Retention rate: n/a (label data is empty)")


if __name__ == "__main__":
    main()

326
filter/dbscan.py Normal file
View File

@@ -0,0 +1,326 @@
#!/usr/bin/env python3
"""
DBSCAN Clustering Filter
Filters clustering results based on specific criteria:
- For each cluster: select 50% of points
- 25% from center region (closest to centroid)
- 25% from border region (furthest from centroid)
- All noise points are selected
- Uses cosine distance metric
"""
import json
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_distances
import argparse
import os
from pathlib import Path
class DBSCANFilter:
    """Reduce DBSCAN clustering results to a representative subset.

    For every cluster a share of the points is kept, split between the points
    closest to the cluster centroid ("center") and the points furthest from
    it ("border"), measured by cosine distance. Noise points are always kept.
    """

    # Selection parameters used by filter_cluster and reported in the saved
    # metadata. Experiment history:
    #   v1: 0.5 of data, 0.5 center / 0.5 border
    #   v2: 0.5 of data, 0.25 center / 0.75 border  (values used here)
    #   v3: 0.75 of data, 0.25 center / 0.75 border
    DEFAULT_SELECTION_RATIO = 0.5
    CENTER_FRACTION = 0.25
    MIN_POINTS_PER_CLUSTER = 15

    def __init__(self, embeddings_path, clustering_results_path):
        """
        Initialize DBSCAN filter

        Args:
            embeddings_path: Path to embeddings JSON file
            clustering_results_path: Path to DBSCAN clustering results JSON
        """
        self.embeddings_path = embeddings_path
        self.clustering_results_path = clustering_results_path
        self.embeddings = None
        self.embeddings_normalized = None
        self.clustering_results = None
        self.filepath_to_embedding = {}

    def load_data(self):
        """Load embeddings and clustering results.

        Populates ``filepath_to_embedding``, ``embeddings`` (float32 array),
        ``embeddings_normalized`` (L2-normalized rows) and
        ``clustering_results``.
        """
        print("Loading embeddings...")
        # Read explicitly as UTF-8 (the encoding this class writes its own
        # outputs in) instead of relying on the platform default.
        with open(self.embeddings_path, 'r', encoding='utf-8') as f:
            embeddings_data = json.load(f)

        # Create mapping from filepath to embedding
        embeddings_list = []
        for item in embeddings_data:
            self.filepath_to_embedding[item['filepath']] = item['embedding']
            embeddings_list.append(item['embedding'])

        self.embeddings = np.array(embeddings_list, dtype=np.float32)
        self.embeddings_normalized = normalize(self.embeddings, norm='l2')
        print(f"Loaded {len(embeddings_list)} embeddings")

        print("Loading clustering results...")
        with open(self.clustering_results_path, 'r', encoding='utf-8') as f:
            self.clustering_results = json.load(f)
        print(f"Loaded clustering results: {self.clustering_results['n_clusters']} clusters, "
              f"{self.clustering_results['n_samples']} samples")

    def group_by_clusters(self):
        """Group data points by cluster labels.

        Returns:
            Tuple ``(clusters, noise_points)``: ``clusters`` maps cluster id
            to a list of ``{'filepath', 'embedding'}`` dicts; ``noise_points``
            is a list of the same dicts for entries flagged ``is_noise`` or
            labelled ``-1``.
        Raises:
            KeyError: if a result's filepath has no loaded embedding.
        """
        clusters = {}
        noise_points = []
        for result in self.clustering_results['results']:
            cluster_id = result['cluster']
            filepath = result['filepath']
            point = {
                'filepath': filepath,
                'embedding': self.filepath_to_embedding[filepath],
            }
            # 'is_noise' is optional; a cluster label of -1 also means noise.
            if result.get('is_noise', False) or cluster_id == -1:
                noise_points.append(point)
            else:
                clusters.setdefault(cluster_id, []).append(point)
        return clusters, noise_points

    def calculate_cluster_centroid(self, cluster_points):
        """Calculate centroid of a cluster using normalized embeddings"""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # For cosine distance, centroid is the normalized mean
        centroid = np.mean(embeddings_normalized, axis=0)
        centroid_normalized = normalize(centroid.reshape(1, -1), norm='l2')[0]
        return centroid_normalized

    def calculate_cosine_distances_to_centroid(self, cluster_points, centroid):
        """Calculate cosine distances from each point to cluster centroid"""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # Calculate cosine distances to centroid
        distances = cosine_distances(embeddings_normalized, centroid.reshape(1, -1)).flatten()
        return distances

    def filter_cluster(self, cluster_points, selection_ratio=DEFAULT_SELECTION_RATIO):
        """
        Filter points from a cluster

        Keeps ``max(15, int(len(cluster_points) * selection_ratio))`` points.
        A quarter of them (at least one) are the points closest to the
        centroid, the remainder are the points furthest from it. When that
        number reaches the cluster size, the input list itself is returned.

        Args:
            cluster_points: List of points in the cluster
            selection_ratio: Ratio of points to select (default: 0.5 = 50%)
        Returns:
            List of selected points
        """
        if not cluster_points:
            return []

        # Calculate how many points to select
        total_points = len(cluster_points)
        num_to_select = max(self.MIN_POINTS_PER_CLUSTER, int(total_points * selection_ratio))

        # If we need to select all points, just return all
        if num_to_select >= total_points:
            return cluster_points

        centroid = self.calculate_cluster_centroid(cluster_points)
        distances = self.calculate_cosine_distances_to_centroid(cluster_points, centroid)

        # Rank points from closest to furthest from the centroid
        ranked = sorted(zip(cluster_points, distances), key=lambda pair: pair[1])

        # 25% of the selection from the center, the remaining 75% from the border
        center_count = max(1, int(num_to_select * self.CENTER_FRACTION))
        border_count = num_to_select - center_count

        selected_points = [point for point, _ in ranked[:center_count]]
        # Guard needed: a slice of [-0:] would return the whole list
        if border_count > 0:
            selected_points.extend(point for point, _ in ranked[-border_count:])

        print(f"Cluster with {total_points} points -> selected {len(selected_points)} points "
              f"({center_count} center + {border_count} border)")
        return selected_points

    def filter_all_clusters(self):
        """Filter all clusters according to the specified criteria.

        Returns:
            List of result dicts (``filepath``, ``cluster``, ``is_noise``,
            ``selection_type``): the selected points of every cluster followed
            by all noise points.
        """
        print("\n" + "="*60)
        print("FILTERING DBSCAN CLUSTERING RESULTS")
        print("="*60)

        clusters, noise_points = self.group_by_clusters()
        print(f"Found {len(clusters)} clusters and {len(noise_points)} noise points")

        filtered_results = []
        # Process each cluster
        for cluster_id, cluster_points in clusters.items():
            print(f"\nProcessing Cluster {cluster_id}:")
            for point in self.filter_cluster(cluster_points):
                filtered_results.append({
                    'filepath': point['filepath'],
                    'cluster': cluster_id,
                    'is_noise': False,
                    'selection_type': 'cluster_filtered'
                })

        # Add all noise points
        print(f"\nAdding all {len(noise_points)} noise points...")
        for point in noise_points:
            filtered_results.append({
                'filepath': point['filepath'],
                'cluster': -1,
                'is_noise': True,
                'selection_type': 'noise'
            })
        return filtered_results

    def save_filtered_results(self, filtered_results, output_path=None):
        """Save filtered results to JSON file.

        Args:
            filtered_results: Result dicts as returned by filter_all_clusters
            output_path: Destination file; defaults to
                ``<clustering results stem>_filtered.json`` in the current
                working directory
        Returns:
            The path the results were written to
        """
        if output_path is None:
            # Generate output filename based on input
            base_name = Path(self.clustering_results_path).stem
            output_path = f"{base_name}_filtered.json"

        # Create summary statistics
        cluster_stats = {}
        noise_count = 0
        for result in filtered_results:
            if result['is_noise']:
                noise_count += 1
            else:
                cluster_id = result['cluster']
                cluster_stats[cluster_id] = cluster_stats.get(cluster_id, 0) + 1

        original_n_samples = self.clustering_results['n_samples']

        # Prepare output data
        output_data = {
            "method": "DBSCAN_FILTERED",
            "original_n_clusters": self.clustering_results['n_clusters'],
            "original_n_samples": original_n_samples,
            "filtered_n_samples": len(filtered_results),
            "filtering_criteria": {
                # Report the parameters filter_cluster actually applies
                "cluster_selection_ratio": self.DEFAULT_SELECTION_RATIO,
                "center_points_ratio": self.CENTER_FRACTION,  # share of selected points taken from center
                "border_points_ratio": 1 - self.CENTER_FRACTION,  # share of selected points taken from border
                "noise_points": "all_selected"
            },
            "cluster_statistics": cluster_stats,
            "noise_points": noise_count,
            "results": filtered_results
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)

        print("\n" + "="*60)
        print("FILTERING SUMMARY")
        print("="*60)
        print(f"Original samples: {original_n_samples}")
        print(f"Filtered samples: {len(filtered_results)}")
        # Avoid ZeroDivisionError when the clustering file reports no samples
        if original_n_samples:
            print(f"Reduction ratio: {len(filtered_results)/original_n_samples:.2%}")
        else:
            print("Reduction ratio: n/a (no original samples)")
        print("\nCluster breakdown:")
        for cluster_id, count in sorted(cluster_stats.items()):
            print(f" Cluster {cluster_id}: {count} points")
        print(f" Noise points: {noise_count} points")
        print(f"\nFiltered results saved to: {output_path}")
        return output_path

    def create_filepath_list(self, filtered_results, output_txt_path=None):
        """Create a simple text file with filtered filepaths (one per line).

        Args:
            filtered_results: Result dicts as returned by filter_all_clusters
            output_txt_path: Destination file; defaults to
                ``<clustering results stem>_filtered_filepaths.txt``
        Returns:
            The path the list was written to
        """
        if output_txt_path is None:
            base_name = Path(self.clustering_results_path).stem
            output_txt_path = f"{base_name}_filtered_filepaths.txt"

        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{result['filepath']}\n" for result in filtered_results)

        print(f"Filepath list saved to: {output_txt_path}")
        return output_txt_path
def main():
    """Command-line entry point for the DBSCAN result filter.

    Parses the arguments, checks that both input files exist (printing an
    error and returning if one does not), then loads the data, filters the
    clusters, saves the result and optionally writes a plain filepath list.
    """
    cli = argparse.ArgumentParser(description="Filter DBSCAN clustering results")
    cli.add_argument("--embeddings_path", required=True,
                     help="Path to embeddings JSON file")
    cli.add_argument("--clustering_results_path", required=True,
                     help="Path to DBSCAN clustering results JSON file")
    cli.add_argument("--output_path",
                     help="Output path for filtered results (optional)")
    cli.add_argument("--create_filepath_list", action="store_true",
                     help="Also create a simple text file with filtered filepaths")
    options = cli.parse_args()

    # Stop at the first missing input file, in the same order as the arguments
    required_inputs = (
        ("Embeddings file", options.embeddings_path),
        ("Clustering results file", options.clustering_results_path),
    )
    for label, path in required_inputs:
        if not os.path.exists(path):
            print(f"Error: {label} not found: {path}")
            return

    # Load, filter, save
    dbscan_filter = DBSCANFilter(options.embeddings_path, options.clustering_results_path)
    dbscan_filter.load_data()
    selected = dbscan_filter.filter_all_clusters()
    dbscan_filter.save_filtered_results(selected, options.output_path)

    # Optional plain-text list of the kept filepaths
    if options.create_filepath_list:
        dbscan_filter.create_filepath_list(selected)

    print("\nFiltering completed successfully!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,429 @@
#!/usr/bin/env python3
"""
DBSCAN Clustering Filter
Filters clustering results based on specific criteria:
- For each cluster: select 50% of points
- 25% from center region (closest to centroid)
- 25% from border region (furthest from centroid)
- All noise points are selected
- Uses cosine distance metric
"""
import json
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_distances
import argparse
import os
from pathlib import Path
import random
class DBSCANFilter:
    """Reduce DBSCAN clustering results to a representative random subset.

    Points of a cluster are split into a center region (cosine distance to
    the centroid below a threshold) and a border region (the rest); a seeded
    random sample is drawn from each. Noise points are always kept.
    """

    # Parameters of the v3 selection scheme used by filter_cluster
    DEFAULT_SELECTION_RATIO = 0.3
    MIN_POINTS_PER_CLUSTER = 15
    CENTER_DISTANCE_THRESHOLD = 0.1
    CENTER_SAMPLE_RATIO = 0.15
    BORDER_SAMPLE_RATIO = 0.3
    BORDER_KEEP_ALL_BELOW = 70
    RANDOM_SEED = 42

    def __init__(self, embeddings_path, clustering_results_path):
        """
        Initialize DBSCAN filter

        Args:
            embeddings_path: Path to embeddings JSON file
            clustering_results_path: Path to DBSCAN clustering results JSON
        """
        self.embeddings_path = embeddings_path
        self.clustering_results_path = clustering_results_path
        self.embeddings = None
        self.embeddings_normalized = None
        self.clustering_results = None
        self.filepath_to_embedding = {}

    def load_data(self):
        """Load embeddings and clustering results.

        Populates ``filepath_to_embedding``, ``embeddings`` (float32 array),
        ``embeddings_normalized`` (L2-normalized rows) and
        ``clustering_results``.
        """
        print("Loading embeddings...")
        # Read explicitly as UTF-8 (the encoding this class writes its own
        # outputs in) instead of relying on the platform default.
        with open(self.embeddings_path, 'r', encoding='utf-8') as f:
            embeddings_data = json.load(f)

        # Create mapping from filepath to embedding
        embeddings_list = []
        for item in embeddings_data:
            self.filepath_to_embedding[item['filepath']] = item['embedding']
            embeddings_list.append(item['embedding'])

        self.embeddings = np.array(embeddings_list, dtype=np.float32)
        self.embeddings_normalized = normalize(self.embeddings, norm='l2')
        print(f"Loaded {len(embeddings_list)} embeddings")

        print("Loading clustering results...")
        with open(self.clustering_results_path, 'r', encoding='utf-8') as f:
            self.clustering_results = json.load(f)
        print(f"Loaded clustering results: {self.clustering_results['n_clusters']} clusters, "
              f"{self.clustering_results['n_samples']} samples")

    def group_by_clusters(self):
        """Group data points by cluster labels.

        Returns:
            Tuple ``(clusters, noise_points)``: ``clusters`` maps cluster id
            to a list of ``{'filepath', 'embedding'}`` dicts; ``noise_points``
            is a list of the same dicts for entries flagged ``is_noise`` or
            labelled ``-1``.
        Raises:
            KeyError: if a result's filepath has no loaded embedding.
        """
        clusters = {}
        noise_points = []
        for result in self.clustering_results['results']:
            cluster_id = result['cluster']
            filepath = result['filepath']
            point = {
                'filepath': filepath,
                'embedding': self.filepath_to_embedding[filepath],
            }
            # 'is_noise' is optional; a cluster label of -1 also means noise.
            if result.get('is_noise', False) or cluster_id == -1:
                noise_points.append(point)
            else:
                clusters.setdefault(cluster_id, []).append(point)
        return clusters, noise_points

    def calculate_cluster_centroid(self, cluster_points):
        """Calculate centroid of a cluster using normalized embeddings"""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # For cosine distance, centroid is the normalized mean
        centroid = np.mean(embeddings_normalized, axis=0)
        centroid_normalized = normalize(centroid.reshape(1, -1), norm='l2')[0]
        return centroid_normalized

    def calculate_cosine_distances_to_centroid(self, cluster_points, centroid):
        """Calculate cosine distances from each point to cluster centroid"""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # Calculate cosine distances to centroid
        distances = cosine_distances(embeddings_normalized, centroid.reshape(1, -1)).flatten()
        return distances

    # v3
    def filter_cluster(self, cluster_points, selection_ratio=DEFAULT_SELECTION_RATIO):
        """
        Filter points from a cluster

        If ``max(15, int(len(cluster_points) * selection_ratio))`` reaches the
        cluster size, the input list itself is returned. Otherwise the points
        are ranked by cosine distance to the centroid and split at 0.1:
          * center (distance < 0.1): random 15% sample, at least one point
            when the region is non-empty
          * border (distance >= 0.1): every point if there are fewer than 70,
            otherwise a random 30% sample
        Sampling uses a private generator seeded with 42, so results are
        reproducible and the process-wide ``random`` state is left untouched.

        Args:
            cluster_points: List of points in the cluster
            selection_ratio: Ratio used for the keep-everything check
                (default: 0.3 = 30%)
        Returns:
            List of selected points
        """
        if not cluster_points:
            return []

        total_points = len(cluster_points)
        num_to_select = max(self.MIN_POINTS_PER_CLUSTER, int(total_points * selection_ratio))
        # If we need to select all points, just return all
        if num_to_select >= total_points:
            return cluster_points

        centroid = self.calculate_cluster_centroid(cluster_points)
        distances = self.calculate_cosine_distances_to_centroid(cluster_points, centroid)

        # Rank from closest to furthest; the order fixes which points a given
        # seed selects, so it must stay deterministic.
        ranked = sorted(zip(cluster_points, distances), key=lambda pair: pair[1])

        dis = self.CENTER_DISTANCE_THRESHOLD
        all_center_points = [point for point, distance in ranked if distance < dis]
        print(f"Number of center points (distance < {dis}): {len(all_center_points)}")
        all_border_points = [point for point, distance in ranked if distance >= dis]
        print(f"Number of border points (distance >= {dis}): {len(all_border_points)}")

        n_center = len(all_center_points)
        # Only force a minimum of one center point when there is one to pick;
        # sampling 1 item from an empty list raises ValueError.
        center_count = max(1, int(n_center * self.CENTER_SAMPLE_RATIO)) if n_center else 0

        n_border = len(all_border_points)
        if n_border < self.BORDER_KEEP_ALL_BELOW:
            border_count = n_border
        else:
            border_count = max(0, int(n_border * self.BORDER_SAMPLE_RATIO))

        # Private generator: same sequence as random.seed(42) on the global
        # generator, without resetting global state for the rest of the process.
        rng = random.Random(self.RANDOM_SEED)

        selected_points = []
        if center_count > 0:
            selected_points.extend(rng.sample(all_center_points, center_count))
        if border_count > 0:
            selected_points.extend(rng.sample(all_border_points, border_count))

        print(f"Cluster with {total_points} points -> selected {len(selected_points)} points "
              f"({center_count} center + {border_count} border)")
        return selected_points

    def filter_all_clusters(self):
        """Filter all clusters according to the specified criteria.

        Returns:
            List of result dicts (``filepath``, ``cluster``, ``is_noise``,
            ``selection_type``): the selected points of every cluster followed
            by all noise points.
        """
        print("\n" + "="*60)
        print("FILTERING DBSCAN CLUSTERING RESULTS")
        print("="*60)

        clusters, noise_points = self.group_by_clusters()
        print(f"Found {len(clusters)} clusters and {len(noise_points)} noise points")

        filtered_results = []
        # Process each cluster
        for cluster_id, cluster_points in clusters.items():
            print(f"\nProcessing Cluster {cluster_id}:")
            for point in self.filter_cluster(cluster_points):
                filtered_results.append({
                    'filepath': point['filepath'],
                    'cluster': cluster_id,
                    'is_noise': False,
                    'selection_type': 'cluster_filtered'
                })

        # Add all noise points
        print(f"\nAdding all {len(noise_points)} noise points...")
        for point in noise_points:
            filtered_results.append({
                'filepath': point['filepath'],
                'cluster': -1,
                'is_noise': True,
                'selection_type': 'noise'
            })
        return filtered_results

    def save_full_clusters_separately(self, output_dir=None):
        """Save ALL points of each cluster (and noise) into separate JSON files.

        Each file uses the same keys and structure as the aggregated file
        written by save_filtered_results; only the ``results`` subset (and
        the derived counts) differ. Noise is treated as a cluster with id -1
        and written to ``<stem>_cluster_noise.json``.

        Args:
            output_dir: Optional directory to place per-cluster files (it is
                created if missing). If None, files are written to the
                current working directory.
        Returns:
            List of written file paths.
        """
        clusters, noise_points = self.group_by_clusters()
        base_name = Path(self.clustering_results_path).stem

        if output_dir is None:
            output_dir = Path('.')
        else:
            output_dir = Path(output_dir)
            output_dir.mkdir(parents=True, exist_ok=True)

        written_files = []

        # Helper to build and write one cluster file
        def _write_cluster_file(cluster_id, points, is_noise=False):
            # Build results entries preserving field names
            results_entries = [
                {
                    'filepath': p['filepath'],
                    'cluster': -1 if is_noise else cluster_id,
                    'is_noise': is_noise,
                    'selection_type': 'noise' if is_noise else 'cluster_full'
                }
                for p in points
            ]

            # Stats: a noise file has no cluster entries, a cluster file no noise
            cluster_stats = {} if is_noise else {cluster_id: len(points)}
            noise_count = len(points) if is_noise else 0

            output_data = {
                "method": "DBSCAN_FILTERED",  # keep unchanged label
                "original_n_clusters": self.clustering_results['n_clusters'],
                "original_n_samples": self.clustering_results['n_samples'],
                "filtered_n_samples": len(results_entries),
                "filtering_criteria": {
                    # Placeholder literals kept only so the structure matches
                    # the aggregated file; no filtering is applied here.
                    "cluster_selection_ratio": 0.5,
                    "center_points_ratio": 0.5,
                    "border_points_ratio": 0.5,
                    "noise_points": "all_selected"
                },
                "cluster_statistics": cluster_stats,
                "noise_points": noise_count,
                "results": results_entries
            }

            if is_noise:
                filename = f"{base_name}_cluster_noise.json"
            else:
                filename = f"{base_name}_cluster_{cluster_id}.json"
            out_path = output_dir / filename

            with open(out_path, 'w', encoding='utf-8') as f:
                json.dump(output_data, f, indent=4, ensure_ascii=False)
            written_files.append(str(out_path))
            print(f"Saved cluster {cluster_id if not is_noise else 'noise'} -> {out_path} ({len(points)} samples)")

        # Write each real cluster
        for cid, pts in clusters.items():
            _write_cluster_file(cid, pts, is_noise=False)

        # Noise as its own cluster
        if noise_points:
            _write_cluster_file(-1, noise_points, is_noise=True)

        return written_files

    def save_filtered_results(self, filtered_results, output_path=None):
        """Save filtered results to JSON file.

        Args:
            filtered_results: Result dicts as returned by filter_all_clusters
            output_path: Destination file; defaults to
                ``<clustering results stem>_filtered.json`` in the current
                working directory
        Returns:
            The path the results were written to
        """
        if output_path is None:
            # Generate output filename based on input
            base_name = Path(self.clustering_results_path).stem
            output_path = f"{base_name}_filtered.json"

        # Create summary statistics
        cluster_stats = {}
        noise_count = 0
        for result in filtered_results:
            if result['is_noise']:
                noise_count += 1
            else:
                cluster_id = result['cluster']
                cluster_stats[cluster_id] = cluster_stats.get(cluster_id, 0) + 1

        original_n_samples = self.clustering_results['n_samples']

        # Prepare output data
        output_data = {
            "method": "DBSCAN_FILTERED",
            "original_n_clusters": self.clustering_results['n_clusters'],
            "original_n_samples": original_n_samples,
            "filtered_n_samples": len(filtered_results),
            "filtering_criteria": {
                # NOTE(review): these literals do not describe what
                # filter_cluster does (distance threshold 0.1, 15% center /
                # 30% border random samples). The existing keys cannot express
                # that scheme, so the values are left as they were - confirm
                # whether the metadata format should be extended.
                "cluster_selection_ratio": 0.5,
                "center_points_ratio": 0.5,
                "border_points_ratio": 0.5,
                "noise_points": "all_selected"
            },
            "cluster_statistics": cluster_stats,
            "noise_points": noise_count,
            "results": filtered_results
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)

        print("\n" + "="*60)
        print("FILTERING SUMMARY")
        print("="*60)
        print(f"Original samples: {original_n_samples}")
        print(f"Filtered samples: {len(filtered_results)}")
        # Avoid ZeroDivisionError when the clustering file reports no samples
        if original_n_samples:
            print(f"Reduction ratio: {len(filtered_results)/original_n_samples:.2%}")
        else:
            print("Reduction ratio: n/a (no original samples)")
        print("\nCluster breakdown:")
        for cluster_id, count in sorted(cluster_stats.items()):
            print(f" Cluster {cluster_id}: {count} points")
        print(f" Noise points: {noise_count} points")
        print(f"\nFiltered results saved to: {output_path}")
        return output_path

    def create_filepath_list(self, filtered_results, output_txt_path=None):
        """Create a simple text file with filtered filepaths (one per line).

        Args:
            filtered_results: Result dicts as returned by filter_all_clusters
            output_txt_path: Destination file; defaults to
                ``<clustering results stem>_filtered_filepaths.txt``
        Returns:
            The path the list was written to
        """
        if output_txt_path is None:
            base_name = Path(self.clustering_results_path).stem
            output_txt_path = f"{base_name}_filtered_filepaths.txt"

        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{result['filepath']}\n" for result in filtered_results)

        print(f"Filepath list saved to: {output_txt_path}")
        return output_txt_path
def main():
    """Command-line entry point for the DBSCAN result filter.

    Parses the arguments, checks that both input files exist (printing an
    error and returning if one does not), then loads the data, filters the
    clusters and saves the result. Optionally also dumps every full cluster
    to its own file and writes a plain filepath list.
    """
    cli = argparse.ArgumentParser(description="Filter DBSCAN clustering results")
    cli.add_argument("--embeddings_path", required=True,
                     help="Path to embeddings JSON file")
    cli.add_argument("--clustering_results_path", required=True,
                     help="Path to DBSCAN clustering results JSON file")
    cli.add_argument("--output_path",
                     help="Output path for filtered results (optional)")
    cli.add_argument("--create_filepath_list", action="store_true",
                     help="Also create a simple text file with filtered filepaths")
    cli.add_argument("--save_full_clusters", action="store_true",
                     help="Save ALL points of each cluster (and noise) into separate JSON files without altering format")
    cli.add_argument("--clusters_output_dir",
                     help="Directory to store per-cluster JSON files (default: current directory)")
    options = cli.parse_args()

    # Stop at the first missing input file, in the same order as the arguments
    required_inputs = (
        ("Embeddings file", options.embeddings_path),
        ("Clustering results file", options.clustering_results_path),
    )
    for label, path in required_inputs:
        if not os.path.exists(path):
            print(f"Error: {label} not found: {path}")
            return

    # Load, filter, save
    dbscan_filter = DBSCANFilter(options.embeddings_path, options.clustering_results_path)
    dbscan_filter.load_data()
    selected = dbscan_filter.filter_all_clusters()
    dbscan_filter.save_filtered_results(selected, options.output_path)

    # Optional unfiltered per-cluster dump
    if options.save_full_clusters:
        print("\nSaving full clusters (no filtering) separately...")
        dbscan_filter.save_full_clusters_separately(options.clusters_output_dir)

    # Optional plain-text list of the kept filepaths
    if options.create_filepath_list:
        dbscan_filter.create_filepath_list(selected)

    print("\nFiltering completed successfully!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,981 @@
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/eedda33e-049b-4ab8-aee8-6f5d0c88b500_IMG_0358_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/dd1f6f53-e26f-44a1-90af-3f0167eea8e6_IMG_1045_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d05b8d2e-0d0b-4354-9f3a-5cfaacf274da_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f5e670b2-9825-444b-a6c3-4f5e2a8d2882_20250206_110853_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6e609e6b-dd09-4dd8-9192-416dd99e3fc6_IMG_20250207_135202_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7e3f586b-8053-40b0-9902-485f6af20e7c_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/df1aed59-e90c-44b3-b554-9486d68418da_IMG_4636_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6b927189-bc6c-49e5-aadc-9c58f56a6d68_IMG_2695_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3f09fc00-ffd5-498a-adb8-b6805519ad3a_20250212_133526_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/dbe359c6-62bd-4697-874d-992495192860_20250212_164630_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031238IMG_20240417_161859.jpg_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1271a6e8-cf74-4e87-927d-abdefb4609f6_1739029106267._1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 27049977-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5ff72751-5d92-463c-a272-97fda77c34f8_2025-01-31-13187-Luc_Lisa_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/881d86b7-5bdb-4a16-bcbc-57202e0879b1_osteo_fevrier_2025_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8c0d32a4-a14c-4294-9130-d34cc8fcdffa_Piece_identite__livret_famille-2_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/caaa1340-dc45-4133-9ff4-8bac8e2f6bbd_Facture_Lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17244250314221713370771620192636844333496435_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 27060888-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/141f7ccc-37ab-4da5-9788-80a27e819b8b_Feuilles_de_soins_Psychologue_x2_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4b0d7324-9a46-4f2c-872b-4b9c06cb0566_595570692474790_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/72347d5c-1c18-4a42-8b6c-f0a365dbaf97_FACTURE_N_14_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/46ddeec5-97bf-4175-b214-b7bd74c4fa24_Renouvin_Ferlicot_Andre_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172415668128117241471676118541084374852557430_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27781771_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8682e432-4b4f-4152-a1d6-0e5f52eee148_Osteo_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27620389_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a87c1caa-f6cb-4fbe-a460-8f95e76217c8_IMG_20250207_100001_1_scale_0.9.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/32ffb8dc-fbd0-4edf-94b2-7b784cc72ff2_17479876010256548167621008372337_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b32c815a-62d9-40d2-b979-eb10e581f7a9_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26684583_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/33fea246-3d1b-4751-b216-3c2f5a07d26d_facture_lunettes_pat_16.5.2024_001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5600db4f-5884-4f54-87ce-56c8b3f5f0a0_Aurelie_GALEOTTI_Ezio__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/preview-locataire-certificat-de-scolarite-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469378osteopathe_renau_anais_le_31-07-2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25352522_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/eb5561dd-937b-4c0b-b6c6-56da8b528c94_facture_vision_claire_ophtalmo_1_001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366229Facture_RIVALLANDChrystelle_20240413ICQO_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8588f081-a48b-4676-90e1-cabcd66a74ad_IMG_20250207_110801_1_scale_0.9.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703140_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/831984b8-6c87-4f5d-b0a0-2d0ab5cc9061_20250207_140107_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f02ef934-5964-4230-bfab-42bb97f20398_Scan_20250206_112050_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27238995_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB_Mr_Mme_PHAM-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1142334_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172415668130417241454121786128659475795835945_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ddc16454-d05b-4e9d-b03c-50468919fef3_IMG_9601_1_scale_0.4.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c0625556-2cb1-42c8-9433-e31274c121a3_Livret_de_famille_hamidouna-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/588bd118-1770-4c2a-8d86-a6a6cbed79b2_osteopathe_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e3a47913-2535-4417-a34f-fb8213cd8174_recu_osteopathe_du_11-02-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 27059891-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d6b93c3d-f00e-49ac-8fce-ebd2c99ec6a5_Feuille_de_soins_PROBST_Annie_20250315_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3e613099-4659-4efd-b33f-811fc5fc764d_IMG_20250206_131824_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26692990_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658225847osteo_du_17-04-24_CHIROUX_Theo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27756973_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0a639214-b39e-4971-8d51-d83b7d2d2c2e_facture_lentilles_17_01_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784796_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8dcf02a7-b647-4605-9d95-d3fda3685599_p20250212_14102586_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25003607_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26272878_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1078555_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1062996_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Acte_de_mariage_de_Joachim_Murat_et_de_Marie_Annonciade_Bonaparte-Archives-nationales-AE-I-11-12-12-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25375642_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/59c82220-6ed8-4ea6-937a-b40c106e7f3b_Facture_osteo_02-2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/42182356-427d-4a67-8ad2-a21775f7bf44_17390334446023078850027382669569_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26723534_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25182408_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366265Facture_consultation_du_090424_psychologue_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27759503_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2dbb4025-f570-49f9-83d1-08aedb2b122d_lunettes_Lou-Ann_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/befdfae1-9f66-454c-8b37-754140a6e5de_Adobe_Scan_09_fevr._2025_2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bigcaptureconseil_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e6c39a83-a53c-48f5-bffe-6bdb57337b33_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b0c00bc9-adf8-4972-8350-a2a51177265a_bill_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/95c188de-0800-4cdc-8f02-4c6fdadf3d66_Facture_Osteo_Annick_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25489070_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397121055F.__Louis_Guillaume_G_030424_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/62f9b609-ae07-47b7-9e9f-5b28143887e3_IMG_2284_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26765337_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/72009fc3-e88d-4cbe-b482-7f1f354c17d3_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture RETOUCHE remplie AVEC TVA_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25360588_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26341092_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26722802_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27608383_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/mobilenew2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d54b04ed-4dbc-426d-8ab9-39a42c7a5ed2_c5f12183-38cc-43e5-bffd-7844eceb9306_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26077624_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172422615172020240401_145715_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5dab2435-c975-40bc-89d4-fadf59c19553_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/250365a0-1f62-406f-b468-a8f87641414f_20250210_140211_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a5bd8b97-a2a8-49ae-900d-0673d2f96637_quittance_optique__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6ba7415c-4d85-465b-bc52-d60959b9457f_Facture_NCBL_Cataracte_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/21d01f35-2873-4214-a367-be8deea76ff2_IMG_20250205_180742_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a593830c-dcbb-4553-860f-b0925a4c30ef_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/16320fb4-bbc5-4e71-b4b4-a3c6bf818b1a_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27689832_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1217517_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26328720_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25632678_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fd94f8a5-93f4-49dd-a4c4-cf0b4765d704_Facture_Osteopathe_120225_Daoud_DEBBACHE_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27753021_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27601381_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7ef7306d-400e-4a56-b31c-02237212c470_Scan_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/44f39110-8f97-415d-8cc1-989f0a6df226_Ostheo_22_01_25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27679600_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/28a6084a-24a5-4922-b79d-f96dd51c5666_Factures_et_ordonnaces_optique_ABDOUL_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8019accc-33b4-4f20-95eb-3ff1e1d4db0b_nnn_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/14dc234b-2f2e-44d7-9dbe-65da4b000dbb_Lentilles_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27533786_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ee05efb9-c283-46d5-a7e7-222392db9309_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a3e845ff-e89c-4eb0-bf8d-164979dfee2b_Recu_Osteo_LIGNAC_Sebastien_1_scale_0.4.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27698460_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210576_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/48e19752-3534-4539-8905-e09bdeb9fddd_CamScanner_11-02-2025_21.51_1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27708551_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1221348_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27708923_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425079184Facture_Osteopathe_Brousseau_sader_17avr24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26189648_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/68f79cfb-bc94-42fa-b7d0-2406f95f2327_Devis_lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/baadcf11-c68e-4774-bbaf-c7dad90b4e13_DURAND_FABIEN_facture_2025-04-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27771477_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25224007_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/141d1472-51b6-49f1-a349-e2b5a2adbdde_Facture_osteo__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3374a3a0-4ff8-4056-957f-8fedca1cd314_17389546040733582030933880599903_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7bad16be-dff9-4b92-8041-4a9c38638e29_facturation_osteo-1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26626836_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701052_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1a3dc013-5769-4e44-aa30-de4be4852e08_Seance_ostheo_TANCHOU_David_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25949353_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8794ba0a-5d58-4d01-a1be-71f7d5c4bfe2_IMG_20250207_185936_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27651465_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1100809_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298467Facture_Osteopatie_Melanie_HAMON_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cbc7ddff-2ed1-441b-85cb-5c1079588a52_Mes_des_factures_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6221379e-7344-4868-9ee0-04f2c4e26874_consultation_ophtalmo27janvier2025BEAUPERINyann_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27547656_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26957916_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/96be3553-7261-4698-bf20-88ca5c826889_ReleveMensuel_Mai_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27757098_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1102494_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2b39033c-dfff-4053-8765-682d1b5ee9b2__Facture_Ostheo_Fernandez_Thibault_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26447823_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2368a8d3-147e-4d8f-b183-889e65f9b30f_Adobe_Scan_10_fevr._2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/252c4ad4-8fa3-4ed1-9508-ee77f6dce6fe_img20250206_20040851_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25192273_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26660518_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243973662518A42024B-6214-4DDE-A3C0-3CA27FC89720_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5b11f47d-80de-4eb6-854c-4fe8bd4e4903_facture_osteo_30.01.25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b98b1381-e3b2-41b9-87c8-a75b0a7d5e15_ReleveMensuelDecembre2024_CPAM_optique_Auregane_Daden_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/51062628-4c2d-444e-9359-ff2acb6104a8_facture_ophtalmologue_1_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9496a72b-9c2c-4542-867f-9fc4dad457c1_IMG_20250207_084759_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25911689_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2475-6_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243969995542300676-guerrache-jessica-185907478364f5a55cbbbda_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27162326_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7925fe35-96a3-4739-a64b-4d1c6ab90eeb_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27614709_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27580555_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172415667782120240819_193126_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25979892_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26723851_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/modele-bon-de-livraison-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724396999541Image_2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27712494_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6476f20b-dbac-4bba-9be0-3c22801eb59c_CamScanner_11-02-2025_21.51_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3766651c-f086-4765-9557-c423473ea246_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/conseilmobile_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/137e6371-6478-4a36-8383-0090366a49fe_facture_Gros_Barbara_3_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27554761_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27597800_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25117297_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3bae1b45-0f26-4056-b59c-a04c52679135_recu-fac-2025-1036_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/072fc9e8-d2ba-4bc7-9569-712845984ff8_Facture_ostheo_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27618927_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Attestation de droits RO-25180485-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-GED E-J22-1328268_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27668729_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/02c6c398-ddf5-47e9-be88-a27742a2683c_recu-fac-2025-02-002979_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e7f2d26a-0abe-479b-bedf-02ea97f357d8_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26722562_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB-TESTNHIDEUX-UT-9-4-2025_152144-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1112007_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/actmariaaafd02qfqdfdfe5024424af_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26689382_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/91c41171-ac88-4ce4-8f15-48c4ad2c5aa2_Devis_lunette_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/56f4bf93-8fe8-4273-a48a-4d2551892ec8_facture_osthero_2_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27586729_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26772899_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c691b973-9ac9-4fc3-aaf2-d6dde41c98e0_livret_de_famille-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26702327_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701057_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1184307_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3860a65d-b7fe-45ed-942d-5832de69430e_Facture_osteo_15-01-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b6f16c2c-bd49-4062-b2ee-486cef3a62a7_Facture_Mr_Boulahia_Mohammed_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fixIBULL_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f19b0290-ea44-45ae-9c34-719083ec4671_Lentilles_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2b987e55-9575-4a74-a557-94540c2a7548_Scan_CPN_STL_Siat_20250207161910_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27588122_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f482ebef-e885-4627-a927-96ad54ab5aa0_Facture_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ffa6f7d2-f14a-4a66-bc63-69977fb71a7a_2_consult_hypnose__1_scale_0.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172563258396120240419_114051_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26709735_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ed2edb2f-d8f6-4194-8a2e-a42e49d6c317_2025-02-12_18-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25236328_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27146817_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d3ef8b3d-b78f-4455-b2d6-1c9ef9ec5e60_Quittance_Ophtalmologue_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658225861IMG_4399_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1d4346b3-1322-457d-9436-ea1d82f74cea_Facture_ophtalmo_enfants_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1150ac6a-abdc-409d-b7ca-296e7b35769c_Facture_BOLZERJean_Francois_20250131GMAM_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25236329_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a63e27cd-9d32-4806-9cb9-e3bc43a3c999_image03-02-2025-093926_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243971209982400066-guerrache-jessica-83960773165b0ce7dbef5b_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b01c4fa3-53cb-49a7-8e24-a8f7aabe65f2_2025-04-28_consultation_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ec5f9e8d-e15a-421c-9fa8-6274420215cd_Facture_lentilles_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/24ca9fa1-064d-49a0-8d54-d6f0171c8187_Facture_lunette_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/75f7652c-7e29-494f-a02f-dd2e2c47c9e0_CamScanner_06-02-2025_15_32_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/847603bb-2553-4d83-ace0-2b6ff79fa244_facture_osteo_06-02-2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25841816_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/91d588d0-aa0a-438c-91c2-06bbe858810c_n96_DUBOIS_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27691255_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25330401_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f8c6fe47-9608-4f9e-938b-117bfa4dcd30_ostheopatie_2025-02-12_Grenot-JLuc_Mme-Tabary_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27710953_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7c2574b4-814d-46a5-aa87-a4402f46be9c_DAGHOU_Mustapha_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27654407_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0cae671e-6b7f-41aa-a0ab-d74b8e15e424_DIMART_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8d5e5dfe-71a8-4a05-a376-8727cd264ffd_osteopathe_do_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a9f88b8f-a751-478e-9338-2d31fb1062e7_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151715Facture_Julie_Boissel_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25919222_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26729570_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1106092_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784971_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592477_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1f5c9f90-eb0c-45aa-b809-e08a7c7593b8_SOPHRO_FACTURE_MME_POIROT_MAURICETTE_JANV_25_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25279751_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658376437factosteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1d3b5d22-87f2-4c89-90e5-8b0350bca84e_facture__Surre_Chloe_03_fevrier_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f0395d11-18fb-4b9f-bee1-5ca75d656d14_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b0977b46-747a-418c-bd20-9e99472917a2_Screenshot_20250209-143006_Samsung_Notes_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2af3fe93-a285-4ced-898b-90b4106bef8c_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27653858_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27474258_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27576127_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d317e79c-0485-42f8-b24d-413d94e7a779_Livret_de_famille-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/34162b51-6c3f-4dcc-a18a-d6b9e45e4f22_Recu_ophtalmo_Cadet_Corine_2025-03-09_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25187936_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26107527_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27554784_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Quittance de Loyer 11.24 Mr NGUYEN-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/834a074c-811f-4d27-9c19-134a4b9ab2ea_FACTURE-25-02-208-GEORGES_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27717154_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25273058_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2b3ed78c-1a3c-40b3-b997-0e74f6ba497f_FENOLLAR_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26691031_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9567846a-09af-4b73-9194-28f51c7bebce_Chevance_Lena_facture_2025-02-11_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/abd25cab-e2d5-456c-ac6c-d209b698daf2_Le_Monnier_Fauche_Nolan_facture_2025-01-31_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469373facture_psy_DOMI_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c7bbe834-632d-4fb6-8e24-c9c5caf488bc_IMG_4966_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27664783_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1105977_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/04fde30d-7891-439d-85e7-285a0d8ffee1_IMG_1550_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1b68fc1f-1e79-4f0b-a9dd-700931da1db1_FACTURE-25-02-542-PLANVIER_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26723200_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1196314_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26255763_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031280Facture_Osteopathe_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2212e210-bbbc-466f-b30c-fce89df87489_20250210_134936_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25263636_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/879cf76a-68c1-40d7-b444-0ff3341c97de_facture-zami-isabelle-20250130-1703_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4fa21bd9-393e-41d4-9af2-bf42004f4e94_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0abfd7b9-9456-4b68-a489-bd990ef9a78e_livret_de_famille-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ff7bd554-e95b-411f-b07a-4347a96eb077_IMG_20250507_112754_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cd3050c7-d380-4ecc-9665-c7209b6035f9_2025-02-07-malik-chaouche_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27749409_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26005448_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27699508_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c2b83174-b925-4b53-a364-4f90864eef93_pdfOlivier_B_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7ad6d85f-7245-4c05-8c79-512ed13fe0d5_lunettes_001_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bef5bd2d-a370-457f-9d30-f7ce27c4dd70_Facture_2024-12-002_-_S_Frederic_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/626c99a4-986b-401f-b91e-9b6c0f15cce6_Commande_n100691223_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/36571d26-dd70-4596-bcaa-3b77c37e0c38_FACTURE_ETIOPATE_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a4949824-1296-4d97-a46a-6173dee1f3ad_FACTURE_LENTILLES_09_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25192590_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Elec+gaz 17.05.25 Auray-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2a9dd772-6d23-44ae-95f7-5de05ffd1167_fact_podologue_7-2-25_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7cec0f10-5643-45db-a0e7-96d14f8a243f_osteo-0924-coste_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27671269_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25034588_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27697454_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/661c031f-1da5-4947-bf0e-3f5ae7267b99_20250206_173541_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27773374_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125633ostheopate4_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25358481_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25416514_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25241935_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26760297_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/534b115b-9997-48c3-bc09-ffb89e5bddf3_Screenshot_20250212_150902_com.google.android.apps.docs_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4d989506-6dba-451c-97ab-6c371618298f_8089465_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a7524bf0-1810-47ff-96c1-e744cbb863be_RDV_04.02.25FA-7564-000052_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26729830_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25375639_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397121098Screenshot_20240417_102951_Samsung_Notes_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27776512_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d40eaef8-860d-43f7-9156-3e2dcd09e498_Screenshot_20250211_104714_cn_wps_moffice_eng_PDFReader2_edit_2442102462482098_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156677851ostheo_24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d9e1cb2a-b44b-4cc8-b07a-030ddf3acd28_Ordonnance_lentilles_Mary_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27148581_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26754469_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27661590_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Impression-facture_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/facture-engie-page-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27651465_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27663184_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b1b5e4da-2462-48c1-a9b9-ff75ecd61e37_IMG_20250206_142210_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6f92aef5-73c1-404e-aabe-29f40cffd8b9_facture-20250203003_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172415667785217241478517061500135449349862696_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25198509_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27671269_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031408Facture_psy_fevrier_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658376424KM_C250i24032713020_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c3c2a06b-9a19-4bc4-8e61-bce909c2d7eb_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25246953_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2f91e647-c8b5-42c5-9a4e-e6dc639bd488_2025-02-00097_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0ca9563f-d46d-4f22-8f28-a19eca15ae40_Scan0015_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210535_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26107528_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25360470_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226587949AnyScanner_03_25_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/da1943b5-80c4-4105-8b48-78f49245b7c4_Facture_patient__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703118_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/484d05c2-b89b-4ec0-8968-9ae6a5360f6a_DetailDesPaiements_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27572543_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658126093MM_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172465812606717133766883857695232286776891989_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e86a4c65-f1d6-4555-83e9-ca7ed9fd7352_IMG_4440_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b2d1c0ae-7f30-4730-82e0-39361ceba000_Facture_du_08.02.25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25076150_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366294irm_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27235184_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25352522_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784796_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c64d0b90-f96f-432c-8ad7-52567dd9efc4_DRAPERI_Delphine_734_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27686118_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7f0110a2-87fa-40db-9d29-b89fbc21a5ac_Ostheo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3ee0ac3b-4d77-4692-80fa-15004f8a3499_facture_ophta_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b685d5dd-e01e-4fa9-b7aa-17abd1a25b74_IMG_5544_1_scale_0.9.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26479073_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a2da2013-8b36-453d-b6cc-d9c8d3336947_Osteo_janvier_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210547_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1180505_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1433196e-97f1-4452-a061-8ec311e3bddd_IMG_4464_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5440b8b5-5ea3-4a72-a8bc-23d578333d9b_Osteo_severine_2025_20250206_0001_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26077623_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/dc224b52-7d9a-4cda-9ac9-f12f4a7c1762_20250428102258_001_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27646505_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f8f29a91-6c43-4768-b4ad-ebd1c1cedf5b_17392679302675416955239120322151_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/551dd33d-0011-4887-b115-4ddf28ee0e2f_fact_barret_l_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210534_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26764274_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/certificat-cérémonie-alice-merveille_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25106529_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1498644-copie-de-l-acte-de-divorce-d-antonio-580x0-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Attestation de droits RO-25935660-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25360588_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/offers1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-GED N-R-2323015_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/085f8bfc-690a-4daf-9559-865274181242_Facture_CIANO_Ophtamologique_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8f0e72ca-64e5-4797-87bc-9378b842433a_CF_1676107_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/39f80d3f-06e9-4bf5-9a57-5bbcb27f3321_facture_osteopathe_12.02.25_001_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701997_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b7e34b86-fdb8-4ea4-b646-5bee350b947c_IMG_3542_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25007101_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25106530_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/df2d6a0f-3528-4629-a783-3dea426bcad1_IMG_4520_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1084027_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702698_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2f3c1f06-a109-4236-944c-c6343d80bbe3_IMG_20250211_0001_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7d3ed5db-8ea9-48bb-870b-d67be0b7eb95_FACTURE_OSTEOPATHE_FEVRIER_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f2d7862b-6953-42c0-8fce-22854ae760f9_Osteopathe__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cbb73a7f-8331-43df-a7f5-1562efc4bbff_IMG_4077_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/88f6c32a-2ae7-476a-b62f-59f8f74c3289_Facture_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/458af3c4-37ff-4eaf-b88c-99ea6a299a8d_IMG_4610_1_scale_0.4.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e389253a-a9f3-4559-b704-61be921eb0d3_Facture_lentilles_decembre_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25164623_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26610950_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27778240_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d783daf8-282e-4860-af41-904aa36f5405_PXL_20250207_1233101042_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/64423bd1-67f9-49e4-8192-c9e6fae257fc_1739110695856573011383863310512_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25374417_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/02e9fec4-4a39-431e-ae25-2f758a1c1f7f_20250206_174531_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Magasinier-Exemple-de-CV_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bad7f9ef-347f-4ab6-b69d-c99a726769f2_20250208125718_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/file-5e85e2ead7b5b_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26705902_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26398662_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7957eec3-5b84-4e8d-bde3-99aa88ceef07_Facture_opticien_Mme_HERMANN_CLAUDETTE_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie - 27641339_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125658received_451290077337699_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/717d0d8d-3065-41d4-99d9-8c42dbcab6a8_Viani_Doris_facture_2025-02-01_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243969994532400313-simon-heilyn-1693195547661d58d7e884c_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/84731b17-5743-4240-8aab-0f1bcc603925_chiropracteur_1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469367facture_psy_DOMI_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702202_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25406488_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1203226_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25489071_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/carte-tiers-payant_2025-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e0e762ca-3c0c-499e-813f-837b41cb9208_20250206_160143_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1198078_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/21c0bb8a-301e-43e4-b9d4-121a6e1d7106_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f4e2083e-3640-4988-853e-83579e9fed87_Adobe_Scan_11_fevr._2025_1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25310144_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26401996_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/57188e7d-280a-4f94-9d7d-6af62ede30ed_lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1228504_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26761307_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9421b4f1-e9f7-42ea-8d9b-833361b16a20_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bdb308da-16f5-4370-b1a4-8ffe4f063d84_2025-02-06-13-21-14-309_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bebdddca-5e13-4baa-9b5a-ea26cd0e2db5_Facture_optalmologie_acquitee_du_28.03.2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26219439_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/preview-facture-2014-09-10-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0a26adda-beef-4977-966c-12efc73f1d15_20250129_190625_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031429IMG_20240417_0001_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26740934_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d172cce4-7c6e-4954-93ff-ba8bba04e7db_2025-02-04_Facture_BAUDAIS_Ingrid_4_fevrier_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25346392_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25190164_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26555547_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25864357_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701064_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27778165_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9536b482-958e-4978-9989-3bd87a85ccf6_Ostheo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f2d3f12d-cdd2-4ec6-af22-0aeb3f1f6467_MONIQUE_001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26610949_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2c07c6c0-9ab6-4578-8a88-704ea038047e_Screenshot_20250211_101627_M365_Copilot_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB DU SIE SCEAUX-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705929_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f1f52e8d-326c-4714-ac75-c9ade1d945ca_facture_osteopathe_tosi_christine_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5df53e4c-13e5-4c12-91a2-24236c5cfa5f_IMG_20250207_154557_1_scale_0.9.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/beb224ad-6c43-4abc-b80c-d438d00a77c2_Facture_TP_Client_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c20881d7-861d-44dd-b69e-a3f5ed4768ea_Osteopathe_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26722801_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703581_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c2009b55-58d4-411f-a25a-3dc1e32e4a89_facture_lentille_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5bb3a883-f00f-42a5-84e4-007ffa949d76_facture_acquittee_optique_MIHI_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/43efc0af-d04d-42a1-b638-89c32aac42d9_osteo_1_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26627281_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Liste_PJ_PasseportTalent_MembreFamille-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27711119_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/455854fe-84e6-4ece-9492-276410b4e422_BORDERES1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27648502_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a3745377-917b-49b9-b9fc-3e65d22c5638_BERTIN_Franck_Facture_osteopathie_du_23_01_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/39384a4b-4117-4f56-8cca-55fd51e5b062_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25190194_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/054de057-2011-49a3-9d11-3529ac66348d_roux_clemence_facture_2025-02-12_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26186342_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26715605_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226587915Facture_kiro_fred_mars_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cc6f476b-07ef-4342-8d4d-3a490ab0d1c9_2025-02-09_22-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172465822585720240416133632216_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156677823Facture-CONSTANT-F2024-311_240820_122321_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26908261_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/82ac654c-54d4-401f-a006-96d9e46531fe_Facture_ostheo_JEAN_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/60167615-8a25-483b-9354-a6da8deb969c_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/facture-3_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e695ab7d-2b24-46df-8a84-75c6243d04ac_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26550623_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27603575_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1112614_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26705567_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26685696_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050413Facture_Osteo_Nathan_15_04_24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25382470_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB_Mr_Mme_PHAM-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26347233_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26626549_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27565028_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724396999465IMG_20240416_0001_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25848670_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26771426_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Elec+gaz 17.05.25 Auray-3_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4377eaa0-4f03-404e-86d3-fb386ae03826_facture_osteo_j3l_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26765337_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17241566778247779dccd-e833-44f2-8acf-5d5ab7077245_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f40d1b67-7690-4d39-8578-eca04cfd2b3c_Factures_Ethiopathe_Dimitri_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26479073_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27711119_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26556388_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703142_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f539946f-af74-4bd2-92d6-36c8b867b21e_FACTURE_21.01.2025_JEAN_OSTEO_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/40e6166e-dce5-4710-aba7-538fa663ec6c_IMG_20250306_093631_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701669_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27611284_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658126054image16-04-2024-173146-1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/815d334e-5da4-47f8-8ab5-2dd9365f9b63_Numerisation_20250206_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d3aea37d-3c4d-49b1-bbd6-75fdea788fc9_IMG_3831_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26771865_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702790_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25880926_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25352901_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298484BENTO29032024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27596586_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784754_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e93a0fa5-82af-43d0-a3b5-f59a8b753c4f_IMG_2051_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5e06ef95-6731-4416-a626-67039c3593dd_facture_osteo_chris_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1866f267-d6a3-49c5-80c3-e1c5d5ee6e72_HAEFFNER_Esther_Honoraires_osteeo-Schweickart_du_070225_Montant_55_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1174802_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050329osteo_B_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bedf599f-99db-4613-aa22-17d52690c34c_doc00194520240521104544_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26683579_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6555a519-f9f4-4b6a-ae0d-fd491dcf9a96_Bidard_Dylan_facture_2025-02-08_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784861_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243971210022_factures_naturopathie_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703160_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701197_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1101598_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25375639_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/920c8c7c-e3b7-445f-8fcc-4d89cc33a578_facture_osteo_du_04022025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27693842_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fd8187e1-b9eb-4727-8041-9fc9e6f1affb_0556_001_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6d5224ac-69a5-4658-b278-a7a03fef5cd8_facture_osteo_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17242265879332024-03-04_Osteo_facture_-Xa_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366227Scan_Attestation_soin_MCT_15_av_2024_Mme_Nadine_ROMERO_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8eceb56a-b571-4ab6-a64e-668db6bf4ad5_chiropracteur_fevrier_25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26349654_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27534156_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9dfe878b-b49f-4c96-bab5-93bb2ccfc4cf_Facture_DE_LA_RIVIERE_G_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25143901_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ab1215bd-b251-4071-85aa-e24ef9dfd006_2024-12-31-valentin-petit-blanc_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27232722_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/84ee3980-6f2b-4943-9b11-1996dce1039f_Dossier_lentilles_Stephanie_M_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25161622_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151881090424_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27668729_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/499639d7-4faa-4d3e-b412-7f75da0b4e01_quercy_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25382478_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705426_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/72bb486e-792d-45ef-b522-511ea16d56ef_IMG_5602_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Certificate of honour for filing all statutory tax returns_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fe5aa214-30d9-44e4-a735-43ebe3eee76f_20240916_081457_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25054565_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151864OPPON_GEORGES_ORDO_LENT_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26775374_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226587918facture_kiro_vanessa_29_mars_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25357010_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243969995351713296832283485517389105164997_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701052_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f1313200-92c8-4b7d-93e9-c010275f811c_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26719410_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1197096_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703114_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25176008_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226587965Consultation_psychologie_Nathan_MERTEN_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27694953_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/mus22_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a4873987-de63-47b0-b0ef-81044554305f_IMG_20250211_163051_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1092486_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8f8adcc8-9b8f-48e0-94d5-d8cfb9d84a1f_Scanned_20250212_154615_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25632680_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1041724_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27575670_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/45ccffcd-5fe8-418b-80c2-894086de9087_facture_ophtalmo_Fred__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bdd3ed76-034c-4414-a72d-39cf766d272f_1739358339947320998553071824360_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/043bf393-055f-451c-ba6f-f01cac5964f0_17370239165192445587441836799654_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/323d5c7f-9ec7-42b0-9cad-d7cbd5f5c4d1_2025-02-06-simon-blondel_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/offers_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27396737_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4969fd33-c173-4ae5-8020-28985599da71_17389252341351391827234693879142_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27622860_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6e41e75d-289c-4d07-a65c-9400c0cee5b7_Ophtalmo_Eshal_compressed_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/380f5d8e-540c-4671-8b03-26b7f659b7b4_image0000001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27604021_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/01a27c1e-afbe-428f-9261-a9a82d1b2efb_IMG_20250210_171911_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/70c197e1-93a3-4eed-9b39-6f96d8cd07f2_17392727785368655476842236029180_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243969995522300584-guerrache-jessica-57560893664d4b237dd783_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0b801a90-3a0a-4b0c-bc7d-56f18b938619_2025-01-23-ophelie-lenoir_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9c2ac88c-d912-4d55-8ea3-20e2feb3b8ad_Livret_de_famille_8-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6bf53897-d7c7-4485-9393-4ebbaf952d30_2ostheoRecu_2025-02-06_092631_1_scale_0.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26052732_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fdaa0d42-8e79-4506-b222-373c88891ff5_notehonoraire__20250212_143947_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25239523_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6589f4fa-ebac-459a-9ebe-aa84820a2905_FACTURE_LENTILLE_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/modele-contrat-de-travail-plein-temps-city-management-lausanne_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6f544990-74c6-4c7e-8a25-08eb3f9138c8_RemboursementSoins01082024_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26941951_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/699e17b1-2aff-4884-af8c-126367c8ca95_lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1cbf5b4f-5370-4ad4-b877-1e3aaf3259f6_Document_2025-02-05_105424_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2d21dc04-ddde-4f3f-a9a2-bd0070dc04e4_Factures_ophtalmo_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2d79f19d-3ec5-4cdf-a88b-040e733860fa_Osteopathe_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27690522_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/facture-engie-page-4_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151795facture_du_30_mars_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/795c0773-842a-467d-b78f-07746869b761_IMG_20250228_084908_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e45ea3e1-95fe-4d22-acf4-8ed0a30d9dc6_IMG20250206200324_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes_4_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592511_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ecc107be-fcec-4b98-a0ed-bd325bd27b35_recu-fac-2025-02-031_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25040113_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25864356_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27545119_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b228ea49-7729-4c79-908d-d10ffd773ece_20250211_105509_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1071226_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1174801_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/77c1b3c3-234d-4431-8675-a5c1d12053e0_osteopathe_MOREAU_Coralie_10-02-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d9341373-0699-4897-9dfc-15a20235ce80_Livret_de_famille_des_2_LAMA-7_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27778049_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e3c36ee1-bf05-4ac7-b27f-c7cef109b796_Note_dhonoraires_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784756_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658376426facture_psy_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/141e5445-10ab-400b-81a3-001132ec0783_ROSSIGNOL_Laurine_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151851OPPON_GEROGES_FACTURE_LENTILLES_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25233929_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d79dfb79-bfef-40db-bc98-f2af328e266e_facture_ostheo_1738873157473_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/37edd5db-6709-4433-8b25-badbc7e5ca61_20250208_132125_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27082405_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25152384_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26548441_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1140659_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26360535_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25048435_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25849685_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25280845_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fc14b93e-73ae-44e1-82b1-0ca50ee622de_Facture_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26505719_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26238691_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25021617_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26689382_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1098837_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17242265879312024-02-19_Osteo_facture_-Xa_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5bb3a883-f00f-42a5-84e4-007ffa949d76_facture_acquittee_optique_MIHI_4_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/de7be139-654f-4d9b-9179-2c08f02e0e93_osteo_noelle_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7d98b25c-938e-4d62-9e14-ddc1d119fa45_FACTURE_OSTEO_DU_7_FEVRIER_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bae2e636-32c5-4b2c-b2d2-a7b28b30760f_KM_C250i24082011140_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5c719322-2486-428c-8c7e-1441497877cc_scan_tc_2025-02-07-13-36-23_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25332784_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27082405_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/09aafdfc-4378-4d2f-be2a-f647d5138f5b_Ordonnance_marie_lunettes_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b10610ca-bc5d-4954-bbc7-f6ab6e640a2c_Facture_250210_211735_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592510_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1188165_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/83f38474-d109-4edc-a395-0025e6259d96_Facture_250128_Osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25198201_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/698ea3aa-944c-41f0-89c2-46a2be3a9189_facture_577852_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0efee497-131b-4961-a1dc-2810eedbb393_Facture_BALDACHINO.V_chiropracteur__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26660350_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243969995232400015-gielczynski-jeanne-131369953265965e69a20c6_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27718628_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e5090ee5-7b15-4056-9459-c12d5757b516_1739267158433_CamScanner_11-02-2025_10.45_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/dc89371d-1648-4674-ba86-57579de74299_2500352-ozturk-kaan-90009253167ab6c990cbd9_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27670038_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25292677_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25826742_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ea64ca35-29f5-49ba-894b-13aa3421f1d6_doc00681020250513143142_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1210372_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5c4353b0-55a4-4687-9744-d1a6da183abd_facture_lentilles_de_contact_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27555315_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1142334_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27776417_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie - 27581557_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397121107image0_4_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25058603_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/598221c3-d0b3-403a-a261-c68cbe56b453_facture_ophtalmo_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397121061Numerisation_20240417_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/acte_nass_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27781876_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27576495_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/48eb0cb0-6218-4263-a1bf-a75c4b2cfc3d_Aurelie_CLOTAULT_250213003_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1183991_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f55f9121-499d-4c25-b79b-464413d7ca6c_Quittance_Ophtalmologue_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27236936_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26483179_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031349MEDECINE_DOUCE_2_TOMASSELLI_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27206664_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27779029_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED S-Z22-996335_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/af206337-404d-4b97-9772-baae1962c3ee_IMG_2129_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27566548_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/11a7e90a-3116-4b7a-9b8d-e6949ef0cc00_Adobe_Scan_10_fevr._2025_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27575321_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592477_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26112050_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-26471195_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25361117_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27752635_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/419fd893-b8ef-404d-bc49-d637110e06ad_facture_ostheo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b6a49fa0-2900-45c1-915c-58d63bf64e48_OSTEO__NASSER_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26774954_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25885703_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b53e8f7a-d934-4e3f-8d03-12e071fdcfd6_Chiro_10_Fevrier_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8f0e72ca-64e5-4797-87bc-9378b842433a_CF_1676107_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ec6baa8c-a189-4850-b310-c90bab372cc2_Facture_Krys_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d2a8cfa5-32ee-4bf7-a77c-8d48625be243_2025-02-00061_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703139_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26694527_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cccf5a90-7d2c-46e5-a1a9-4ec676a6964c_IMG_3533_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8a9a4ca2-1881-473c-8604-9fddbaa624a0_Ordonnance_Lentilles_Lunettes__Facture_3_scale_0.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425079263JACOB_Francois_2024-111_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/48df624c-9ff3-4155-94b4-773bb00a967a_Facture_osteopathie_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27665791_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25203829_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5e5d3bd8-4494-4764-a001-fba806688587_Facture_ostheo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25290421_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED N-R1-941543_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27200660_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27232257_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26441046_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27777639_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25241935_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3b2e0bc5-3f96-456f-9a21-1b6a5c2138b8_Osteopathe_Sophie_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050372FA_24-1136_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1114152_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/13388e62-739c-4338-9b78-eb1c67fc740f_Facture_Osteo_02.25_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0a28cce2-34d3-4fa0-8d46-7ff07a0b96c6_Numerisation_20250211_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27716116_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fadd7aaa-0b0f-4f95-b66f-cd034f107ded_Facture_TICHIT_05022025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/749991f0-5f22-4b49-ad9d-457db1b5c472_facture_osteopathe_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25267998_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b013c377-130f-4bb8-a0ec-410e5e43b7f0_Seance_Hypnose_070225_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/88a54205-4f31-459b-981a-99dadbeaad02_IMG_4210_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1071291_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/22a87bee-a05c-4605-afed-9e83d6a6e379_Scan092229_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425079170Osteo_12-04-24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27591022_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25175432_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27200192_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27758070_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/110bf0ce-0cc7-4189-ab0e-cb2b8cab1cd9_Ostheo_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27663036_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156681302Loen_osteopathe__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125647facture_osteoathie_Maxime_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b96f6ca3-3175-4cd8-a827-e886b5799867_Facture_ophtalmologue_24022025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050370Copie_Integrale_dacte_de_Naissance_Ya_Fatou_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658225868facture_PECOURT_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7b830a57-2131-479c-bf31-1673e7308903_invoice_2025_1_31_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/060cd8e5-ac6f-47e5-b8f5-fe9d3920c522_CHIRO_FEVRIER_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26740051_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26934661_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1066100_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3116513f-22b1-4143-8f0b-97527093a9cb_Facture_osteo_fevrier_2024_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26762901_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d4724213-255b-41ec-aa7c-608661c35856__Facture_Ostheo_Fernandez_Thibault_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26555594_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c64ad8f0-0d5a-4a5b-aec5-62ec44099520_Screenshot_20250211_100040_Gallery_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27213931_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25358990_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e5200778-8114-415c-8304-539e8e8876a0_CamScanner_07-02-2025_11.49_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/57c0b77e-4634-4a68-b116-73ceb3223a39_OPHTALMO_16_12_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/655f9ad1-1759-4ae4-8707-c66a97ef4096_IMG_20250131_162239_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2f0d42d8-930e-46cb-9222-f421480516db_IMG_9332_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27619324_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/47a3ed6d-aade-4da2-8006-1ec798c5e915_Stiegler_Gary_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/20f7572a-00e5-45f9-8812-e6d2037826ea_IMG_1767_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26709999_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f339c5ad-7df8-4f3e-a5fa-9dc7616abc3e_IMG_20250327_165906_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cf49f64f-639a-4c76-9121-6e5ca6796fdf_Etiopathe_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27667405_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26368529_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658126141JACOB_Francois_2024-111_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27011182_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/34918269-15fd-4e3d-9180-659530a82d35_OSTEO_01.25_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25847017_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25302988_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c557951f-c4b7-4e04-a05c-871ace1f18af_F-2025-0065_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27699662_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25233928_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/43666807-3eb2-40b9-bdf6-94d4fe31b71e_factures_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5d44caef-7a06-46ac-bd99-f3d33f544050_20241226_080225_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9aea8ae5-7be5-4053-ac18-4135ec373137_IMG_5022_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27679188_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 26616051-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1196314_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27618927_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25375641_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27562736_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210605_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/acte_nass_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25169425_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26761886_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27579724_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1072243_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27706035_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705926_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27760177_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/coti1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1039410_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724396999467IMG_20240416_210050_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27618314_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27711297_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26732759_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6ffa2fe1-11be-4516-bd81-bef1824f5f8c_Ordonnances_et_factures_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172422615178520240404_115041_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fecefa42-3451-496b-bcb5-d5ebd4cdd8da_invoice_2024_10_14_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/88248db2-efbd-42c8-aacf-4e32f2b5b309_17392731219852541052997326186649_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27716094_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6ffa2fe1-11be-4516-bd81-bef1824f5f8c_Ordonnances_et_factures_4_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e4282a1e-c5df-4cf8-b326-032577658c64_IMG_20250108_090635_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1caeda03-aca9-43f5-8f05-8cd0ed3db1f2_20250211_134942_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27720265_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e1c02a95-aff0-4809-b30e-101ad81213b3_osteo_st1_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/700-142024-CERTIFICAT-DE-SCOLARITE_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27780823_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB-TESTNHIDEUX-D-9-4-2025_152113_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26300440_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/af1bc4f5-7332-4785-96b2-627c73eff2eb_LIVRET_DE_FAMILLE-3_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25316942_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25405311_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25273060_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ab26e47b-d267-439c-9b12-8699c17d5d47_facture_Aurelie_Claude_naturopathe_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25489070_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1066100_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/V1_CV-1_jpeg.original_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27619261_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/dd900918-d858-4a8f-ba79-61dd031d0bd8_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26749684_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366268IMG_20240417_143900_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705774_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658376447Facture_ostheo_Matheo_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27707015_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125670Facture_lentilles_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26077829_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26951476_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27535832_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702777_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27752590_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/44e87f9e-a4e2-4851-b383-e1401b36e853_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB-TESTNHIDEUX-UT-9-4-2025_152144_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26107527_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25361051_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27773543_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/934ce856-861d-44d3-94b2-998eb1a3d763_2025-02-00049_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/quittance-de-loyer-exemple_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3f98b687-6be4-42e1-bf05-2a4a0e834440_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25979890_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26731052_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125653noname_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25406038_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724396999450DERNIERE_FACTURE_KINE_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/221e6223-efc6-44e8-9ef1-c53d5893abf9_Screenshot_20250210-175253_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25489071_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-GED E-J1-1065966_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25295241_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6d20c1cc-fa3b-41a9-990e-d115f2c1265b_Facture_MUNOZ_Florian_2025-0002-3_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26681219_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25792885_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25360589_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151848OPPON_GEROGES_ORDO_LENT_2023_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25224007_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25203829_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25183619_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27665789_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-GED E-J-2591884_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c21b71c3-edeb-458b-b9d5-6977d39092d2_Screenshot_20250215_191517_cn_wps_moffice_eng_PDFReader1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/smallscreen_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25246953_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25270267_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27699394_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/127a26a9-785f-4059-ad4b-1000d1476ead_Stephane_BRETON_250207006_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4e6947e0-c34b-4fe1-81ab-41fce98ebbed_25_02_10_Facture_Hypnotherapeute_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/carte-tiers-payant_2025-1-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/smallcaptureconseil_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/23.06-Haskn-modele-facture-sans-tva-annexe-1-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Gaz-recto-633x1024_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/creer-bon-de-livraison-antsroute-7_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25347662_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27753021_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702699_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1b105dcc-c530-4799-869e-28c15c685fab_Facture_osteo_Balmelle_Cassandra5_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Capture2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/10f40812-5311-4e1d-9fb1-bf24b002208b_17389235878412277367177928986574_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6eeacb70-27be-40dd-9511-522025b459a3_20241226_080214_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27560309_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702792_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25977504_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/aze_cargmoe017762-61_001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/235cfce2-bcd5-42d5-9bce-4a775e34485b_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 26932272_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a8e35bb0-75aa-47e4-a759-0bbda47b7d72_facture2_du_06_02_2025_1_scale_0.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27759511_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b8083bbb-9489-4706-af99-69f625bbc530_facture_OPHALMOLOGIE_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3c48237f-b76e-499f-b1a1-7f7f8347c4f2_facture_osto2025_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172442507917415_03_24-charton-isabelle_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17242265879292024-03-28_Facture_Chiroprateur_-Ch_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b966fb26-8754-4714-b795-78ef7e1116d0_facture_ostheopate_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25309354_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/95918650-771a-430c-9039-1601d46d19df_attestation_securite_sociale__1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/85e27f49-5723-4dff-a511-885441636918_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/28a6084a-24a5-4922-b79d-f96dd51c5666_Factures_et_ordonnaces_optique_ABDOUL_4_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050415IMG_20240417_160804_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25361051_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f6db258d-f400-41b5-a5ca-7878010f0fd7_doc00945920250206091811_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1041724_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26977630_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB-TESTNHIDEUX-U-9-4-2025_152109_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/begin_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bce5f438-e8f9-4089-b6c9-c3906bbc095f_Facture_Osteopathe_Karine_ANTONY_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7402b29f-bcc9-42ed-a5a9-a8ec6468ad90_17392879320695618357009043029309_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ksnip_20250625-171328_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/facture anonyme_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25073562_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/conseill_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/07243018-c418-446f-8dca-307fc443b872_FACTURE_ETIOPATHE_5_FEVRIER_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/915b97eb-e7d5-415f-ae89-823f08ae3bc0_ACfacture_opthalmo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26543134_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26733889_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25382479_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0453c161-70af-4474-8854-27ab0f5d7bc0_2025-02-00044-2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1041725_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ee2bacc2-7a66-4adf-b4ff-95fc5c13a2da_20250213_112821_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/141f7ccc-37ab-4da5-9788-80a27e819b8b_Feuilles_de_soins_Psychologue_x2_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210603_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/facture-engie-page-3-349x500_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26550397_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27588485_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9c09398e-1348-4844-8fce-e0421ad306c9_note-d-honoraire_-_2025-02-05T144527.016_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125629Facture_psychologue__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/02358c4c-820a-409f-9489-a75f22af3f32_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703393_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8d5e5dfe-71a8-4a05-a376-8727cd264ffd_osteopathe_do_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26077623_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27706471_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1709a87a-d859-41e8-93da-5f307e1677e8_facture_ostheo_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27545715_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27772607_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27234926_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26555547_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27621466_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26702207_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25263730_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/sfr-facture-7-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e6d69c3f-dc5a-4bfc-844c-361b9bfd255e_OSTEO_02.25_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/mus33_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/attestation_AMI0ZRFLBQ97-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26298895_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Bulletin_de_salaire1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27781864_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/35e2f7b4-ab20-48ae-bf69-fc2441756fdc_facture_ostheo_-_Pascal_BOURGEOIS_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25136270_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27778142_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/29d72de2-da83-483a-8702-939fbe4addfa_20250207_112449_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469377Facture_DAURISFlorian_20240815CGEI_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/72cb658b-3239-4479-918d-aac53bc4a552_Facture_ophtalmo_14-01-2025_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1bb42dc7-7ee0-45fe-aed7-68192d875df3_IMG_20250207_193157_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705767_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fe3ade48-89c0-4ac3-81f3-6dae15e20859_Ostheopate_07_02_25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/actaadfdfffe_mariage_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fcd6e41f-3d4c-43c8-a6f7-85e7ea5bda1d_IMG_7926_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e566a3b0-cc99-4c4d-8715-9f3be841a066_noname_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a6321cee-55d4-42c4-be96-6b3a59d70908_facture_osteo_030225_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27399788_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27647325_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4684e3a8-c488-4e89-9b58-c65ae3e3c093_2024-12-06_facture_lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e0f0eab0-f98c-4b74-95f2-bfc61ad76d28_Ophtalmo_Isa_01-24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f2cf3af6-98dd-49f8-9d85-533b65215fb3_17393639738994204232790165513253_1_scale_0.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-GED E-J-2759858_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034_4_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0abfd7b9-9456-4b68-a489-bd990ef9a78e_livret_de_famille-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ec5f9e8d-e15a-421c-9fa8-6274420215cd_Facture_lentilles_2025_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26705886_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED S-Z22-1003798_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26539712_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25152747_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fee6466d-f0bb-4974-82a5-556586667c21_20250210_192626_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25330401_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469365facture_osteo08.24_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/k-bis-103142_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/13194c98-bb10-4227-a438-b7a1b9f2b6cb_facture_osteo_caro_08022025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1207573_1_scale_1.5.jpg

496
filter/dbscan_v2.py Normal file
View File

@@ -0,0 +1,496 @@
#!/usr/bin/env python3
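"""
DBSCAN Clustering Filter
Filters clustering results based on specific criteria:
- Clusters with at most 15 points are kept whole
- For each larger cluster, points are split by cosine distance to the centroid:
  - center region (distance < 0.1): a random 15% is kept (at least 1 point)
  - border region (distance >= 0.1): all points are kept when there are fewer
    than 70 of them, otherwise a random 30% is kept
- Random sampling is seeded with 42, so results are reproducible
- All noise points are selected
- Uses cosine distance metric
"""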
import json
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_distances
import argparse
import os
from pathlib import Path
import random
class DBSCANFilter:
    """Down-sample clustered embeddings while keeping every noise point.

    Small clusters are kept whole. Larger clusters are split into a "center"
    region (cosine distance to the centroid below a threshold) and a "border"
    region (everything else), and each region is sampled with its own ratio.
    """

    # Tuning history kept from earlier experiments (settings as noted by the
    # original author):
    #   v1: 0.5 data, 0.5 center / 0.5 border
    #   v2: 0.5 data, 0.25 center / 0.75 border
    #   v3: 0.75 data, 0.25 center / 0.75 border
    #   "dbscan 014" / "dbscan 015": the constants below
    #   "gmm": same as below but with a 0.2 center distance threshold

    # Lower bound of the per-cluster selection target; a cluster is kept whole
    # when this target is not smaller than the cluster itself.
    MIN_POINTS_PER_CLUSTER = 15
    # Default share of a cluster used to compute the selection target.
    DEFAULT_SELECTION_RATIO = 0.3
    # Points closer than this cosine distance to the centroid are "center".
    CENTER_DISTANCE_THRESHOLD = 0.1
    # Share of the center region that is sampled (at least one point).
    CENTER_SAMPLE_RATIO = 0.15
    # Share of the border region that is sampled when it is large.
    BORDER_SAMPLE_RATIO = 0.3
    # Border regions with fewer points than this are kept entirely.
    BORDER_KEEP_ALL_BELOW = 70
    # Fixed seed so that repeated runs select the same points.
    RANDOM_SEED = 42

    def __init__(self, embeddings_path, clustering_results_path):
        """
        Initialize DBSCAN filter
        Args:
            embeddings_path: Path to embeddings JSON file
            clustering_results_path: Path to DBSCAN clustering results JSON
        """
        self.embeddings_path = embeddings_path
        self.clustering_results_path = clustering_results_path
        self.embeddings = None
        self.embeddings_normalized = None
        self.clustering_results = None
        self.filepath_to_embedding = {}

    def load_data(self):
        """
        Load embeddings and clustering results from their JSON files.

        The embeddings file is read as a list of items that each carry a
        'filepath' and an 'embedding' entry. The clustering file is read as a
        mapping with 'n_clusters', 'n_samples' and 'results' entries.
        """
        print("Loading embeddings...")
        # Explicit UTF-8: file paths may contain non-ASCII characters and the
        # outputs of this class are written as UTF-8 as well.
        with open(self.embeddings_path, 'r', encoding='utf-8') as f:
            embeddings_data = json.load(f)

        # Create mapping from filepath to embedding
        embeddings_list = []
        for item in embeddings_data:
            self.filepath_to_embedding[item['filepath']] = item['embedding']
            embeddings_list.append(item['embedding'])

        self.embeddings = np.array(embeddings_list, dtype=np.float32)
        self.embeddings_normalized = normalize(self.embeddings, norm='l2')
        print(f"Loaded {len(embeddings_list)} embeddings")

        print("Loading clustering results...")
        with open(self.clustering_results_path, 'r', encoding='utf-8') as f:
            self.clustering_results = json.load(f)
        print(f"Loaded clustering results: {self.clustering_results['n_clusters']} clusters, "
              f"{self.clustering_results['n_samples']} samples")

    def group_by_clusters(self):
        """
        Group data points by cluster labels.

        A result counts as noise when its optional 'is_noise' flag is truthy
        or when its cluster id is -1.

        Returns:
            Tuple (clusters, noise_points): clusters maps each cluster id to a
            list of {'filepath', 'embedding'} dicts, noise_points is a list of
            the same kind of dicts.
        Raises:
            KeyError: if a clustered filepath has no loaded embedding.
        """
        clusters = {}
        noise_points = []
        for result in self.clustering_results['results']:
            cluster_id = result['cluster']
            filepath = result['filepath']
            point = {
                'filepath': filepath,
                'embedding': self.filepath_to_embedding[filepath]
            }
            if result.get('is_noise', False) or cluster_id == -1:
                noise_points.append(point)
            else:
                clusters.setdefault(cluster_id, []).append(point)
        return clusters, noise_points

    def calculate_cluster_centroid(self, cluster_points):
        """Calculate centroid of a cluster using normalized embeddings"""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # For cosine distance, centroid is the normalized mean
        centroid = np.mean(embeddings_normalized, axis=0)
        centroid_normalized = normalize(centroid.reshape(1, -1), norm='l2')[0]
        return centroid_normalized

    def calculate_cosine_distances_to_centroid(self, cluster_points, centroid):
        """Calculate cosine distances from each point to cluster centroid"""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # Calculate cosine distances to centroid
        distances = cosine_distances(embeddings_normalized, centroid.reshape(1, -1)).flatten()
        return distances

    def filter_cluster(self, cluster_points, selection_ratio=DEFAULT_SELECTION_RATIO):
        """
        Filter points from a cluster
        Args:
            cluster_points: List of points in the cluster
            selection_ratio: Ratio used to compute the selection target
                (default: 0.3 = 30%). The target only decides whether the
                cluster is kept whole; the number of sampled points comes
                from the center/border class constants.
        Returns:
            List of selected points
        """
        if not cluster_points:
            return []

        # Calculate how many points to select
        total_points = len(cluster_points)
        num_to_select = max(self.MIN_POINTS_PER_CLUSTER, int(total_points * selection_ratio))

        # If we need to select all or almost all points, just return all
        if num_to_select >= total_points:
            return cluster_points

        centroid = self.calculate_cluster_centroid(cluster_points)
        distances = self.calculate_cosine_distances_to_centroid(cluster_points, centroid)
        return self._sample_center_and_border(cluster_points, distances)

    def _sample_center_and_border(self, cluster_points, distances):
        """
        Sample a cluster's center and border regions.
        Args:
            cluster_points: List of points in the cluster
            distances: Distance of each point to the cluster centroid, in the
                same order as cluster_points
        Returns:
            List of selected points (center picks first, then border picks)
        """
        threshold = self.CENTER_DISTANCE_THRESHOLD

        # Sort by distance (closest to furthest from centroid); the sampling
        # below depends on this order, so it is part of the reproducibility.
        ordered = sorted(zip(cluster_points, distances), key=lambda pair: pair[1])

        center_pool = [point for point, dist in ordered if dist < threshold]
        print(f"Number of center points (distance < {threshold}): {len(center_pool)}")
        border_pool = [point for point, dist in ordered if dist >= threshold]
        print(f"Number of border points (distance >= {threshold}): {len(border_pool)}")

        # At least one center point is kept, but only when the center region
        # is not empty: sampling one point from an empty region would raise.
        if center_pool:
            center_count = max(1, int(len(center_pool) * self.CENTER_SAMPLE_RATIO))
        else:
            center_count = 0

        if len(border_pool) < self.BORDER_KEEP_ALL_BELOW:
            border_count = len(border_pool)
        else:
            border_count = int(len(border_pool) * self.BORDER_SAMPLE_RATIO)

        # A private generator yields the same picks as seeding the module-level
        # generator, without disturbing random state used elsewhere.
        rng = random.Random(self.RANDOM_SEED)
        selected_points = []
        if center_count > 0:
            selected_points.extend(rng.sample(center_pool, center_count))
        if border_count > 0:
            selected_points.extend(rng.sample(border_pool, border_count))

        print(f"Cluster with {len(cluster_points)} points -> selected {len(selected_points)} points "
              f"({center_count} center + {border_count} border)")
        return selected_points

    def filter_all_clusters(self):
        """
        Filter all clusters according to the specified criteria.

        Returns:
            List of result dicts with 'filepath', 'cluster', 'is_noise' and
            'selection_type' entries: filtered cluster points first, followed
            by all noise points.
        """
        print("\n" + "="*60)
        print("FILTERING DBSCAN CLUSTERING RESULTS")
        print("="*60)

        clusters, noise_points = self.group_by_clusters()
        print(f"Found {len(clusters)} clusters and {len(noise_points)} noise points")

        filtered_results = []

        # Process each cluster
        for cluster_id, cluster_points in clusters.items():
            print(f"\nProcessing Cluster {cluster_id}:")
            filtered_points = self.filter_cluster(cluster_points)
            # Add cluster information
            for point in filtered_points:
                filtered_results.append({
                    'filepath': point['filepath'],
                    'cluster': cluster_id,
                    'is_noise': False,
                    'selection_type': 'cluster_filtered'
                })

        # Add all noise points
        print(f"\nAdding all {len(noise_points)} noise points...")
        for point in noise_points:
            filtered_results.append({
                'filepath': point['filepath'],
                'cluster': -1,
                'is_noise': True,
                'selection_type': 'noise'
            })
        return filtered_results

    def save_filtered_results(self, filtered_results, output_path=None):
        """
        Save filtered results to JSON file
        Args:
            filtered_results: List of result dicts as produced by
                filter_all_clusters
            output_path: Destination file; defaults to
                "<clustering results stem>_filtered.json"
        Returns:
            Path the results were written to
        """
        if output_path is None:
            # Generate output filename based on input
            base_name = Path(self.clustering_results_path).stem
            output_path = f"{base_name}_filtered.json"

        # Create summary statistics
        cluster_stats = {}
        noise_count = 0
        for result in filtered_results:
            if result['is_noise']:
                noise_count += 1
            else:
                cluster_id = result['cluster']
                cluster_stats[cluster_id] = cluster_stats.get(cluster_id, 0) + 1

        original_n_samples = self.clustering_results['n_samples']

        # Prepare output data. The criteria report the class constants that
        # filter_all_clusters applies, not fixed placeholder values.
        output_data = {
            "method": "DBSCAN_FILTERED",
            "original_n_clusters": self.clustering_results['n_clusters'],
            "original_n_samples": original_n_samples,
            "filtered_n_samples": len(filtered_results),
            "filtering_criteria": {
                "cluster_selection_ratio": self.DEFAULT_SELECTION_RATIO,
                "center_points_ratio": self.CENTER_SAMPLE_RATIO,  # share of the center region kept
                "border_points_ratio": self.BORDER_SAMPLE_RATIO,  # share of a large border region kept
                "center_distance_threshold": self.CENTER_DISTANCE_THRESHOLD,
                "border_keep_all_below": self.BORDER_KEEP_ALL_BELOW,
                "min_points_per_cluster": self.MIN_POINTS_PER_CLUSTER,
                "random_seed": self.RANDOM_SEED,
                "noise_points": "all_selected"
            },
            "cluster_statistics": cluster_stats,
            "noise_points": noise_count,
            "results": filtered_results
        }

        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)

        # Avoid dividing by zero when the clustering file reports no samples.
        kept_ratio = len(filtered_results) / original_n_samples if original_n_samples else 0.0

        print("\n" + "="*60)
        print("FILTERING SUMMARY")
        print("="*60)
        print(f"Original samples: {original_n_samples}")
        print(f"Filtered samples: {len(filtered_results)}")
        print(f"Reduction ratio: {kept_ratio:.2%}")
        print("\nCluster breakdown:")
        for cluster_id, count in sorted(cluster_stats.items()):
            print(f" Cluster {cluster_id}: {count} points")
        print(f" Noise points: {noise_count} points")
        print(f"\nFiltered results saved to: {output_path}")
        return output_path

    def create_filepath_list(self, filtered_results, output_txt_path=None):
        """
        Create a simple text file with filtered filepaths
        Args:
            filtered_results: List of result dicts with a 'filepath' entry
            output_txt_path: Destination file; defaults to
                "<clustering results stem>_filtered_filepaths.txt"
        Returns:
            Path the list was written to
        """
        if output_txt_path is None:
            base_name = Path(self.clustering_results_path).stem
            output_txt_path = f"{base_name}_filtered_filepaths.txt"

        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.writelines(f"{result['filepath']}\n" for result in filtered_results)

        print(f"Filepath list saved to: {output_txt_path}")
        return output_txt_path
def main():
    """Command-line entry point: filter DBSCAN clustering results.

    Parses the CLI arguments, checks that both input files exist, then runs
    load -> filter -> save on a ``DBSCANFilter`` and optionally writes the
    plain filepath list.

    Raises:
        SystemExit: With status 2 (raised by ``parser.error``) when an input
            file is missing, so shell pipelines can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Filter DBSCAN clustering results")
    parser.add_argument("--embeddings_path", required=True,
                        help="Path to embeddings JSON file")
    parser.add_argument("--clustering_results_path", required=True,
                        help="Path to DBSCAN clustering results JSON file")
    parser.add_argument("--output_path",
                        help="Output path for filtered results (optional)")
    parser.add_argument("--create_filepath_list", action="store_true",
                        help="Also create a simple text file with filtered filepaths")
    args = parser.parse_args()

    # Report missing inputs through argparse: the message goes to stderr and
    # the process exits non-zero instead of silently returning success.
    if not os.path.exists(args.embeddings_path):
        parser.error(f"Embeddings file not found: {args.embeddings_path}")
    if not os.path.exists(args.clustering_results_path):
        parser.error(f"Clustering results file not found: {args.clustering_results_path}")

    filter_obj = DBSCANFilter(args.embeddings_path, args.clustering_results_path)
    filter_obj.load_data()
    filtered_results = filter_obj.filter_all_clusters()
    filter_obj.save_filtered_results(filtered_results, args.output_path)

    # The filepath list always uses its default name, independent of --output_path.
    if args.create_filepath_list:
        filter_obj.create_filepath_list(filtered_results)

    print("\nFiltering completed successfully!")


if __name__ == "__main__":
    main()

352
filter/dbscan_v3.py Normal file
View File

@@ -0,0 +1,352 @@
#!/usr/bin/env python3
"""
DBSCAN Clustering Filter
Filters clustering results based on specific criteria, parameterized via CLI:
- For each cluster: select a configurable ratio of points (selection_ratio)
- A configurable portion from center region (center_ratio)
- A configurable portion from border region (border_ratio)
- Noise points are randomly sampled at selection_ratio (not all of them are kept)
- Uses cosine distance metric
CLI parameters added:
--selection_ratio (float, default 0.5)
--center_ratio (float, default 0.5)
--border_ratio (float, default 0.5)
Example:
python dbscan_v3.py \
--embeddings_path embeddings.json \
--clustering_results_path dbscan_results.json \
--selection_ratio 0.4 --center_ratio 0.6 --border_ratio 0.4
"""
import json
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_distances
import argparse
import os
from pathlib import Path
import random
class DBSCANFilter:
    """Down-sample DBSCAN clustering results, cluster by cluster.

    The points of each cluster are split into a "center" and a "border"
    region by their cosine distance to the cluster centroid, and a random
    subset of each region is kept. Noise points are sampled at
    ``selection_ratio``. Every sampling step uses a private RNG seeded with
    ``RANDOM_SEED``, so runs are reproducible and the global ``random`` state
    is left untouched.
    """

    # Seed of the private RNG created for each sampling step.
    RANDOM_SEED = 42
    # Floor of the per-cluster selection target; clusters that do not exceed
    # the target are kept whole (see filter_cluster).
    MIN_SELECTION_TARGET = 15
    # Cosine distance to the centroid below which a point counts as "center".
    CENTER_DISTANCE_THRESHOLD = 0.1
    # Border regions with fewer points than this are kept in full.
    KEEP_ALL_BORDER_BELOW = 70

    def __init__(self, embeddings_path, clustering_results_path,
                 selection_ratio=0.5, center_ratio=0.5, border_ratio=0.5):
        """Initialize DBSCAN filter.

        Args:
            embeddings_path: Path to embeddings JSON file
            clustering_results_path: Path to DBSCAN clustering results JSON
            selection_ratio: Overall sampling ratio; multiplies both region
                ratios and is also the ratio applied to noise points
            center_ratio: Ratio applied within the center region
            border_ratio: Ratio applied within the border region
        """
        self.embeddings_path = embeddings_path
        self.clustering_results_path = clustering_results_path
        self.embeddings = None
        self.embeddings_normalized = None
        self.clustering_results = None
        self.filepath_to_embedding = {}
        self.selection_ratio = selection_ratio
        self.center_ratio = center_ratio
        self.border_ratio = border_ratio

    def load_data(self):
        """Load embeddings and clustering results from their JSON files.

        The embeddings file is read as a sequence of records with 'filepath'
        and 'embedding' keys. The clustering file must provide 'n_clusters'
        and 'n_samples', which are reported in the log line.
        """
        print("Loading embeddings...")
        # JSON is UTF-8; do not depend on the platform default encoding
        # (file paths may contain non-ASCII characters).
        with open(self.embeddings_path, 'r', encoding='utf-8') as f:
            embeddings_data = json.load(f)

        # Create mapping from filepath to embedding
        embeddings_list = []
        for item in embeddings_data:
            self.filepath_to_embedding[item['filepath']] = item['embedding']
            embeddings_list.append(item['embedding'])
        self.embeddings = np.array(embeddings_list, dtype=np.float32)
        self.embeddings_normalized = normalize(self.embeddings, norm='l2')
        print(f"Loaded {len(embeddings_list)} embeddings")

        print("Loading clustering results...")
        with open(self.clustering_results_path, 'r', encoding='utf-8') as f:
            self.clustering_results = json.load(f)
        print(f"Loaded clustering results: {self.clustering_results['n_clusters']} clusters, "
              f"{self.clustering_results['n_samples']} samples")

    def group_by_clusters(self):
        """Group clustering results by cluster id, separating noise.

        A result counts as noise when its 'is_noise' flag is truthy or its
        cluster id is -1.

        Returns:
            (clusters, noise_points): ``clusters`` maps a cluster id to a list
            of ``{'filepath', 'embedding'}`` dicts; ``noise_points`` is a list
            of such dicts.

        Raises:
            KeyError: If a result's filepath has no entry in the embeddings.
        """
        clusters = {}
        noise_points = []
        for result in self.clustering_results['results']:
            cluster_id = result['cluster']
            filepath = result['filepath']
            point = {
                'filepath': filepath,
                'embedding': self.filepath_to_embedding[filepath],
            }
            if result.get('is_noise', False) or cluster_id == -1:
                noise_points.append(point)
            else:
                clusters.setdefault(cluster_id, []).append(point)
        return clusters, noise_points

    def calculate_cluster_centroid(self, cluster_points):
        """Return the unit-length mean of the L2-normalized cluster embeddings."""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # For cosine distance, centroid is the normalized mean
        centroid = np.mean(embeddings_normalized, axis=0)
        return normalize(centroid.reshape(1, -1), norm='l2')[0]

    def calculate_cosine_distances_to_centroid(self, cluster_points, centroid):
        """Return the cosine distance from each point to ``centroid`` as a 1-D array."""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        return cosine_distances(embeddings_normalized, centroid.reshape(1, -1)).flatten()

    def filter_cluster(self, cluster_points):
        """Select a subset of one cluster's points using the configured ratios.

        A cluster whose size does not exceed the selection target
        (``max(MIN_SELECTION_TARGET, int(size * selection_ratio))``) is
        returned unchanged, unless ``selection_ratio`` is exactly 1.
        Otherwise the points are split by cosine distance to the centroid
        into a center region (distance < CENTER_DISTANCE_THRESHOLD) and a
        border region, and each region is sampled at
        ``region_ratio * selection_ratio``. Border regions with fewer than
        KEEP_ALL_BORDER_BELOW points are kept in full.

        Args:
            cluster_points: List of ``{'filepath', 'embedding'}`` dicts.

        Returns:
            The list of selected point dicts.
        """
        if not cluster_points:
            return []

        selection_ratio = self.selection_ratio
        total_points = len(cluster_points)
        selection_target = max(self.MIN_SELECTION_TARGET, int(total_points * selection_ratio))
        if selection_target >= total_points and selection_ratio != 1:
            return cluster_points

        centroid = self.calculate_cluster_centroid(cluster_points)
        distances = self.calculate_cosine_distances_to_centroid(cluster_points, centroid)
        # Nearest-first order fixes the population order seen by the sampler.
        point_distance_pairs = sorted(zip(cluster_points, distances), key=lambda pair: pair[1])

        # Distance threshold separating the center region from the border region.
        threshold = self.CENTER_DISTANCE_THRESHOLD
        all_center_points = [p for p, d in point_distance_pairs if d < threshold]
        all_border_points = [p for p, d in point_distance_pairs if d >= threshold]
        print(f"Number of center points (distance < {threshold}): {len(all_center_points)}")
        print(f"Number of border points (distance >= {threshold}): {len(all_border_points)}")

        n_center = len(all_center_points)
        n_border = len(all_border_points)
        if n_center > 0:
            # Keep at least one center point, never more than are available.
            center_count = max(1, int(n_center * self.center_ratio * selection_ratio))
            center_count = min(center_count, n_center)
        else:
            center_count = 0
        if n_border < self.KEEP_ALL_BORDER_BELOW:
            border_count = n_border
        else:
            border_count = max(0, int(n_border * self.border_ratio * selection_ratio))
            border_count = min(border_count, n_border)

        # A private RNG seeded per cluster yields the same picks as re-seeding
        # the global generator, without clobbering global random state.
        rng = random.Random(self.RANDOM_SEED)
        selected_points = []
        if center_count > 0:
            selected_points.extend(rng.sample(all_center_points, center_count))
        if border_count > 0:
            selected_points.extend(rng.sample(all_border_points, border_count))
        print(
            f"Cluster with {total_points} points -> selected {len(selected_points)} points "
            f"({center_count} center + {border_count} border)"
        )
        return selected_points

    def filter_all_clusters(self):
        """Filter every cluster and sample the noise points.

        Returns:
            List of result dicts with 'filepath', 'cluster', 'is_noise' and
            'selection_type' keys: cluster selections first, then the sampled
            noise points (cluster id -1).
        """
        print("\n" + "="*60)
        print("FILTERING DBSCAN CLUSTERING RESULTS")
        print("="*60)
        clusters, noise_points = self.group_by_clusters()
        print(f"Found {len(clusters)} clusters and {len(noise_points)} noise points")

        filtered_results = []
        # Process each cluster
        for cluster_id, cluster_points in clusters.items():
            print(f"\nProcessing Cluster {cluster_id}:")
            for point in self.filter_cluster(cluster_points):
                filtered_results.append({
                    'filepath': point['filepath'],
                    'cluster': cluster_id,
                    'is_noise': False,
                    'selection_type': 'cluster_filtered'
                })

        # Noise points are sampled at selection_ratio; clamp the sample size so
        # a ratio above 1 keeps every noise point instead of raising ValueError.
        n_noise = len(noise_points)
        noise_count = min(n_noise, max(0, int(n_noise * self.selection_ratio)))
        print(f"\nSelecting {noise_count} of {n_noise} noise points...")
        rng = random.Random(self.RANDOM_SEED)
        for point in rng.sample(noise_points, noise_count):
            filtered_results.append({
                'filepath': point['filepath'],
                'cluster': -1,
                'is_noise': True,
                'selection_type': 'noise'
            })
        return filtered_results

    def save_filtered_results(self, filtered_results, output_path=None):
        """Write the filtered results plus summary statistics to a JSON file.

        Args:
            filtered_results: Result dicts as returned by filter_all_clusters().
            output_path: Destination file. When None, the name is built from
                the stem of ``clustering_results_path`` plus ``_filtered.json``.

        Returns:
            The path the JSON was written to.
        """
        if output_path is None:
            # Generate output filename based on input
            base_name = Path(self.clustering_results_path).stem
            output_path = f"{base_name}_filtered.json"

        # Create summary statistics
        cluster_stats = {}
        noise_count = 0
        for result in filtered_results:
            if result['is_noise']:
                noise_count += 1
            else:
                cluster_id = result['cluster']
                cluster_stats[cluster_id] = cluster_stats.get(cluster_id, 0) + 1

        original_n_samples = self.clustering_results['n_samples']
        # Every noise point is kept only when the ratio reaches 1; otherwise
        # they were sampled, and the metadata should say so.
        noise_policy = "all_selected" if self.selection_ratio >= 1 else "sampled_by_selection_ratio"

        # Prepare output data
        output_data = {
            "method": "DBSCAN_FILTERED",
            "original_n_clusters": self.clustering_results['n_clusters'],
            "original_n_samples": original_n_samples,
            "filtered_n_samples": len(filtered_results),
            "filtering_criteria": {
                "cluster_selection_ratio": self.selection_ratio,
                "center_points_ratio": self.center_ratio,
                "border_points_ratio": self.border_ratio,
                "noise_points": noise_policy
            },
            "cluster_statistics": cluster_stats,
            "noise_points": noise_count,
            "results": filtered_results
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)

        # Guard the ratio against an empty clustering result (n_samples == 0).
        kept_ratio = len(filtered_results) / original_n_samples if original_n_samples else 0.0
        print("\n" + "="*60)
        print("FILTERING SUMMARY")
        print("="*60)
        print(f"Original samples: {original_n_samples}")
        print(f"Filtered samples: {len(filtered_results)}")
        print(f"Reduction ratio: {kept_ratio:.2%}")
        print("\nCluster breakdown:")
        for cluster_id, count in sorted(cluster_stats.items()):
            print(f" Cluster {cluster_id}: {count} points")
        print(f" Noise points: {noise_count} points")
        print(f"\nFiltered results saved to: {output_path}")
        return output_path

    def create_filepath_list(self, filtered_results, output_txt_path=None):
        """Write the filepath of every filtered result to a text file, one per line.

        Args:
            filtered_results: Result dicts, each carrying a 'filepath' key.
            output_txt_path: Destination file. When None, the name is built
                from the stem of ``clustering_results_path`` plus
                ``_filtered_filepaths.txt``.

        Returns:
            The path the list was written to.
        """
        if output_txt_path is None:
            base_name = Path(self.clustering_results_path).stem
            output_txt_path = f"{base_name}_filtered_filepaths.txt"
        lines = [f"{result['filepath']}\n" for result in filtered_results]
        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
        print(f"Filepath list saved to: {output_txt_path}")
        return output_txt_path
def main():
    """Command-line entry point: filter DBSCAN clustering results with configurable ratios.

    Parses the CLI arguments, checks that both input files exist, then runs
    load -> filter -> save on a ``DBSCANFilter`` built with the given ratios
    and optionally writes the plain filepath list.

    Raises:
        SystemExit: With status 2 (raised by ``parser.error``) when an input
            file is missing, so shell pipelines can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Filter DBSCAN clustering results")
    parser.add_argument("--embeddings_path", required=True,
                        help="Path to embeddings JSON file")
    parser.add_argument("--clustering_results_path", required=True,
                        help="Path to DBSCAN clustering results JSON file")
    parser.add_argument("--output_path",
                        help="Output path for filtered results (optional)")
    parser.add_argument("--create_filepath_list", action="store_true",
                        help="Also create a simple text file with filtered filepaths")
    parser.add_argument("--selection_ratio", type=float, default=0.5,
                        help="Overall ratio of points to sample per cluster (default: 0.5). Minimum 15 points enforced.")
    parser.add_argument("--center_ratio", type=float, default=0.5,
                        help="Relative ratio applied to center region when sampling (default: 0.5)")
    parser.add_argument("--border_ratio", type=float, default=0.5,
                        help="Relative ratio applied to border region when sampling (default: 0.5)")
    args = parser.parse_args()

    # Report missing inputs through argparse: the message goes to stderr and
    # the process exits non-zero instead of silently returning success.
    if not os.path.exists(args.embeddings_path):
        parser.error(f"Embeddings file not found: {args.embeddings_path}")
    if not os.path.exists(args.clustering_results_path):
        parser.error(f"Clustering results file not found: {args.clustering_results_path}")

    # Initialize filter with user-provided ratios
    filter_obj = DBSCANFilter(
        args.embeddings_path,
        args.clustering_results_path,
        selection_ratio=args.selection_ratio,
        center_ratio=args.center_ratio,
        border_ratio=args.border_ratio
    )
    filter_obj.load_data()
    filtered_results = filter_obj.filter_all_clusters()
    filter_obj.save_filtered_results(filtered_results, args.output_path)

    # The filepath list always uses its default name, independent of --output_path.
    if args.create_filepath_list:
        filter_obj.create_filepath_list(filtered_results)

    print("\nFiltering completed successfully!")


if __name__ == "__main__":
    main()

376
filter/fillter_all.py Normal file
View File

@@ -0,0 +1,376 @@
#!/usr/bin/env python3
"""
Universal Clustering Filter
Filters clustering results for multiple algorithms:
- DBSCAN: keeps all noise points, uses centroid-based selection within clusters
- GMM: uses probability-based selection, no noise points
- K-Means: standard centroid-based selection
Uses cosine distance metric for all calculations.
"""
import json
import numpy as np
from sklearn.preprocessing import normalize
from sklearn.metrics.pairwise import cosine_distances
import argparse
import os
from pathlib import Path
class UniversalClusterFilter:
    """Down-sample clustering results, choosing the strategy from the result's 'method'.

    Results whose method is 'GMM' or contains 'GAUSSIAN' are filtered by the
    per-point 'probability' score when one is present. Everything else uses
    the standard centroid-distance selection. Points flagged as noise (or in
    cluster -1) are always kept.
    """

    # Share of the selected points taken from nearest the centroid; the
    # remainder is taken from the points farthest from it.
    CENTER_FRACTION = 0.25

    def __init__(self, embeddings_path, clustering_results_path):
        """
        Initialize universal cluster filter

        Args:
            embeddings_path: Path to embeddings JSON file
            clustering_results_path: Path to clustering results JSON
        """
        self.embeddings_path = embeddings_path
        self.clustering_results_path = clustering_results_path
        self.embeddings = None
        self.embeddings_normalized = None
        self.clustering_results = None
        self.filepath_to_embedding = {}
        self.algorithm = None
        # Ratio used by the most recent filter_all_clusters() call; it is
        # reported in the metadata written by save_filtered_results().
        self.selection_ratio = 0.5

    def load_data(self):
        """Load embeddings and clustering results from their JSON files.

        The embeddings file is read as a sequence of records with 'filepath'
        and 'embedding' keys. The clustering file must provide 'n_clusters'
        and 'n_samples'; its optional 'method' entry selects the algorithm
        ('UNKNOWN' when absent).
        """
        print("Loading embeddings...")
        # JSON is UTF-8; do not depend on the platform default encoding
        # (file paths may contain non-ASCII characters).
        with open(self.embeddings_path, 'r', encoding='utf-8') as f:
            embeddings_data = json.load(f)

        # Create mapping from filepath to embedding
        embeddings_list = []
        for item in embeddings_data:
            self.filepath_to_embedding[item['filepath']] = item['embedding']
            embeddings_list.append(item['embedding'])
        self.embeddings = np.array(embeddings_list, dtype=np.float32)
        self.embeddings_normalized = normalize(self.embeddings, norm='l2')
        print(f"Loaded {len(embeddings_list)} embeddings")

        print("Loading clustering results...")
        with open(self.clustering_results_path, 'r', encoding='utf-8') as f:
            self.clustering_results = json.load(f)
        # Detect algorithm type
        self.algorithm = self.clustering_results.get('method', 'UNKNOWN')
        print(f"Detected algorithm: {self.algorithm}")
        print(f"Loaded clustering results: {self.clustering_results['n_clusters']} clusters, "
              f"{self.clustering_results['n_samples']} samples")

    def group_by_clusters(self):
        """Group clustering results by cluster id, separating noise.

        A result counts as noise when its 'is_noise' flag is truthy or its
        cluster id is -1.

        Returns:
            (clusters, noise_points): ``clusters`` maps a cluster id to a list
            of ``{'filepath', 'embedding', 'metadata'}`` dicts, where
            'metadata' is the original result record; ``noise_points`` is a
            list of such dicts.

        Raises:
            KeyError: If a result's filepath has no entry in the embeddings.
        """
        clusters = {}
        noise_points = []
        for result in self.clustering_results['results']:
            cluster_id = result['cluster']
            filepath = result['filepath']
            point = {
                'filepath': filepath,
                'embedding': self.filepath_to_embedding[filepath],
                'metadata': result
            }
            # Check for noise points (DBSCAN specific)
            if result.get('is_noise', False) or cluster_id == -1:
                noise_points.append(point)
            else:
                clusters.setdefault(cluster_id, []).append(point)
        return clusters, noise_points

    def calculate_cluster_centroid(self, cluster_points):
        """Return the unit-length mean of the L2-normalized cluster embeddings."""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        # For cosine distance, centroid is the normalized mean
        centroid = np.mean(embeddings_normalized, axis=0)
        return normalize(centroid.reshape(1, -1), norm='l2')[0]

    def calculate_cosine_distances_to_centroid(self, cluster_points, centroid):
        """Return the cosine distance from each point to ``centroid`` as a 1-D array."""
        embeddings = np.array([point['embedding'] for point in cluster_points])
        embeddings_normalized = normalize(embeddings, norm='l2')
        return cosine_distances(embeddings_normalized, centroid.reshape(1, -1)).flatten()

    def filter_cluster_standard(self, cluster_points, selection_ratio=0.5):
        """
        Standard filtering: 25% center + 75% border of selected points

        ``max(1, int(size * selection_ratio))`` points are selected. When that
        covers the whole cluster, the cluster is returned unchanged. Otherwise
        CENTER_FRACTION of the selection (at least one point) is taken from
        the points nearest the centroid and the rest from the farthest ones.
        Each selected point dict is tagged in place with a 'selection_type'
        of 'center' or 'border'.
        """
        if len(cluster_points) == 0:
            return []

        # Calculate how many points to select
        total_points = len(cluster_points)
        num_to_select = max(1, int(total_points * selection_ratio))
        # If we need to select all or almost all points, just return all
        if num_to_select >= total_points:
            return cluster_points

        centroid = self.calculate_cluster_centroid(cluster_points)
        distances = self.calculate_cosine_distances_to_centroid(cluster_points, centroid)
        # Sort by distance (closest to furthest from centroid)
        point_distance_pairs = sorted(zip(cluster_points, distances), key=lambda pair: pair[1])

        center_count = max(1, int(num_to_select * self.CENTER_FRACTION))
        border_count = num_to_select - center_count

        selected_points = []
        # Select center points (closest to centroid)
        center_points = [pair[0] for pair in point_distance_pairs[:center_count]]
        for point in center_points:
            point['selection_type'] = 'center'
        selected_points.extend(center_points)
        # Select border points (furthest from centroid); the two slices never
        # overlap because num_to_select < total_points here.
        if border_count > 0:
            border_points = [pair[0] for pair in point_distance_pairs[-border_count:]]
            for point in border_points:
                point['selection_type'] = 'border'
            selected_points.extend(border_points)

        print(f"Cluster with {total_points} points -> selected {len(selected_points)} points "
              f"({center_count} center + {border_count} border)")
        return selected_points

    def filter_cluster_gmm(self, cluster_points, selection_ratio=0.5):
        """
        GMM-specific filtering: consider probability scores if available

        When any point's metadata carries a 'probability', the points with
        the highest probabilities are kept (a missing probability counts as 0)
        and tagged 'high_probability'. Otherwise this falls back to
        filter_cluster_standard().
        """
        if len(cluster_points) == 0:
            return []

        has_probabilities = any('probability' in point['metadata'] for point in cluster_points)
        if not has_probabilities:
            # Fall back to standard filtering
            return self.filter_cluster_standard(cluster_points, selection_ratio)

        total_points = len(cluster_points)
        num_to_select = max(1, int(total_points * selection_ratio))
        if num_to_select >= total_points:
            return cluster_points

        # Sort by probability (highest confidence first)
        sorted_points = sorted(cluster_points,
                               key=lambda x: x['metadata'].get('probability', 0),
                               reverse=True)
        selected_points = sorted_points[:num_to_select]
        for point in selected_points:
            point['selection_type'] = 'high_probability'
        print(f"GMM Cluster with {total_points} points -> selected {len(selected_points)} points "
              f"(top probability)")
        return selected_points

    def filter_all_clusters(self, selection_ratio=0.5):
        """Filter all clusters according to algorithm-specific criteria.

        Args:
            selection_ratio: Ratio of points to select from each cluster. It
                is remembered on the instance so the saved metadata reports
                the ratio that was actually used.

        Returns:
            List of result dicts with 'filepath', 'cluster', 'is_noise',
            'selection_type' and 'original_metadata' keys: cluster selections
            first, then every noise point.
        """
        self.selection_ratio = selection_ratio
        print("\n" + "="*60)
        print(f"FILTERING {self.algorithm} CLUSTERING RESULTS")
        print("="*60)
        clusters, noise_points = self.group_by_clusters()
        print(f"Found {len(clusters)} clusters and {len(noise_points)} noise points")

        algorithm_name = self.algorithm.upper()
        use_gmm = algorithm_name == 'GMM' or 'GAUSSIAN' in algorithm_name

        filtered_results = []
        # Process each cluster
        for cluster_id, cluster_points in clusters.items():
            print(f"\nProcessing Cluster {cluster_id}:")
            # Choose filtering method based on algorithm
            if use_gmm:
                filtered_points = self.filter_cluster_gmm(cluster_points, selection_ratio)
            else:
                filtered_points = self.filter_cluster_standard(cluster_points, selection_ratio)
            for point in filtered_points:
                filtered_results.append({
                    'filepath': point['filepath'],
                    'cluster': cluster_id,
                    'is_noise': False,
                    'selection_type': point.get('selection_type', 'cluster_filtered'),
                    'original_metadata': point['metadata']
                })

        # Add all noise points (DBSCAN only)
        if noise_points:
            print(f"\nAdding all {len(noise_points)} noise points...")
            for point in noise_points:
                filtered_results.append({
                    'filepath': point['filepath'],
                    'cluster': -1,
                    'is_noise': True,
                    'selection_type': 'noise',
                    'original_metadata': point['metadata']
                })
        return filtered_results

    def save_filtered_results(self, filtered_results, output_path=None):
        """Write the filtered results plus summary statistics to a JSON file.

        Args:
            filtered_results: Result dicts as returned by filter_all_clusters().
            output_path: Destination file. When None, the name is built from
                the stem of ``clustering_results_path`` plus ``_filtered.json``.

        Returns:
            The path the JSON was written to.
        """
        if output_path is None:
            base_name = Path(self.clustering_results_path).stem
            output_path = f"{base_name}_filtered.json"

        # Create summary statistics
        cluster_stats = {}
        noise_count = 0
        selection_type_stats = {}
        for result in filtered_results:
            if result['is_noise']:
                noise_count += 1
            else:
                cluster_id = result['cluster']
                cluster_stats[cluster_id] = cluster_stats.get(cluster_id, 0) + 1
            sel_type = result['selection_type']
            selection_type_stats[sel_type] = selection_type_stats.get(sel_type, 0) + 1

        original_n_samples = self.clustering_results['n_samples']

        # Prepare output data
        output_data = {
            "method": f"{self.algorithm}_FILTERED",
            "original_algorithm": self.algorithm,
            "original_n_clusters": self.clustering_results['n_clusters'],
            "original_n_samples": original_n_samples,
            "filtered_n_samples": len(filtered_results),
            "filtering_criteria": {
                # Report the ratio that was actually used, not a fixed 0.5.
                "cluster_selection_ratio": self.selection_ratio,
                "center_points_ratio": self.CENTER_FRACTION,
                "border_points_ratio": 1 - self.CENTER_FRACTION,
                "noise_points": "all_selected" if noise_count > 0 else "none_present"
            },
            "cluster_statistics": cluster_stats,
            "selection_type_statistics": selection_type_stats,
            "noise_points": noise_count,
            "results": filtered_results
        }
        with open(output_path, 'w', encoding='utf-8') as f:
            json.dump(output_data, f, indent=4, ensure_ascii=False)

        # Guard the ratio against an empty clustering result (n_samples == 0).
        kept_ratio = len(filtered_results) / original_n_samples if original_n_samples else 0.0
        print("\n" + "="*60)
        print("FILTERING SUMMARY")
        print("="*60)
        print(f"Algorithm: {self.algorithm}")
        print(f"Original samples: {original_n_samples}")
        print(f"Filtered samples: {len(filtered_results)}")
        print(f"Reduction ratio: {kept_ratio:.2%}")
        print("\nCluster breakdown:")
        for cluster_id, count in sorted(cluster_stats.items()):
            print(f" Cluster {cluster_id}: {count} points")
        if noise_count > 0:
            print(f" Noise points: {noise_count} points")
        print("\nSelection type breakdown:")
        for sel_type, count in selection_type_stats.items():
            print(f" {sel_type}: {count} points")
        print(f"\nFiltered results saved to: {output_path}")
        return output_path

    def create_filepath_list(self, filtered_results, output_txt_path=None):
        """Write the filepath of every filtered result to a text file, one per line.

        Args:
            filtered_results: Result dicts, each carrying a 'filepath' key.
            output_txt_path: Destination file. When None, the name is built
                from the stem of ``clustering_results_path`` plus
                ``_filtered_filepaths.txt``.

        Returns:
            The path the list was written to.
        """
        if output_txt_path is None:
            base_name = Path(self.clustering_results_path).stem
            output_txt_path = f"{base_name}_filtered_filepaths.txt"
        lines = [f"{result['filepath']}\n" for result in filtered_results]
        with open(output_txt_path, 'w', encoding='utf-8') as f:
            f.writelines(lines)
        print(f"Filepath list saved to: {output_txt_path}")
        return output_txt_path
def main():
    """Command-line entry point: filter clustering results of any supported algorithm.

    Parses the CLI arguments, checks that both input files exist, then runs
    load -> filter -> save on a ``UniversalClusterFilter`` and optionally
    writes the plain filepath list.

    Raises:
        SystemExit: With status 2 (raised by ``parser.error``) when an input
            file is missing, so shell pipelines can detect the failure.
    """
    parser = argparse.ArgumentParser(description="Universal filter for clustering results")
    parser.add_argument("--embeddings_path", required=True,
                        help="Path to embeddings JSON file")
    parser.add_argument("--clustering_results_path", required=True,
                        help="Path to clustering results JSON file")
    parser.add_argument("--output_path",
                        help="Output path for filtered results (optional)")
    parser.add_argument("--selection_ratio", type=float, default=0.5,
                        help="Ratio of points to select from each cluster (default: 0.5)")
    parser.add_argument("--create_filepath_list", action="store_true",
                        help="Also create a simple text file with filtered filepaths")
    args = parser.parse_args()

    # Report missing inputs through argparse: the message goes to stderr and
    # the process exits non-zero instead of silently returning success.
    if not os.path.exists(args.embeddings_path):
        parser.error(f"Embeddings file not found: {args.embeddings_path}")
    if not os.path.exists(args.clustering_results_path):
        parser.error(f"Clustering results file not found: {args.clustering_results_path}")

    filter_obj = UniversalClusterFilter(args.embeddings_path, args.clustering_results_path)
    filter_obj.load_data()
    filtered_results = filter_obj.filter_all_clusters(args.selection_ratio)
    filter_obj.save_filtered_results(filtered_results, args.output_path)

    # The filepath list always uses its default name, independent of --output_path.
    if args.create_filepath_list:
        filter_obj.create_filepath_list(filtered_results)

    print("\nFiltering completed successfully!")


if __name__ == "__main__":
    main()

View File

@@ -0,0 +1,105 @@
#!/usr/bin/env python3
"""Generate filtered DBSCAN results based on images listed in 009_label_data_sample_seed_42.json.
Rules (v1): keep only entries whose base filename (before _<idx>_scale_* suffix or extension)
can be matched with any "image" field in the label JSON. Matching strategy:
- Extract filename without extension from each DBSCAN result filepath.
- Remove trailing pattern: _<digits>_scale_<anything>
- For each image name from labels, keep the DBSCAN entry if:
* cleaned_filename startswith(image_name) OR
* image_name startswith(cleaned_filename) OR
* image_name in cleaned_filename OR cleaned_filename in image_name
- This permissive logic aims to cover variations; can be tightened later.
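Writes OUTPUT_FILE (in the same folder as this script) with structure:
{
"method": "DBSCAN_FILTERED",
"n_samples": <count_of_original>,
"n_filtered": <count_after_filter>,
"match_ratio": <n_filtered / n_samples, rounded to 4 decimals>,
"source_file": <path of the DBSCAN results file that was read>,
"label_source": <path of the label JSON that was read>,
"results": [ ...filtered entries... ]
}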
"""
from __future__ import annotations
import json
import re
from pathlib import Path
# Directory containing this script; label and output files live next to it.
BASE_DIR = Path(__file__).resolve().parent

# Label JSON whose "image" entries decide which DBSCAN records are kept.
# Swap in another label set here when needed, e.g.
# "009_label_data_sample_seed_42.json" or "015_label_data_sample_seed_1997.json".
LABEL_FILE = BASE_DIR / "008_label_data_sample_seed_1997.json"

# Locations searched, in order, for the clustering output (relative to the
# project root); the first one that exists is used.
DBSCAN_FILE_CANDIDATES = [
    BASE_DIR.parent / "cluster" / "dbscan_results.json",
    BASE_DIR.parent / "result" / "dbscan_results.json",
]

# Destination of the filtered results.
OUTPUT_FILE = BASE_DIR / "dbscan_results_filter_008.json"
def load_first_existing(paths):
    """Return the first path in *paths* that exists on disk.

    Raises:
        FileNotFoundError: If none of the paths exists; the message lists
            every location that was tried.
    """
    found = next((candidate for candidate in paths if candidate.exists()), None)
    if found is not None:
        return found
    tried = ", ".join(str(candidate) for candidate in paths)
    raise FileNotFoundError("No dbscan_results.json file found in expected locations: " + tried)
def normalize_image_name(name: str) -> str:
    """Return *name* with leading and trailing whitespace removed."""
    trimmed = name.strip()
    return trimmed
def cleaned_filename(path: str) -> str:
    """Return the file stem of *path* without its page/scale marker.

    The extension is dropped first; then everything from the first
    ``_<digits>_scale_`` marker to the end of the stem is removed.
    """
    without_extension = Path(path).stem
    return re.sub(r"_[0-9]+_scale_.*$", "", without_extension)
def build_image_name_set(label_path: Path) -> set[str]:
    """Collect the normalized "image" names found in the label JSON.

    Only dict entries whose "image" value is a non-blank string contribute;
    anything else is skipped silently.

    Args:
        label_path: Path of the label JSON file.

    Returns:
        The set of image names after ``normalize_image_name``.
    """
    # JSON is UTF-8; read it explicitly so names with accented characters do
    # not depend on the platform default encoding.
    data = json.loads(label_path.read_text(encoding="utf-8"))
    names: set[str] = set()
    for entry in data:
        if not isinstance(entry, dict):
            continue
        img = entry.get("image")
        if isinstance(img, str) and img.strip():
            names.add(normalize_image_name(img))
    return names
def match_entry(core_name: str, image_names: set[str]) -> bool:
    """Return True when *core_name* matches any labelled image name.

    A match is a substring relation in either direction, which also covers
    the prefix cases. Empty names never match: the empty string is a
    substring of every string, so it would otherwise match everything.

    Args:
        core_name: Cleaned file name of a DBSCAN record.
        image_names: Image names taken from the label file.
    """
    if not core_name:
        return False
    return any(
        img and (img in core_name or core_name in img)
        for img in image_names
    )
def main():
    """Keep the DBSCAN records that match a labelled image and write them to OUTPUT_FILE.

    Reads the first existing file in DBSCAN_FILE_CANDIDATES and the label
    file LABEL_FILE, keeps every record whose cleaned file name matches a
    label image name, and writes the records with summary counts as JSON.

    Raises:
        FileNotFoundError: If no DBSCAN results file exists.
    """
    dbscan_file = load_first_existing(DBSCAN_FILE_CANDIDATES)
    image_names = build_image_name_set(LABEL_FILE)
    # JSON is UTF-8; read and write it explicitly so non-ASCII file names do
    # not depend on the platform default encoding.
    raw = json.loads(dbscan_file.read_text(encoding="utf-8"))
    results = raw.get("results", [])

    filtered = []
    for rec in results:
        filepath = rec.get("filepath")
        if not filepath:
            # Records without a filepath cannot be matched.
            continue
        if match_entry(cleaned_filename(filepath), image_names):
            filtered.append(rec)

    out = {
        "method": "DBSCAN_FILTERED",
        "n_samples": len(results),
        "n_filtered": len(filtered),
        "match_ratio": round(len(filtered) / len(results), 4) if results else 0.0,
        "source_file": str(dbscan_file),
        "label_source": str(LABEL_FILE),
        "results": filtered,
    }
    OUTPUT_FILE.write_text(json.dumps(out, indent=2, ensure_ascii=False), encoding="utf-8")
    print(f"Wrote {len(filtered)} filtered results to {OUTPUT_FILE}")


if __name__ == "__main__":  # pragma: no cover
    main()

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,460 @@
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-26471195_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26539711_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ee47fa65-6a09-493b-af5e-3d8faf643436_2500016-duvivier-anne-marie-173187397567aa1b97b01b5_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c884bb97-d413-4384-8ec1-6a99735b8be1_facture_opticien_4_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2f91e647-c8b5-42c5-9a4e-e6dc639bd488_2025-02-00097_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27545119_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1070373_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25374417_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f482ebef-e885-4627-a927-96ad54ab5aa0_Facture_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b410345a-31de-4397-a896-bc11d97e524c_FACTURE_OPHTALMOLOGISTE_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/aa50cf96-f4cb-4848-a392-99daa5efa987_Facture_ZINT_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/38b89b50-a6b6-4816-a495-ce2d9e71082f_Kine-28-11-2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27776469_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26556388_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7bc6ff74-13df-435e-8457-ecbb20d9d366_Demande_remboursement_mutuelle_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25332784_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1212858_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26539712_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/76577d77-1213-4994-9360-9b72f6053892_Document_PDF_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703581_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2d21dc04-ddde-4f3f-a9a2-bd0070dc04e4_Factures_ophtalmo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1140659_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED N-R-2391368_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469375osteopathe_renau_anais_le_31-07-2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/05ec43f1-34ea-40e6-b799-413abcba56a5_IMG_1539_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3c00ec8d-24ad-46c1-aa12-08f7cba34328_17478452286276479334092225478882_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210547_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27605403_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27708936_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27596405_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27697454_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7e914ee7-a271-4faa-8d0c-3b911e700820_image_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/803f61ed-759c-4773-90f2-d8737ef911ab_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366294irm_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB-TESTNHIDEUX-U-9-4-2025_152109-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/287b78bf-1861-467f-973f-21aad6be26f1_livret_de_famille_Humez_25022024-3_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25290421_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8c31bae4-5322-4f3b-ae00-d3dbd40446df_facture_Henry_Nadege_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172442507921917133766883857695232286776891989_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6e609e6b-dd09-4dd8-9192-416dd99e3fc6_IMG_20250207_135202_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26729830_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/047b2e29-3030-4b30-b328-8726874f3506_251_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/506fd2ef-84fb-4c32-9ce8-f68335c66bf2_1739272341881936420559325279448_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1071225_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701076_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/03c955b1-c201-4ec2-b5e0-42a11e04908f_facture_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27672722_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/91efe623-7d6b-4a96-8b03-87199c56566a_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4ba5828c-76aa-45e9-a89f-1257021074c9_Claire_Osteo_20250207_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592511_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/81fa09d5-04c7-4ed5-8a19-e6e724a47acb_MORIN_MARYLINE_osteopathe_050225_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/smallscreen_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1183799_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27711119_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1106091_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5818c642-d6c2-4cc9-a16d-b66563f0cb0b_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26685506_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25864357_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27559701_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25379394_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26954651_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/comprendre-ma-facture-edf-exemple_F4nHAoq_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/300a47a0-30a0-4419-a27b-a08f7ccd6db3_17392726735678376859314053270035_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0b801a90-3a0a-4b0c-bc7d-56f18b938619_2025-01-23-ophelie-lenoir_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25143901_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ro_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e1c02a95-aff0-4809-b30e-101ad81213b3_osteo_st1_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25293643_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125678ordo_05.03.2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25863446_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25007101_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a64d7f57-a8ed-44e9-ae38-fae9bb713144_Osteopathe__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ecc107be-fcec-4b98-a0ed-bd325bd27b35_recu-fac-2025-02-031_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050400acte_de_naissance_Amaury_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/254612ad-aaea-43d0-8af7-882709c7e1d4_2025-02-00047_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25190194_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25880926_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e6d69c3f-dc5a-4bfc-844c-361b9bfd255e_OSTEO_02.25_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27776257_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/881d86b7-5bdb-4a16-bcbc-57202e0879b1_osteo_fevrier_2025_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ecf337c5-6bab-4630-aa9a-8364b1f542bb_facture_osteo__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c64ad8f0-0d5a-4a5b-aec5-62ec44099520_Screenshot_20250211_100040_Gallery_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/41e00ab7-34ce-4ebd-baae-b7b9e42fb973_17392896793882777076946436275276_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26186342_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26595219_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/preview-promesse-embauche-doc1-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/k-bis-103142_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25847017_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1b105dcc-c530-4799-869e-28c15c685fab_Facture_osteo_Balmelle_Cassandra5_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25270267_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125640FACTURE_HOSTEO__1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a13a6613-da2d-48b7-96ef-412ba5a88af0_Factures_Clemence__2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a73c367c-878a-4461-a248-1f29d0a4eafb_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED S-Z22-1003800_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/EdiAttAssPermCont_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1196314_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4fa21bd9-393e-41d4-9af2-bf42004f4e94_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27716093_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25414972_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27545715_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25332074_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fc14b93e-73ae-44e1-82b1-0ca50ee622de_Facture_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25203829_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26717084_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27709440_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/caaa1340-dc45-4133-9ff4-8bac8e2f6bbd_Facture_Lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8c3abdc3-796f-4764-9cc8-4da2481f36e8_IMG_2534_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25361051_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8908ff09-4087-467a-88f1-e415501a95b4_POUVAIT_lunettes_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 27052816-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1207573_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298461facture_podologue_alicia_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/56711b2e-104c-4be3-8308-60c6fb070bd6_IMG_8832_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1071291_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e6272522-cc2a-44b0-af14-9f54d1a76a59_Facture_lunettes__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25093559_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f041f191-6b8f-429a-b8d9-1204957390b2_JANNEAU_DAVID_JANV25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/89b129db-8ec0-4d9f-a40a-a85c38cbf44b_IMG_4581_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2d21dc04-ddde-4f3f-a9a2-bd0070dc04e4_Factures_ophtalmo_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27658074_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a0bbcbed-7ccd-4e43-9e14-296e186afcce_FACTURE_Osteo_AdlaNadia_11022025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6d20c1cc-fa3b-41a9-990e-d115f2c1265b_Facture_MUNOZ_Florian_2025-0002-3_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1224543_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/27c16ad0-3309-4eb1-9432-ab8a546fee7c_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2212e210-bbbc-466f-b30c-fce89df87489_20250210_134936_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702790_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f894f446-28ce-491b-b6fb-f138c6406def_Scan_CPN_STL_Siat_20250207161822_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9c09398e-1348-4844-8fce-e0421ad306c9_note-d-honoraire_-_2025-02-05T144527.016_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25417096_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27575418_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/819f6faf-d9f3-4668-9000-e414d92c7f41_ROYER_LUDIVINE_FACTURE_OPHTALMO_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/85e27f49-5723-4dff-a511-885441636918_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3fd4b2e1-0247-47a1-8f27-ef7d95b9bcd2_Facture_M._Delattre_3_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25265881_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658376424KM_C250i24032713020_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6ba7415c-4d85-465b-bc52-d60959b9457f_Facture_NCBL_Cataracte_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25875089_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26772555_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151775psy_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2ffb844a-d807-42e3-93e8-18ee0285a542_facture-POULAIN_Justine_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2d21dc04-ddde-4f3f-a9a2-bd0070dc04e4_Factures_ophtalmo_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b8feb7cc-40df-4d9a-b250-a06d519fdd3d_IMG_20240522_181521_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031414recu-fac-2024-04-002327_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298482BENTO22032024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/73c8aacd-890c-427f-b73d-9590f84fd6cb_20250208_114121_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3e214583-2362-4d68-a96e-ac34df3a2bb7_osteo_ju_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27210605_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/68e26d63-8bae-4e80-8f45-9a0cf6c23ad1_Osteo__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1183772_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784857_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27631166_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED S-Z22-996335_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4d8f8074-95e0-411a-b29d-25f1180fab97_Facture_Osteo_05.02.25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bef5bd2d-a370-457f-9d30-f7ce27c4dd70_Facture_2024-12-002_-_S_Frederic_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592510_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/717d0d8d-3065-41d4-99d9-8c42dbcab6a8_Viani_Doris_facture_2025-02-01_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e5200778-8114-415c-8304-539e8e8876a0_CamScanner_07-02-2025_11.49_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/72eca803-8b6c-462c-83b3-ffbccf565a09_invoice-1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/exemple-cvec_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/77645bbd-1bc5-4636-b6c9-1c64713d19b1_facture_Raphael_Braud__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25187935_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6ac3dac2-9705-4daa-848b-86f960b592f6_IMG_1638_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25233928_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4c53616e-7da0-4c6b-9d89-b645e80f1836_FA_25-1000_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25357010_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/cfe63e63-0385-4881-b08a-b65b50e7eeed_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/534b115b-9997-48c3-bc09-ffb89e5bddf3_Screenshot_20250212_150902_com.google.android.apps.docs_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9e0ec80c-7190-4c77-a4f8-3dda603b0b30_Facture_Osteo_-_1ERE_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27579724_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0453c161-70af-4474-8854-27ab0f5d7bc0_2025-02-00044-2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25632679_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/actaadfdfffe_mariage_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6589f4fa-ebac-459a-9ebe-aa84820a2905_FACTURE_LENTILLE_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724399050308image_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/83f38474-d109-4edc-a395-0025e6259d96_Facture_250128_Osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c64d0b90-f96f-432c-8ad7-52567dd9efc4_DRAPERI_Delphine_734_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172560941569217134836825692176214918791820826_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/modele quittance de loyers agricoles_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Extrait acte de naissance - 27049977-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f2d7862b-6953-42c0-8fce-22854ae760f9_Osteopathe__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151828Facture-FACTURE_2024-1152-MEKELLECHE-31-03-2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701396_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e18c5522-3488-40e9-9237-fd47811fb031_17388342101672092579270573009279_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298484BENTO29032024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/40e6166e-dce5-4710-aba7-538fa663ec6c_IMG_20250306_093631_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27619324_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25161623_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25166322_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/137586df-491e-440d-97b1-0afd0047318f_Facture_2025-02-008_-_L_Ezra_5_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25184785_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27778112_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f58f9a38-60a6-4f41-98f5-26852a68d8c7_Facture_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5550f57b-87f9-49dc-966d-aebef8d6c1d0_facture-chaulan-eliane-20250523-1531_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25414972_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26775374_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/085f8bfc-690a-4daf-9559-865274181242_Facture_CIANO_Ophtamologique_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/mobilenew_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e01ef5c0-efc8-4f46-9868-b2b7ea2e85e9_Facture_4_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25182410_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27619261_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/141f7ccc-37ab-4da5-9788-80a27e819b8b_Feuilles_de_soins_Psychologue_x2_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a3745377-917b-49b9-b9fc-3e65d22c5638_BERTIN_Franck_Facture_osteopathie_du_23_01_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a682cca0-0df7-4f71-b366-7525e73fb322_IMG_20250207_115651_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25117961_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a4873987-de63-47b0-b0ef-81044554305f_IMG_20250211_163051_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d1d62a23-a37a-4bae-b6f5-a28a137ef0af_RECTIFY_IMG_20250207_153454_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b0977b46-747a-418c-bd20-9e99472917a2_Screenshot_20250209-143006_Samsung_Notes_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1210953_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27775827_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/511f4232-e9d9-47e2-b8b4-d029f8dc1e6e_2025-01-23_OSTEO_ALCOCK_FAC-2170_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0007983c-1e48-4197-87d8-9291c7928688_LUNETTE_IVANOV_REMI_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27646505_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/106b84f7-29a1-4c06-8cdf-4a297995e678_Facture_lunettes_Amelie_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156681291facture_osteopathe_20_aout_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/419fd893-b8ef-404d-bc49-d637110e06ad_facture_ostheo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26775264_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25300559_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1204645_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3116513f-22b1-4143-8f0b-97527093a9cb_Facture_osteo_fevrier_2024_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e8e0549f-9d27-4508-9f56-1750649f9c1f_17466044538525179769049202853107_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9dfe878b-b49f-4c96-bab5-93bb2ccfc4cf_Facture_DE_LA_RIVIERE_G_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/484d05c2-b89b-4ec0-8968-9ae6a5360f6a_DetailDesPaiements_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25239524_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fe3ade48-89c0-4ac3-81f3-6dae15e20859_Ostheopate_07_02_25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27687795_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701163_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27779029_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/879cf76a-68c1-40d7-b444-0ff3341c97de_facture-zami-isabelle-20250130-1703_1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27702202_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26749684_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/27af0979-5d5f-4dee-b6b3-a8ef17cce646_facture_osteopathie_Dufour_Francois_04-02-2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27776107_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27603799_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e61341e0-47f5-44f0-afdd-b151b4ceb5aa_DEPASSEMENT_FACTURE_IVANOV_R_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/655f9ad1-1759-4ae4-8707-c66a97ef4096_IMG_20250131_162239_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784722_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie - 27518206_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27701669_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1cbf5b4f-5370-4ad4-b877-1e3aaf3259f6_Document_2025-02-05_105424_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5f424e29-cdd7-4c3f-aceb-28377ea12982_Facture_2500023_Ferre_Bryan_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26765337_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c884bb97-d413-4384-8ec1-6a99735b8be1_facture_opticien_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27771553_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27633719_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25186075_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27651227_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/5bb3a883-f00f-42a5-84e4-007ffa949d76_facture_acquittee_optique_MIHI_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172415668127717241473179067067247267468124453_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17b264a2-4881-49a4-8326-48ace17dd86f_factures_ophtalmo_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705806_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/455854fe-84e6-4ece-9492-276410b4e422_BORDERES1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED N-R-2391503_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27439925_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27232480_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1498c5f5-22cf-41d4-87f0-9b295ff481f4_IMG_5659_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26550623_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a2487589-0cc9-424b-b7c0-077dc768318b_Renouvin_Ferlicot_Edith_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/exemple-de-facture-e-commerce_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26908261_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b3271922-dc96-4bc0-b79b-f11c1c3c2d8b_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25627904_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fa25423e-b79c-48f9-89ae-32ffd95e3101_Facture_Ophtalmologie_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b1b5e4da-2462-48c1-a9b9-ff75ecd61e37_IMG_20250206_142210_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27617919_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26189649_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1203226_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6e61ced2-f624-4434-b980-a347976b9612_osteopathe_11.02.2025_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26236562_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c865b3cf-c251-465d-97e2-ada7a618dc56_Numerisation_20250207_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27746732_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26441046_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4b0d7324-9a46-4f2c-872b-4b9c06cb0566_595570692474790_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27705767_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/RIB_Mr_Mme_PHAM-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397366242IMG_20240417_135648_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27686489_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/carte-tiers-payant_2025-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25382471_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/afc2330e-295a-4fcf-b12f-eb7de776c7a7_EMERY_Jerome_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226587949AnyScanner_03_25_2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425031408Facture_psy_fevrier_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED A-B22-677661_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b685d5dd-e01e-4fa9-b7aa-17abd1a25b74_IMG_5544_1_scale_0.9.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25192590_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27537844_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26740051_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9c2763f2-fb0c-4fe4-be38-6f94fba64162_optical_facture_lunettes_apres_accident_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27235261_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27201372_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27660322_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1866f267-d6a3-49c5-80c3-e1c5d5ee6e72_HAEFFNER_Esther_Honoraires_osteeo-Schweickart_du_070225_Montant_55_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27776417_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25175989_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b7e34b86-fdb8-4ea4-b646-5bee350b947c_IMG_3542_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1198077_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27011183_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658225850OSTEO1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/b6ecc678-8a40-4e52-bc31-09aab0782121_IMG_5670_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1959c640-25ba-42e8-a898-61d5e0d0eb98_Facture_chiropracteur_07022025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/17243969995252400021-gielczynski-louise-17494429056596ad03d4ae2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27588377_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1221391_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172415667785217241478517061500135449349862696_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1d7b21c7-12f2-475d-aacd-a4b6e48c3291_osteopathe_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1142197_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1abe7c4f-d458-4575-a0e9-3d2eaa3e0787_20250423133038356_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Elec+gaz 17.05.25 Auray-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/407243a9-16c8-4107-b8b3-5c9ad81c6952_DONNA_Celia_feuille-de-soins_2025-02-11_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27775261_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26748031_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4ef6454c-e7e6-4b35-9e66-f41d56027d94_Facture_MARCIN_Amandine0702_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/28f8623a-8962-4ad1-88bb-4914109e1d42_20241118103848034_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/689164e2-be46-489b-8623-88eb5ef70df8_10-02-2025_Facture_Osteopathe_Mme__SIMOU_Daniele_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25957679_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/preview-locataire-certificat-de-scolarite-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469378osteopathe_renau_anais_le_31-07-2024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/40003ed8-4072-4398-bddd-0090359e602d_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27708327_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2b553211-f546-45dd-aa16-d38b138a86c0_IMG_4601_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26729718_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/847603bb-2553-4d83-ace0-2b6ff79fa244_facture_osteo_06-02-2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/831984b8-6c87-4f5d-b0a0-2d0ab5cc9061_20250207_140107_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8be57af2-165f-4a77-9579-8ee89b4c0d88_1000012628_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25167333_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26610949_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1142334_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e358b13d-402c-4e6e-9a65-1160a204353e_Screenshot_20250211_100027_Gallery_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298467Facture_Osteopatie_Melanie_HAMON_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0bfb5041-2751-421d-b5d5-60d696c361c1_17391960081576654684571490136238_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Capture'_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/dcfae795-497c-4d49-8e5d-1c059f6d5cb4_Facture_Osteopathe__2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/01d6b922-8482-400e-93ea-82cec3190698_1000004180_1_scale_0.7.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d6b93c3d-f00e-49ac-8fce-ebd2c99ec6a5_Feuille_de_soins_PROBST_Annie_20250315_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3e613099-4659-4efd-b33f-811fc5fc764d_IMG_20250206_131824_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d4f310c8-3bcf-4bb5-bf53-27ad74d66c1e_FACTURE_OPTICIEN_OPHTALMO_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25182408_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27756973_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c0625556-2cb1-42c8-9433-e31274c121a3_Livret_de_famille_hamidouna-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26082051_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/719ba06c-91ee-44dc-b7b6-4a3132f733af_FievreVillaniCarlaLunettes_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/3676c6dd-d518-44b9-8c9f-a37b7eb77c2f_17389316237651055174693799784340_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/74441931-f8d0-4cb0-8eb5-df4147a72bbe_mgp_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1078555_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e41a3905-f595-4a23-8b98-cfba5c39052f_OSTEO_du_11_02_2025_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/12ebca6a-2e94-4739-a692-1100f6722a5f_Le_Monnier_Fauche_Nolan_facture_2025-01-17_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/50eeaebf-d552-4a12-92a4-66d185bb992b_facture_optique_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/9496a72b-9c2c-4542-867f-9fc4dad457c1_IMG_20250207_084759_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/42182356-427d-4a67-8ad2-a21775f7bf44_17390334446023078850027382669569_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26723534_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/141d1472-51b6-49f1-a349-e2b5a2adbdde_Facture_osteo__1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ddc16454-d05b-4e9d-b03c-50468919fef3_IMG_9601_1_scale_0.4.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27759503_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397125672fact_osteo_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bcdab145-079b-4ba1-a334-97abfd45d9f8_Cabinet_Perreux_sur_marne_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-27082405_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25224007_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/172563253219717135147262417284755816824044985_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/baadcf11-c68e-4774-bbaf-c7dad90b4e13_DURAND_FABIEN_facture_2025-04-25_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25489070_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/52bf793f-cea7-4827-a719-ac4fae130280_facture_osteo14.01_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0308b7e0-8da0-4bfe-a578-57eb88e0511a_recu-fac-002296_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26765337_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25245480_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27668729_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1148278_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED N-R1-948960_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture psychologie-GED K-M33-235142_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27608383_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ee05efb9-c283-46d5-a7e7-222392db9309_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1112613_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724397121048facture_kine_15-04-24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/attestationVitale_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27703140_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8dcf02a7-b647-4605-9d95-d3fda3685599_p20250212_14102586_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27238995_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/6ba7415c-4d85-465b-bc52-d60959b9457f_Facture_NCBL_Cataracte_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/fixIBULL_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a593830c-dcbb-4553-860f-b0925a4c30ef_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27781874_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/nom contact_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/15702a02-c595-4c4e-bb3f-9b31e542c13b_FACTURE_POTARD_-_DE_SANTS_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/28a6084a-24a5-4922-b79d-f96dd51c5666_Factures_et_ordonnaces_optique_ABDOUL_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26722802_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/707771f6-bb3b-4e5e-8d7d-acd584a23009_IMG_20240726_142929_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27753021_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/rib-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f40d1b67-7690-4d39-8578-eca04cfd2b3c_Factures_Ethiopathe_Dimitri_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724396999460facture_lentille_garreau1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27679600_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c2dc54e7-025f-4d2a-a569-4548c4064cc4_Document_scanne_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/4f09f855-c993-42ea-97b1-3dd728b53e16_devis_ophtalmo_1_scale_0.8.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/586fa34d-1ecb-4c41-a953-b866443a7793_Osteopathe_001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f7df49da-2cf8-4584-8306-0d10ea1994a3_Melot_Quentin_facture_2025-02-07_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1c9ac535-bb12-431c-b88f-b3401b30925e_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1062996_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151866livretDeFamille_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25059182_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/48e19752-3534-4539-8905-e09bdeb9fddd_CamScanner_11-02-2025_21.51_1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27757098_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26957793_3_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/bad0f823-e7d4-41be-87a2-1634254db924_IMG_20250207_130207_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724425079184Facture_Osteopathe_Brousseau_sader_17avr24_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26189648_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658298486BENTO06042024_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/95c188de-0800-4cdc-8f02-4c6fdadf3d66_Facture_Osteo_Annick_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27579295_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/e6c39a83-a53c-48f5-bffe-6bdb57337b33_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27601381_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/eb5561dd-937b-4c0b-b6c6-56da8b528c94_facture_vision_claire_ophtalmo_1_001_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/7bad16be-dff9-4b92-8041-4a9c38638e29_facturation_osteo-1_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25330401_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2af3fe93-a285-4ced-898b-90b4106bef8c_osteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27646193_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27691255_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27576127_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/a9f88b8f-a751-478e-9338-2d31fb1062e7_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27717154_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27784971_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Quittance de Loyer 11.24 Mr NGUYEN-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26691031_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27653858_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25279751_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/0cae671e-6b7f-41aa-a0ab-d74b8e15e424_DIMART_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1196314_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27554784_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658376437factosteo_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25919222_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26729570_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724226151715Facture_Julie_Boissel_2_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1d3b5d22-87f2-4c89-90e5-8b0350bca84e_facture__Surre_Chloe_03_fevrier_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/04fde30d-7891-439d-85e7-285a0d8ffee1_IMG_1550_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25273058_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/2b3ed78c-1a3c-40b3-b997-0e74f6ba497f_FENOLLAR_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469373facture_psy_DOMI_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26107527_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/d317e79c-0485-42f8-b24d-413d94e7a779_Livret_de_famille-1_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture médecine douce-27710953_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1106092_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/834a074c-811f-4d27-9c19-134a4b9ab2ea_FACTURE-25-02-208-GEORGES_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/c7bbe834-632d-4fb6-8e24-c9c5caf488bc_IMG_4966_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/DDE prestation Facture des lentilles de contact-26723200_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f8c6fe47-9608-4f9e-938b-117bfa4dcd30_ostheopatie_2025-02-12_Grenot-JLuc_Mme-Tabary_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f0395d11-18fb-4b9f-bee1-5ca75d656d14_image_1_scale_0.6.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture des lentilles de contact-27664783_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26592477_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/abd25cab-e2d5-456c-ac6c-d209b698daf2_Le_Monnier_Fauche_Nolan_facture_2025-01-31_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-1105977_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture-quittance toutes spécialités-27474258_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/8d5e5dfe-71a8-4a05-a376-8727cd264ffd_osteopathe_do_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25187936_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724658126141JACOB_Francois_2024-111_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-26368529_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25263730_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/Facture ostéopathe-25330401_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/ab26e47b-d267-439c-9b12-8699c17d5d47_facture_Aurelie_Claude_naturopathe_2_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/47a3ed6d-aade-4da2-8006-1ec798c5e915_Stiegler_Gary_1_scale_1.5.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/f30627ed-f84d-4e7e-aad5-fd4e06475247_livret-de-famille-1-2_1_scale_1.0.jpg
/home/nguyendc/model-factory/Finetuning-Automation/etc/data/media/docai_mgp_facture_v2_0/1724156469377Facture_DAURISFlorian_20240815CGEI_1_scale_1.5.jpg

View File

@@ -0,0 +1,2 @@
nohup: ignoring input
python: can't open file '/home/nguyendc/sonnh/embedding-clustering/filter/gmm_extensive.py': [Errno 2] No such file or directory

View File

@@ -0,0 +1,111 @@
#!/usr/bin/env python3
"""
Randomly sample a subset of label_data.json (by ratio or by fixed count)
once for each of three different seeds and save each sample to its own file.
"""
import json
import random
import os
from typing import List, Dict, Any
def load_json_data(file_path: str) -> List[Dict[str, Any]]:
    """Load the list of samples stored in a JSON file.

    Args:
        file_path: Path of the UTF-8 encoded JSON file to read.

    Returns:
        The parsed list of samples. An empty list is returned, after a
        diagnostic is printed, when the file does not exist, is not valid
        JSON, or its top-level value is not a JSON array.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
    except FileNotFoundError:
        print(f"❌ Không tìm thấy file: {file_path}")
        return []
    except json.JSONDecodeError as e:
        print(f"❌ Lỗi decode JSON: {e}")
        return []

    # A JSON object or scalar would satisfy len() but break random.sample()
    # later on; reject it here, the same way as the other load failures.
    if not isinstance(data, list):
        print(f"❌ Dữ liệu JSON không phải là list: {type(data).__name__}")
        return []

    print(f"✅ Đã load {len(data)} samples từ {file_path}")
    return data
def random_sample_data(data: List[Dict[str, Any]], seed: int, sample_ratio: float = 0.5, sample_size=None) -> List[Dict[str, Any]]:
    """Draw a reproducible random sample (without replacement) from ``data``.

    Args:
        data: Population to sample from.
        seed: Value used to seed the module-level ``random`` generator, so
            the same seed always yields the same sample.
        sample_ratio: Fraction of ``data`` to keep; only used when
            ``sample_size`` is None.
        sample_size: Exact number of items to keep. When given it takes
            precedence over ``sample_ratio``.

    Returns:
        A new list holding the sampled items.

    Raises:
        ValueError: If the requested size is negative.
    """
    random.seed(seed)
    total = len(data)
    if sample_size is None:
        sample_size = int(total * sample_ratio)

    # random.sample() raises ValueError when asked for more items than the
    # population holds; cap the request so small inputs are returned whole.
    sample_size = min(sample_size, total)

    sampled_data = random.sample(data, sample_size)

    # Report the fraction actually drawn: sample_ratio is not what was used
    # when sample_size overrides it.
    actual_percent = len(sampled_data) / total * 100 if total else 0.0
    print(f"🎲 Seed {seed}: Đã sample {len(sampled_data)}/{total} samples ({actual_percent:.1f}%)")
    return sampled_data
def save_json_data(data: List[Dict[str, Any]], file_path: str) -> bool:
    """Write ``data`` to ``file_path`` as indented, UTF-8 encoded JSON.

    Args:
        data: Samples to serialize.
        file_path: Destination path; an existing file is overwritten.

    Returns:
        True when the file was written, False when serialization or the
        write failed (a diagnostic is printed in that case).
    """
    try:
        # Serialize before opening the file: open(..., 'w') truncates the
        # target immediately, so a serialization error raised afterwards
        # would leave an empty or partial file behind.
        payload = json.dumps(data, ensure_ascii=False, indent=2)
        with open(file_path, 'w', encoding='utf-8') as f:
            f.write(payload)
        print(f"💾 Đã lưu {len(data)} samples vào {file_path}")
        return True
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and let the
        # caller decide what to do based on the boolean result.
        print(f"❌ Lỗi khi lưu file {file_path}: {e}")
        return False
def main():
    """Sample the labelled data once per seed and save each sample to disk.

    Reads ``label_data.json`` from the current directory, draws one random
    sample per seed and writes it to ``label_data_sample_seed_<seed>.json``.
    Finally lists the output files and whether each one exists.
    """
    # Input file path
    input_file = "label_data.json"

    # Seeds to use
    seeds = [1997, 42, 2025]
    # seeds = [42]

    # Sampling ratio, used only when sample_size is None.
    # The numbers below are presumably experiment/run ids - confirm with the author:
    #   8, 9, 10   -> sample_ratio = 0.5
    #   11, 12, 13 -> sample_ratio = 0.3
    #   16         -> sample_ratio = 0.01
    sample_ratio = 0.3

    # 15, 17, 18: fixed number of samples; takes precedence over sample_ratio.
    sample_size = 200

    print("🔄 Bắt đầu quá trình random sampling...")
    print("=" * 50)

    # Load the original data
    original_data = load_json_data(input_file)
    if not original_data:
        print("❌ Không thể load dữ liệu. Thoát chương trình.")
        return

    print(f"📊 Tổng số samples trong file gốc: {len(original_data)}")
    # Announce the plan that is actually in effect: the ratio only matters
    # when no fixed sample size has been configured.
    if sample_size is None:
        print(f"🎯 Sẽ sample {sample_ratio*100:.1f}% = {int(len(original_data) * sample_ratio)} samples")
    else:
        print(f"🎯 Sẽ sample {sample_size} samples")
    print("=" * 50)

    # Remember every output path so the final report checks the files this
    # run really wrote instead of a separately hard-coded name pattern.
    output_files = []

    # Run the sampling for each seed
    for seed in seeds:
        print(f"\n🌱 Xử lý với seed: {seed}")

        # Random sample data
        sampled_data = random_sample_data(original_data, seed, sample_ratio, sample_size=sample_size)

        # Build the output file name
        output_file = f"label_data_sample_seed_{seed}.json"
        output_files.append(output_file)

        # Save the file
        success = save_json_data(sampled_data, output_file)
        if success:
            print(f"✅ Hoàn thành sampling với seed {seed}")
        else:
            print(f"❌ Lỗi khi xử lý seed {seed}")

    print("\n" + "=" * 50)
    print("🎉 Hoàn thành toàn bộ quá trình!")
    print("\n📁 Các file đã được tạo:")
    for output_file in output_files:
        if os.path.exists(output_file):
            print(f"{output_file}")
        else:
            print(f"{output_file} (không tồn tại)")


if __name__ == "__main__":
    main()

138
filter/run_filter.sh Normal file
View File

@@ -0,0 +1,138 @@
#!/bin/bash
# Example script to run DBSCAN filtering
# Make sure to update the paths according to your data
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="dbscan_filtered_results.json" #0.5 of data, center 0.5, 0.5 border
# OUTPUT_PATH="dbscan_filtered_results_v2.json" #0.5 of data, 0.25 center, 0.75 border
# OUTPUT_PATH="dbscan_filtered_results_v3.json" #0.75 of data, center 0.25 border 0.75
# echo "Running DBSCAN filtering..."
# echo "Embeddings: $EMBEDDINGS_PATH"
# echo "Clustering results: $CLUSTERING_RESULTS_PATH"
# echo "Output: $OUTPUT_PATH"
# python dbscan.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list
# echo "Filtering completed!"
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
# OUTPUT_PATH="gmm_best_by_BIC_filtered_results.json" #0.75 of data, center 0.25 border 0.75
# echo "Running DBSCAN filtering..."
# echo "Embeddings: $EMBEDDINGS_PATH"
# echo "Clustering results: $CLUSTERING_RESULTS_PATH"
# echo "Output: $OUTPUT_PATH"
# python dbscan.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list
# echo "Filtering completed!"
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="014_re_run_dbscan_filtered_results_temp.json" #0.75 of data, center 0.25 border 0.75
# python dbscan_v2.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
# OUTPUT_PATH="015_gmm_best_by_BIC_filtered_results_temp.json" #0.75 of data, center 0.25 border 0.75
# python dbscan_v2.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_DIR="dbscan_v3_out_clusters"
# # python dbscan_v3_only_one_cluster.py \
# # --embeddings_path "$EMBEDDINGS_PATH" \
# # --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# # --output_dir "$OUTPUT_DIR" \
# python dbscan_only_one_cluster.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --save_full_clusters \
# --clusters_output_dir per_clusters
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="026_dbscan_v3_filtered_results_temp.json"
# python dbscan_v3.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list \
# --selection_ratio 1.0 --center_ratio 0.5 --border_ratio 0.5
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="027_dbscan_v3_filtered_results_temp.json"
# python dbscan_v3.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list \
# --selection_ratio 1.0 --center_ratio 0.25 --border_ratio 0.75
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="019_dbscan_v3_filtered_results_temp.json"
# python dbscan_v3.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list \
# --selection_ratio 0.12 --center_ratio 0.25 --border_ratio 0.75
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
# OUTPUT_PATH="028_gmm_best_by_BIC_filtered_results.json" #0.75 of data, center 0.25 border 0.75
# python dbscan_v3.py \
# --embeddings_path "$EMBEDDINGS_PATH" \
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# --output_path "$OUTPUT_PATH" \
# --create_filepath_list \
# --selection_ratio 1.0 --center_ratio 0.25 --border_ratio 0.75
EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# Run 029: selection ratio 0.6, center ratio 0.5, border ratio 0.5.
OUTPUT_PATH="029_dbscan_v3_filtered_results_temp_30.json"

# The ratios are named once here so the description above and the flags
# passed below cannot drift apart.
SELECTION_RATIO=0.6
CENTER_RATIO=0.5
BORDER_RATIO=0.5

python dbscan_v3.py \
    --embeddings_path "$EMBEDDINGS_PATH" \
    --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
    --output_path "$OUTPUT_PATH" \
    --create_filepath_list \
    --selection_ratio "$SELECTION_RATIO" --center_ratio "$CENTER_RATIO" --border_ratio "$BORDER_RATIO"

12
filter/shuffle.py Normal file
View File

@@ -0,0 +1,12 @@
import random
import json
# Fixed input, output and seed so the shuffle can be reproduced exactly.
INPUT_PATH = '/home/nguyendc/sonnh/embedding-clustering/filter/008_label_data_sample_seed_1997.json'
OUTPUT_PATH = '/home/nguyendc/sonnh/embedding-clustering/filter/032_shuffle_008.json'
SHUFFLE_SEED = 8272025

# Decode explicitly as UTF-8: the input may contain non-ASCII text, and the
# platform's default encoding is not guaranteed to be UTF-8.
with open(INPUT_PATH, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Seed the generator first so the resulting order is deterministic.
random.seed(SHUFFLE_SEED)
random.shuffle(data)

with open(OUTPUT_PATH, 'w', encoding='utf-8') as f:
    json.dump(data, f)