#!/bin/bash
#
# Example script to run DBSCAN filtering.
#
# Invokes dbscan_v3.py with an embeddings file, a clustering-results file, an
# output path, --create_filepath_list and three ratio flags.
#
# Make sure to update the paths according to your data. Every setting below
# can also be overridden from the environment without editing this file:
#
#   EMBEDDINGS_PATH          value passed to --embeddings_path
#   CLUSTERING_RESULTS_PATH  value passed to --clustering_results_path
#   OUTPUT_PATH              value passed to --output_path
#   SELECTION_RATIO          value passed to --selection_ratio (default 0.6)
#   CENTER_RATIO             value passed to --center_ratio    (default 0.5)
#   BORDER_RATIO             value passed to --border_ratio    (default 0.5)
#
# NOTE: dbscan_v3.py and a relative OUTPUT_PATH are both resolved against the
# current working directory, so run this script from the directory that
# contains dbscan_v3.py.

# Abort on the first failing command, on use of an unset variable, and on a
# failure anywhere inside a pipeline.
set -euo pipefail

# ---------------------------------------------------------------------------
# Previous runs, kept commented out for reference.
# ---------------------------------------------------------------------------

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="dbscan_filtered_results.json" #0.5 of data, center 0.5, 0.5 border
# OUTPUT_PATH="dbscan_filtered_results_v2.json" #0.5 of data, 0.25 center, 0.75 border
# OUTPUT_PATH="dbscan_filtered_results_v3.json" #0.75 of data, center 0.25 border 0.75

# echo "Running DBSCAN filtering..."
# echo "Embeddings: $EMBEDDINGS_PATH"
# echo "Clustering results: $CLUSTERING_RESULTS_PATH"
# echo "Output: $OUTPUT_PATH"

# python dbscan.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list

# echo "Filtering completed!"

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
# OUTPUT_PATH="gmm_best_by_BIC_filtered_results.json" #0.75 of data, center 0.25 border 0.75

# echo "Running DBSCAN filtering..."
# echo "Embeddings: $EMBEDDINGS_PATH"
# echo "Clustering results: $CLUSTERING_RESULTS_PATH"
# echo "Output: $OUTPUT_PATH"

# python dbscan.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list

# echo "Filtering completed!"

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="014_re_run_dbscan_filtered_results_temp.json" #0.75 of data, center 0.25 border 0.75

# python dbscan_v2.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
# OUTPUT_PATH="015_gmm_best_by_BIC_filtered_results_temp.json" #0.75 of data, center 0.25 border 0.75

# python dbscan_v2.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_DIR="dbscan_v3_out_clusters"

# # python dbscan_v3_only_one_cluster.py \
# #     --embeddings_path "$EMBEDDINGS_PATH" \
# #     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
# #     --output_dir "$OUTPUT_DIR" \

# python dbscan_only_one_cluster.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --save_full_clusters \
#     --clusters_output_dir per_clusters

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="026_dbscan_v3_filtered_results_temp.json"

# python dbscan_v3.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list \
#     --selection_ratio 1.0 --center_ratio 0.5 --border_ratio 0.5

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="027_dbscan_v3_filtered_results_temp.json"

# python dbscan_v3.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list \
#     --selection_ratio 1.0 --center_ratio 0.25 --border_ratio 0.75

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
# OUTPUT_PATH="019_dbscan_v3_filtered_results_temp.json"

# python dbscan_v3.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list \
#     --selection_ratio 0.12 --center_ratio 0.25 --border_ratio 0.75

# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
# OUTPUT_PATH="028_gmm_best_by_BIC_filtered_results.json" #0.75 of data, center 0.25 border 0.75

# python dbscan_v3.py \
#     --embeddings_path "$EMBEDDINGS_PATH" \
#     --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
#     --output_path "$OUTPUT_PATH" \
#     --create_filepath_list \
#     --selection_ratio 1.0 --center_ratio 0.25 --border_ratio 0.75

# ---------------------------------------------------------------------------
# Current run.
# ---------------------------------------------------------------------------

# Inputs and output. The defaults are the values this script has always used;
# a value already present in the environment takes precedence.
EMBEDDINGS_PATH="${EMBEDDINGS_PATH:-/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json}"
CLUSTERING_RESULTS_PATH="${CLUSTERING_RESULTS_PATH:-/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json}"
OUTPUT_PATH="${OUTPUT_PATH:-029_dbscan_v3_filtered_results_temp_30.json}"

# Ratios forwarded verbatim to dbscan_v3.py.
# NOTE(review): presumably selection_ratio is the fraction of the data kept and
# center/border split that selection between cluster centers and borders --
# confirm against dbscan_v3.py.
SELECTION_RATIO="${SELECTION_RATIO:-0.6}"
CENTER_RATIO="${CENTER_RATIO:-0.5}"
BORDER_RATIO="${BORDER_RATIO:-0.5}"

# Fail early, with a clear message, when an input file is missing instead of
# letting the Python script fail later on.
for input_file in "$EMBEDDINGS_PATH" "$CLUSTERING_RESULTS_PATH"; do
  if [[ ! -f "$input_file" ]]; then
    printf 'error: input file not found: %s\n' "$input_file" >&2
    exit 1
  fi
done

python dbscan_v3.py \
  --embeddings_path "$EMBEDDINGS_PATH" \
  --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
  --output_path "$OUTPUT_PATH" \
  --create_filepath_list \
  --selection_ratio "$SELECTION_RATIO" \
  --center_ratio "$CENTER_RATIO" \
  --border_ratio "$BORDER_RATIO"