139 lines
6.0 KiB
Bash
139 lines
6.0 KiB
Bash
|
#!/bin/bash
|
||
|
|
||
|
# Example script to run DBSCAN filtering
|
||
|
# Make sure to update the paths according to your data
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
|
||
|
# OUTPUT_PATH="dbscan_filtered_results.json" #0.5 of data, center 0.5, 0.5 border
|
||
|
# OUTPUT_PATH="dbscan_filtered_results_v2.json" #0.5 of data, 0.25 center, 0.75 border
|
||
|
# OUTPUT_PATH="dbscan_filtered_results_v3.json" #0.75 of data, center 0.25 border 0.75
|
||
|
|
||
|
|
||
|
|
||
|
# echo "Running DBSCAN filtering..."
|
||
|
# echo "Embeddings: $EMBEDDINGS_PATH"
|
||
|
# echo "Clustering results: $CLUSTERING_RESULTS_PATH"
|
||
|
# echo "Output: $OUTPUT_PATH"
|
||
|
|
||
|
# python dbscan.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list
|
||
|
|
||
|
# echo "Filtering completed!"
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
|
||
|
# OUTPUT_PATH="gmm_best_by_BIC_filtered_results.json" #0.75 of data, center 0.25 border 0.75
|
||
|
|
||
|
|
||
|
# echo "Running DBSCAN filtering..."
|
||
|
# echo "Embeddings: $EMBEDDINGS_PATH"
|
||
|
# echo "Clustering results: $CLUSTERING_RESULTS_PATH"
|
||
|
# echo "Output: $OUTPUT_PATH"
|
||
|
|
||
|
# python dbscan.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list
|
||
|
|
||
|
|
||
|
|
||
|
# echo "Filtering completed!"
|
||
|
|
||
|
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
|
||
|
# OUTPUT_PATH="014_re_run_dbscan_filtered_results_temp.json" #0.75 of data, center 0.25 border 0.75
|
||
|
# python dbscan_v2.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list
|
||
|
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
|
||
|
# OUTPUT_PATH="015_gmm_best_by_BIC_filtered_results_temp.json" #0.75 of data, center 0.25 border 0.75
|
||
|
# python dbscan_v2.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
|
||
|
# OUTPUT_DIR="dbscan_v3_out_clusters"
|
||
|
|
||
|
# # python dbscan_v3_only_one_cluster.py \
|
||
|
# # --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# # --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# # --output_dir "$OUTPUT_DIR" \
|
||
|
|
||
|
# python dbscan_only_one_cluster.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --save_full_clusters \
|
||
|
# --clusters_output_dir per_clusters
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
|
||
|
# OUTPUT_PATH="026_dbscan_v3_filtered_results_temp.json"
|
||
|
|
||
|
# python dbscan_v3.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list \
|
||
|
# --selection_ratio 1.0 --center_ratio 0.5 --border_ratio 0.5
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
|
||
|
# OUTPUT_PATH="027_dbscan_v3_filtered_results_temp.json"
|
||
|
|
||
|
# python dbscan_v3.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list \
|
||
|
# --selection_ratio 1.0 --center_ratio 0.25 --border_ratio 0.75
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json"
|
||
|
# OUTPUT_PATH="019_dbscan_v3_filtered_results_temp.json"
|
||
|
|
||
|
# python dbscan_v3.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list \
|
||
|
# --selection_ratio 0.12 --center_ratio 0.25 --border_ratio 0.75
|
||
|
|
||
|
|
||
|
# EMBEDDINGS_PATH="/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json"
|
||
|
# CLUSTERING_RESULTS_PATH="/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_final_results_bic_20250805_150636.json"
|
||
|
# OUTPUT_PATH="028_gmm_best_by_BIC_filtered_results.json" #0.75 of data, center 0.25 border 0.75
|
||
|
|
||
|
# python dbscan_v3.py \
|
||
|
# --embeddings_path "$EMBEDDINGS_PATH" \
|
||
|
# --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
|
||
|
# --output_path "$OUTPUT_PATH" \
|
||
|
# --create_filepath_list \
|
||
|
# --selection_ratio 1.0 --center_ratio 0.25 --border_ratio 0.75
|
||
|
|
||
|
|
||
|
# Active run: filter the DBSCAN clustering results with dbscan_v3.py.
#
# EMBEDDINGS_PATH, CLUSTERING_RESULTS_PATH and OUTPUT_PATH may be supplied
# through the environment; when unset or empty they fall back to the values
# that were previously hard-coded here.
EMBEDDINGS_PATH="${EMBEDDINGS_PATH:-/home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json}"
CLUSTERING_RESULTS_PATH="${CLUSTERING_RESULTS_PATH:-/home/nguyendc/sonnh/embedding-clustering/cluster/dbscan_results.json}"
# This run passes selection_ratio 0.6, center_ratio 0.5, border_ratio 0.5
# (see the flags on the command below).
OUTPUT_PATH="${OUTPUT_PATH:-029_dbscan_v3_filtered_results_temp_30.json}"

# Fail early, with a clear message on stderr, if an input file is missing,
# rather than leaving the error to surface from inside the Python script.
for input_path in "$EMBEDDINGS_PATH" "$CLUSTERING_RESULTS_PATH"; do
  if [[ ! -f "$input_path" ]]; then
    printf 'Error: input file not found: %s\n' "$input_path" >&2
    exit 1
  fi
done

# The exit status of the script is the exit status of this command.
python dbscan_v3.py \
  --embeddings_path "$EMBEDDINGS_PATH" \
  --clustering_results_path "$CLUSTERING_RESULTS_PATH" \
  --output_path "$OUTPUT_PATH" \
  --create_filepath_list \
  --selection_ratio 0.6 --center_ratio 0.5 --border_ratio 0.5
|