update source code and pipeline

This commit is contained in:
2025-09-04 14:39:02 +00:00
parent 9aabd991c5
commit 878310a551
82 changed files with 24373 additions and 0 deletions

View File

@@ -0,0 +1,670 @@
#!/usr/bin/env python3
"""
Simple script to run automatic clustering methods (DBSCAN, Mean Shift, Affinity Propagation)
These methods don't require specifying the number of clusters beforehand.
"""
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN, MeanShift, AffinityPropagation
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import argparse
import warnings
warnings.filterwarnings('ignore')
class AutoClustering:
    """Cluster document embeddings with methods that pick the cluster count themselves.

    The embeddings are read from a JSON file when the object is created and
    standardized feature-wise; every clustering method works on the
    standardized copy stored in ``self.embeddings_scaled``.
    """

    def __init__(self, embeddings_path):
        """Remember *embeddings_path* and immediately load the embeddings."""
        self.embeddings_path = embeddings_path
        self.embeddings = None
        self.file_paths = None
        # Both are filled in by load_embeddings(); declared here so the
        # attribute set of an instance is visible in one place.
        self.scaler = None
        self.embeddings_scaled = None
        self.load_embeddings()

    def load_embeddings(self):
        """Load embeddings from the JSON file and standardize them.

        The file must hold a list of objects, each with a ``filepath`` and an
        ``embedding`` entry.

        Raises:
            ValueError: if the file does not yield a non-empty 2-D matrix.
        """
        print(f"Loading embeddings from {self.embeddings_path}...")
        with open(self.embeddings_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
        self.file_paths = [item['filepath'] for item in data]
        self.embeddings = np.array([item['embedding'] for item in data], dtype=np.float32)
        if self.embeddings.ndim != 2 or len(self.embeddings) == 0:
            # Without this guard the shape[1] access below fails with an
            # unhelpful IndexError on an empty file.
            raise ValueError(f"No embeddings found in {self.embeddings_path}")
        print(f"Loaded {len(self.file_paths)} samples with embedding dimension {self.embeddings.shape[1]}")
        # Standardize embeddings for better clustering
        self.scaler = StandardScaler()
        self.embeddings_scaled = self.scaler.fit_transform(self.embeddings)

    def run_dbscan(self, min_samples_candidates=None):
        """Run DBSCAN over a grid of (eps, min_samples) pairs and keep the best.

        eps candidates come from k-nearest-neighbour distance percentiles,
        from statistics of sampled pairwise distances and from a fixed list.
        Every trial is recorded and written out through
        ``save_dbscan_grid_search_results``.

        Args:
            min_samples_candidates: iterable of ``min_samples`` values to try.
                Defaults to ``[60]``, the value the script previously
                hard-coded.

        Returns:
            The label array of the trial with the highest silhouette score
            (noise points excluded from the score), or ``None`` when no trial
            produced at least two clusters with less than 90% noise.
        """
        from scipy.spatial.distance import pdist

        print("\n" + "="*50)
        print("RUNNING DBSCAN CLUSTERING WITH EXTENSIVE GRID SEARCH")
        print("="*50)
        data = self.embeddings_scaled
        n_samples = len(data)
        if min_samples_candidates is None:
            min_samples_candidates = [60]
        min_samples_candidates = list(min_samples_candidates)

        eps_candidates = []
        # Method 1: percentiles of the k-th neighbour distance for several k.
        for k in (5, 10, 15, 20, 25, 30):
            k_actual = min(k, n_samples // 4)
            if k_actual < 3:
                continue
            distances, _ = NearestNeighbors(n_neighbors=k_actual).fit(data).kneighbors(data)
            kth_distances = distances[:, k_actual - 1]
            eps_candidates.extend(np.percentile(kth_distances, [60, 65, 70, 75, 80, 85, 90, 95]))

        # Method 2: statistics of pairwise distances (sampled for efficiency).
        # A seeded generator keeps the candidate grid reproducible between runs.
        rng = np.random.default_rng(42)
        sample_indices = rng.choice(n_samples, min(1000, n_samples), replace=False)
        pairwise_distances = pdist(data[sample_indices])
        if pairwise_distances.size:
            mean_d = np.mean(pairwise_distances)
            median_d = np.median(pairwise_distances)
            std_d = np.std(pairwise_distances)
            eps_candidates.extend(mean_d * factor for factor in (0.3, 0.4, 0.5, 0.6, 0.7))
            eps_candidates.extend(median_d * factor for factor in (0.3, 0.4, 0.5, 0.6))
            eps_candidates.extend(std_d * factor for factor in (0.5, 0.8, 1.0, 1.2))

        # Method 3: manual eps values for different scales.
        eps_candidates.extend([0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0,
                               1.2, 1.5, 1.8, 2.0, 2.5, 3.0, 3.5, 4.0, 5.0])
        # De-duplicate, drop non-positive/NaN values, and convert numpy scalars
        # to plain floats so the trial records stay JSON-serializable.
        eps_candidates = sorted({float(eps) for eps in eps_candidates if eps > 0})

        best_score = -1
        best_params = None
        best_labels = None
        total_combinations = len(eps_candidates) * len(min_samples_candidates)
        print(f"Testing {len(eps_candidates)} eps values x {len(min_samples_candidates)} min_samples values")
        print(f"Total combinations: {total_combinations}")
        print("This may take a while...\n")

        all_results = []
        current_combination = 0
        for eps in eps_candidates:
            for min_samples in min_samples_candidates:
                current_combination += 1
                if current_combination % 50 == 0 or current_combination == total_combinations:
                    progress = (current_combination / total_combinations) * 100
                    print(f"Progress: {current_combination}/{total_combinations} ({progress:.1f}%)")
                try:
                    labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(data)
                except Exception as exc:
                    # Skip problematic parameter combinations, but say so.
                    print(f"eps={eps:.4f}, min_samples={min_samples}: failed ({exc})")
                    continue

                noise_mask = labels == -1
                n_noise = int(noise_mask.sum())
                n_clusters = len(set(labels.tolist())) - (1 if n_noise else 0)
                noise_ratio = n_noise / len(labels)
                # 'silhouette_score' is always present so the savers and the
                # analysis can rely on the key for every trial.
                result_info = {
                    'eps': eps,
                    'min_samples': min_samples,
                    'n_clusters': n_clusters,
                    'n_noise': n_noise,
                    'noise_ratio': noise_ratio,
                    'silhouette_score': None,
                }
                all_results.append(result_info)

                # Only score trials with meaningful clusters (not mostly noise).
                if n_clusters < 2 or noise_ratio >= 0.9:
                    continue
                clustered = ~noise_mask
                try:
                    score = float(silhouette_score(data[clustered], labels[clustered]))
                except Exception:
                    continue
                result_info['silhouette_score'] = score
                if score > 0.1:  # Only show decent scores
                    print(f"eps={eps:.4f}, min_samples={min_samples}: {n_clusters} clusters, {n_noise} noise ({noise_ratio:.1%}), silhouette={score:.4f}")
                if score > best_score:
                    best_score = score
                    best_params = (eps, min_samples)
                    best_labels = labels

        self._print_dbscan_analysis(all_results)
        print(f"\n📁 SAVING DETAILED RESULTS...")
        print("="*30)
        self.save_dbscan_grid_search_results(all_results, best_params, best_score)

        if best_labels is None:
            print("DBSCAN could not find suitable clusters with the extensive grid search")
            print("Consider:")
            print("- Adjusting the embedding space (different model or preprocessing)")
            print("- Using different clustering algorithms")
            print("- Manual parameter tuning based on domain knowledge")
            return None

        n_noise = int((best_labels == -1).sum())
        n_clusters = len(set(best_labels.tolist())) - (1 if n_noise else 0)
        print(f"\nBest DBSCAN result:")
        print(f"Parameters: eps={best_params[0]:.4f}, min_samples={best_params[1]}")
        print(f"Number of clusters: {n_clusters}")
        print(f"Number of noise points: {n_noise} ({n_noise/len(best_labels)*100:.1f}%)")
        print(f"Silhouette score: {best_score:.4f}")
        return best_labels

    @staticmethod
    def _print_dbscan_analysis(all_results):
        """Print summary statistics for the recorded DBSCAN trials."""
        from collections import Counter

        print("\n" + "="*50)
        print("DBSCAN GRID SEARCH ANALYSIS")
        print("="*50)
        if not all_results:
            return
        print(f"Total parameter combinations tested: {len(all_results)}")
        with_clusters = [r for r in all_results if r['n_clusters'] >= 2]
        print(f"Combinations that produced clusters: {len(with_clusters)}")
        if not with_clusters:
            return
        scored = [r for r in with_clusters if r.get('silhouette_score') is not None]
        if scored:
            scores = [r['silhouette_score'] for r in scored]
            print(f"Combinations with valid silhouette scores: {len(scored)}")
            print(f"Best silhouette score: {max(scores):.4f}")
            print(f"Mean silhouette score: {sum(scores) / len(scores):.4f}")
            print("\nTop 5 parameter combinations:")
            top_results = sorted(scored, key=lambda r: r['silhouette_score'], reverse=True)[:5]
            for row in top_results:
                # Plain dicts keep min_samples / n_clusters as ints in the output.
                print(f" eps={row['eps']:.4f}, min_samples={row['min_samples']}: "
                      f"{row['n_clusters']} clusters, silhouette={row['silhouette_score']:.4f}")
        print(f"\nCluster count distribution:")
        cluster_counts = Counter(r['n_clusters'] for r in with_clusters)
        for n_clusters, count in sorted(cluster_counts.items()):
            print(f" {n_clusters} clusters: {count} parameter combinations")

    def save_dbscan_grid_search_results(self, all_results, best_params, best_score):
        """Write the DBSCAN grid-search trials to timestamped JSON and CSV files.

        Args:
            all_results: list of trial dicts with ``eps``, ``min_samples``,
                ``n_clusters``, ``n_noise``, ``noise_ratio`` and optionally
                ``silhouette_score`` (missing or ``None`` marks a failed trial).
            best_params: ``(eps, min_samples)`` of the best trial, or ``None``.
            best_score: silhouette score of the best trial; ``-1`` or ``None``
                means no trial was scored.

        Both files are created in the current working directory.
        """
        import datetime

        def builtin(value):
            # json cannot encode numpy scalars such as np.float32.
            return value.item() if isinstance(value, np.generic) else value

        now = datetime.datetime.now()
        has_score = best_score is not None and best_score > -1
        grid_search_data = {
            "experiment_info": {
                "timestamp": now.isoformat(),
                "dataset_path": self.embeddings_path,
                "total_samples": len(self.file_paths),
                "embedding_dimension": int(self.embeddings.shape[1]),
                "total_combinations_tested": len(all_results)
            },
            "best_result": {
                "eps": builtin(best_params[0]) if best_params else None,
                "min_samples": builtin(best_params[1]) if best_params else None,
                "silhouette_score": builtin(best_score) if has_score else None
            },
            "all_trials": []
        }

        for trial_id, result in enumerate(all_results, start=1):
            score = builtin(result.get('silhouette_score'))
            grid_search_data["all_trials"].append({
                "trial_id": trial_id,
                "parameters": {
                    "eps": builtin(result['eps']),
                    "min_samples": builtin(result['min_samples'])
                },
                "results": {
                    "n_clusters": builtin(result['n_clusters']),
                    "n_noise": builtin(result['n_noise']),
                    "noise_ratio": builtin(result['noise_ratio']),
                    "silhouette_score": score
                },
                "status": "success" if score is not None else "failed"
            })

        valid_trials = [t for t in grid_search_data["all_trials"] if t["status"] == "success"]
        if valid_trials:
            silhouette_scores = [t["results"]["silhouette_score"] for t in valid_trials]
            grid_search_data["summary_statistics"] = {
                "total_trials": len(all_results),
                "successful_trials": len(valid_trials),
                "success_rate": len(valid_trials) / len(all_results),
                "best_silhouette_score": max(silhouette_scores),
                "worst_silhouette_score": min(silhouette_scores),
                "mean_silhouette_score": sum(silhouette_scores) / len(silhouette_scores),
                "median_silhouette_score": float(np.median(silhouette_scores))
            }
            grid_search_data["top_10_results"] = sorted(
                valid_trials,
                key=lambda t: t["results"]["silhouette_score"],
                reverse=True,
            )[:10]
            eps_values = [t["parameters"]["eps"] for t in valid_trials]
            min_samples_values = [t["parameters"]["min_samples"] for t in valid_trials]
            grid_search_data["parameter_analysis"] = {
                "eps_range": {
                    "min": min(eps_values),
                    "max": max(eps_values),
                    "mean": sum(eps_values) / len(eps_values)
                },
                "min_samples_range": {
                    "min": min(min_samples_values),
                    "max": max(min_samples_values),
                    "mean": sum(min_samples_values) / len(min_samples_values)
                }
            }

        # One timestamp for both files so they can be matched up later.
        timestamp = now.strftime("%Y%m%d_%H%M%S")
        filename = f"dbscan_grid_search_detailed_{timestamp}.json"
        # ensure_ascii=False emits raw non-ASCII text, so fix the encoding.
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump(grid_search_data, f, indent=4, ensure_ascii=False)
        print(f"Detailed grid search results saved to: {filename}")

        csv_filename = f"dbscan_grid_search_summary_{timestamp}.csv"
        self.save_grid_search_csv(all_results, csv_filename)
        print(f"Grid search summary CSV saved to: {csv_filename}")

    def save_grid_search_csv(self, all_results, filename):
        """Write one CSV row per grid-search trial to *filename*.

        A trial whose ``silhouette_score`` is missing or ``None`` is written
        with status ``failed``.
        """
        import csv

        fieldnames = ['trial_id', 'eps', 'min_samples', 'n_clusters', 'n_noise',
                      'noise_ratio', 'silhouette_score', 'status']
        with open(filename, 'w', newline='', encoding='utf-8') as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for trial_id, result in enumerate(all_results, start=1):
                score = result.get('silhouette_score')
                writer.writerow({
                    'trial_id': trial_id,
                    'eps': result['eps'],
                    'min_samples': result['min_samples'],
                    'n_clusters': result['n_clusters'],
                    'n_noise': result['n_noise'],
                    'noise_ratio': result['noise_ratio'],
                    'silhouette_score': score,
                    'status': 'success' if score is not None else 'failed'
                })

    def run_mean_shift(self):
        """Run Mean Shift for several bandwidths and keep the best silhouette.

        Returns:
            The label array of the best bandwidth, or ``None`` when no
            bandwidth gave between 2 and ``n_samples // 3`` clusters.
        """
        from sklearn.cluster import estimate_bandwidth

        print("\n" + "="*50)
        print("RUNNING MEAN SHIFT CLUSTERING")
        print("="*50)
        data = self.embeddings_scaled
        n_samples = len(data)
        bandwidth_candidates = []

        # Method 1: sklearn's estimate_bandwidth
        try:
            bw_est = estimate_bandwidth(data, quantile=0.3, n_samples=min(500, n_samples))
        except Exception as exc:
            print(f"Bandwidth estimation failed ({str(exc)[:50]}...)")
        else:
            if bw_est > 0:
                bandwidth_candidates.extend([bw_est * 0.5, bw_est, bw_est * 1.5])

        # Method 2: nearest neighbor distances (clamped for tiny datasets,
        # where asking for 10 neighbours would raise).
        n_neighbors = min(10, n_samples)
        if n_neighbors > 1:
            distances, _ = NearestNeighbors(n_neighbors=n_neighbors).fit(data).kneighbors(data)
            mean_dist = np.mean(distances[:, 1:])  # Exclude self-distance
            bandwidth_candidates.extend([mean_dist * 0.5, mean_dist, mean_dist * 1.5])

        # Remove duplicates and invalid values; sorted for a stable trial order.
        bandwidth_candidates = sorted({float(bw) for bw in bandwidth_candidates if bw > 0})
        if not bandwidth_candidates:
            bandwidth_candidates = [0.5, 1.0, 1.5, 2.0]

        best_score = -1
        best_bandwidth = None
        best_labels = None
        print("Testing different bandwidth values...")
        for bandwidth in bandwidth_candidates:
            try:
                labels = MeanShift(bandwidth=bandwidth).fit_predict(data)
                n_clusters = len(set(labels))
                if 2 <= n_clusters <= n_samples // 3:
                    score = silhouette_score(data, labels)
                    print(f"bandwidth={bandwidth:.4f}: {n_clusters} clusters, silhouette={score:.4f}")
                    if score > best_score:
                        best_score = score
                        best_bandwidth = bandwidth
                        best_labels = labels
            except Exception as e:
                print(f"bandwidth={bandwidth:.4f}: failed ({str(e)[:50]}...)")
                continue

        if best_labels is None:
            print("Mean Shift could not find suitable clusters")
            return None
        print(f"\nBest Mean Shift result:")
        print(f"Bandwidth: {best_bandwidth:.4f}")
        print(f"Number of clusters: {len(set(best_labels))}")
        print(f"Silhouette score: {best_score:.4f}")
        return best_labels

    def run_affinity_propagation(self):
        """Run Affinity Propagation over preference/damping pairs.

        Preference candidates are percentiles of the pairwise similarities
        (negative squared Euclidean distances between distinct samples).

        Returns:
            The label array of the best-scoring pair, or ``None`` when no pair
            gave between 2 and ``n_samples // 3`` clusters.
        """
        from scipy.spatial.distance import pdist

        print("\n" + "="*50)
        print("RUNNING AFFINITY PROPAGATION CLUSTERING")
        print("="*50)
        data = self.embeddings_scaled
        # Condensed off-diagonal similarities. The diagonal of the full
        # similarity matrix is all zeros, so percentiles taken from it are
        # useless as preferences; pdist also avoids an n x n x d temporary.
        similarities = -pdist(data, metric='sqeuclidean')
        if similarities.size == 0:
            print("Affinity Propagation could not find suitable clusters")
            return None
        preference_candidates = sorted(
            {float(p) for p in np.percentile(similarities, [10, 25, 50, 75])}
        )
        damping_candidates = [0.5, 0.7, 0.8, 0.9]

        best_score = -1
        best_params = None
        best_labels = None
        print("Testing different parameter combinations...")
        for preference in preference_candidates:
            for damping in damping_candidates:
                try:
                    affinity_prop = AffinityPropagation(
                        preference=preference,
                        damping=damping,
                        random_state=42,
                        max_iter=200
                    )
                    labels = affinity_prop.fit_predict(data)
                    n_clusters = len(set(labels))
                    if 2 <= n_clusters <= len(data) // 3:
                        score = silhouette_score(data, labels)
                        print(f"preference={preference:.2f}, damping={damping:.1f}: {n_clusters} clusters, silhouette={score:.4f}")
                        if score > best_score:
                            best_score = score
                            best_params = (preference, damping)
                            best_labels = labels
                except Exception as e:
                    print(f"preference={preference:.2f}, damping={damping:.1f}: failed ({str(e)[:30]}...)")
                    continue

        if best_labels is None:
            print("Affinity Propagation could not find suitable clusters")
            return None
        print(f"\nBest Affinity Propagation result:")
        print(f"Parameters: preference={best_params[0]:.2f}, damping={best_params[1]:.1f}")
        print(f"Number of clusters: {len(set(best_labels))}")
        print(f"Silhouette score: {best_score:.4f}")
        return best_labels

    def visualize_results(self, results_dict):
        """Plot every method's labels on a 2-D PCA projection.

        Args:
            results_dict: mapping of method name to label array; label ``-1``
                is drawn as noise.

        The figure is saved as ``auto_clustering_results.png`` and shown.
        """
        if not results_dict:
            print("No results to visualize")
            return
        # Reduce dimensions for visualization
        embeddings_2d = PCA(n_components=2, random_state=42).fit_transform(self.embeddings_scaled)
        n_methods = len(results_dict)
        # squeeze=False always yields a 2-D axes array, so one method needs
        # no special case.
        fig, axes = plt.subplots(1, n_methods, figsize=(5*n_methods, 4), squeeze=False)
        for ax, (method_name, labels) in zip(axes[0], results_dict.items()):
            labels = np.asarray(labels)
            unique_labels = np.unique(labels)  # sorted, so colours are stable
            colors = plt.cm.Set1(np.linspace(0, 1, len(unique_labels)))
            for label, color in zip(unique_labels, colors):
                mask = labels == label
                if label == -1:
                    # Noise points in black
                    ax.scatter(embeddings_2d[mask, 0], embeddings_2d[mask, 1],
                               c='black', marker='x', s=20, alpha=0.5, label='Noise')
                else:
                    ax.scatter(embeddings_2d[mask, 0], embeddings_2d[mask, 1],
                               c=[color], s=50, alpha=0.7, label=f'Cluster {label}')
            n_clusters = len(unique_labels) - (1 if -1 in unique_labels else 0)
            ax.set_title(f'{method_name}\n({n_clusters} clusters)')
            ax.set_xlabel('PCA Component 1')
            ax.set_ylabel('PCA Component 2')
            ax.grid(True, alpha=0.3)
        plt.tight_layout()
        plt.savefig('auto_clustering_results.png', dpi=300, bbox_inches='tight')
        plt.show()
        plt.close(fig)  # release the figure once it has been saved and shown
        print(f"\nVisualization saved as 'auto_clustering_results.png'")

    def save_results(self, results_dict):
        """Write one ``<method>_results.json`` file per clustering method.

        Args:
            results_dict: mapping of method name to a label sequence aligned
                with ``self.file_paths``.

        Only DBSCAN labels of ``-1`` are flagged as noise.
        """
        for method_name, labels in results_dict.items():
            noise_possible = method_name == 'DBSCAN'
            method_results = [
                {
                    "filepath": filepath,
                    "cluster": int(label),
                    # bool(): a numpy bool is not JSON-serializable.
                    "is_noise": bool(noise_possible and label == -1)
                }
                for filepath, label in zip(self.file_paths, labels)
            ]
            distinct = {int(label) for label in labels}
            filename = f"{method_name.lower().replace(' ', '_')}_results.json"
            with open(filename, 'w', encoding='utf-8') as f:
                json.dump({
                    "method": method_name,
                    "n_clusters": len(distinct) - (1 if -1 in distinct else 0),
                    "n_samples": len(labels),
                    "results": method_results
                }, f, indent=4)
            print(f"Results saved to {filename}")

    def run_all_methods(self):
        """Run the enabled clustering methods, then summarize, plot and save.

        Returns:
            dict mapping method name to label array for every method that
            produced clusters (possibly empty).
        """
        print("\n" + "="*70)
        print("AUTOMATIC CLUSTERING ANALYSIS")
        print("="*70)
        print(f"Dataset: {len(self.file_paths)} documents")
        print(f"Embedding dimension: {self.embeddings.shape[1]}")
        results = {}

        dbscan_labels = self.run_dbscan()
        if dbscan_labels is not None:
            results["DBSCAN"] = dbscan_labels
        # NOTE: only DBSCAN is run here. The Mean Shift and Affinity
        # Propagation calls were commented out in the original script; both
        # remain available through run_mean_shift() / run_affinity_propagation().

        if not results:
            print("\nNo automatic clustering method found suitable clusters.")
            print("This might indicate:")
            print("- Data doesn't have clear cluster structure")
            print("- Embeddings need different preprocessing")
            print("- Different parameter ranges needed")
            return results

        print("\n" + "="*70)
        print("SUMMARY OF RESULTS")
        print("="*70)
        for method, labels in results.items():
            n_clusters = len(set(labels)) - (1 if -1 in labels else 0)
            if method == "DBSCAN":
                n_noise = list(labels).count(-1)
                print(f"{method}: {n_clusters} clusters, {n_noise} noise points")
            else:
                print(f"{method}: {n_clusters} clusters")

        # Agreement between methods only makes sense when several succeeded.
        if len(results) > 1:
            from itertools import combinations
            from sklearn.metrics import adjusted_rand_score
            print("\nMethod Agreement (Adjusted Rand Index):")
            for first, second in combinations(results, 2):
                ari = adjusted_rand_score(results[first], results[second])
                print(f"{first} vs {second}: {ari:.4f}")

        self.visualize_results(results)
        self.save_results(results)
        return results
def main():
    """Parse the command line and run the requested clustering method(s).

    ``--embeddings_path`` is mandatory; ``--method`` selects a single method
    or ``all`` (the default). For a single method the labels are plotted and
    saved only when the method found clusters.
    """
    parser = argparse.ArgumentParser(description="Run automatic clustering methods on document embeddings")
    # required=True: without a path the script used to fail later inside open().
    parser.add_argument("--embeddings_path", required=True, help="Path to embeddings JSON file")
    parser.add_argument("--method", choices=['dbscan', 'meanshift', 'affinity', 'all'], default='all',
                        help="Which automatic method to run")
    args = parser.parse_args()

    # Initialize clustering
    clustering = AutoClustering(args.embeddings_path)

    if args.method == 'all':
        clustering.run_all_methods()
        return

    # CLI choice -> (name used in plots and file names, method to call)
    single_methods = {
        'dbscan': ("DBSCAN", clustering.run_dbscan),
        'meanshift': ("Mean Shift", clustering.run_mean_shift),
        'affinity': ("Affinity Propagation", clustering.run_affinity_propagation),
    }
    display_name, run_method = single_methods[args.method]
    labels = run_method()
    if labels is not None:
        results = {display_name: labels}
        clustering.visualize_results(results)
        clustering.save_results(results)


if __name__ == "__main__":
    main()

711
cluster/auto_cluster.py Normal file
View File

@@ -0,0 +1,711 @@
#!/usr/bin/env python3
"""
Simple script to run automatic clustering methods (DBSCAN, Mean Shift, Affinity Propagation)
These methods don't require specifying the number of clusters beforehand.
"""
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.cluster import DBSCAN, MeanShift, AffinityPropagation
from sklearn.preprocessing import normalize
from sklearn.metrics import silhouette_score
from sklearn.neighbors import NearestNeighbors
from sklearn.decomposition import PCA
import argparse
import warnings
warnings.filterwarnings('ignore')
from sklearn.metrics.pairwise import cosine_similarity
from datetime import datetime
import numpy as np
def value_counts(a, dropna=False):
    """Count how often each distinct value occurs in *a*.

    Args:
        a: array-like of any shape; it is flattened before counting.
        dropna: when true and the data is floating point, NaN entries are
            left out of the counts.

    Returns:
        dict mapping each distinct value to its count, both as built-in
        Python scalars (so the result prints cleanly and is JSON-friendly).
    """
    # asarray lets plain lists/tuples through, not only ndarrays.
    flat = np.asarray(a).ravel()
    if dropna and np.issubdtype(flat.dtype, np.floating):
        flat = flat[~np.isnan(flat)]
    uniq, counts = np.unique(flat, return_counts=True)
    return dict(zip(uniq.tolist(), counts.tolist()))
class AutoClustering:
def __init__(self, embeddings_path):
    """Remember *embeddings_path* and immediately load the embeddings from it."""
    self.embeddings_path = embeddings_path
    self.embeddings = None
    self.file_paths = None
    # Filled in by load_embeddings(); declared here so the attribute exists
    # from construction on instead of appearing as a side effect.
    self.embeddings_normalized = None
    self.load_embeddings()
def load_embeddings(self):
    """Load embeddings from the JSON file and L2-normalize them.

    The file must hold a list of objects, each with a ``filepath`` and an
    ``embedding`` entry. The raw matrix is kept in ``self.embeddings`` and
    the row-normalized copy in ``self.embeddings_normalized``. The mean and
    standard deviation of pairwise cosine similarities are printed as a
    diagnostic.

    Raises:
        ValueError: if the file does not yield a non-empty 2-D matrix.
    """
    print(f"Loading embeddings from {self.embeddings_path}...")
    with open(self.embeddings_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    self.file_paths = [item['filepath'] for item in data]
    self.embeddings = np.array([item['embedding'] for item in data], dtype=np.float32)
    if self.embeddings.ndim != 2 or len(self.embeddings) == 0:
        # Without this guard the shape[1] access below fails with an
        # unhelpful IndexError on an empty file.
        raise ValueError(f"No embeddings found in {self.embeddings_path}")
    print(f"Loaded {len(self.file_paths)} samples with embedding dimension {self.embeddings.shape[1]}")

    # Normalize embeddings using L2 normalization for cosine distance
    self.embeddings_normalized = normalize(self.embeddings, norm='l2', axis=1)
    print("Applied L2 normalization to embeddings")

    # Diagnostic only: the full similarity matrix is n x n, so large datasets
    # are inspected through a fixed-size, seeded random sample of rows.
    max_diagnostic_rows = 2000
    sample = self.embeddings
    if len(sample) > max_diagnostic_rows:
        rng = np.random.default_rng(42)
        sample = sample[rng.choice(len(sample), max_diagnostic_rows, replace=False)]
    if len(sample) > 1:
        sims = cosine_similarity(sample)
        # Take the upper triangle, excluding the diagonal, for inspection.
        sim_vals = sims[np.triu_indices_from(sims, k=1)]
        print("mean sim:", sim_vals.mean(), "std:", sim_vals.std())
def run_dbscan(self, eps_candidates=None, min_samples_candidates=None):
    """Run cosine-distance DBSCAN over a grid of (eps, min_samples) pairs.

    Every trial is recorded and written out through
    ``save_dbscan_grid_search_results``.

    Args:
        eps_candidates: iterable of eps values (cosine distances) to try.
            Defaults to ``[0.2]``, the value the script previously hard-coded.
        min_samples_candidates: iterable of ``min_samples`` values to try.
            Defaults to ``[50]``, likewise the previously hard-coded value.

    Returns:
        The label array of the trial with the highest silhouette score
        (noise points excluded from the score), or ``None`` when no trial
        produced at least two clusters with less than 90% noise.
    """
    from collections import Counter
    import traceback

    print("\n" + "="*50)
    print("RUNNING DBSCAN CLUSTERING WITH EXTENSIVE GRID SEARCH")
    print("="*50)
    data = self.embeddings_normalized
    if eps_candidates is None:
        eps_candidates = [0.2]
    if min_samples_candidates is None:
        min_samples_candidates = [50]
    # Remove duplicates and invalid values, then sort; plain floats keep the
    # trial records JSON-serializable.
    eps_candidates = sorted({float(eps) for eps in eps_candidates if eps > 0})
    min_samples_candidates = list(min_samples_candidates)

    best_score = -1
    best_params = None
    best_labels = None
    total_combinations = len(eps_candidates) * len(min_samples_candidates)
    print(f"Testing {len(eps_candidates)} eps values x {len(min_samples_candidates)} min_samples values")
    print(f"Total combinations: {total_combinations}")
    print("This may take a while...\n")

    all_results = []
    current_combination = 0
    for eps in eps_candidates:
        for min_samples in min_samples_candidates:
            current_combination += 1
            if current_combination % 50 == 0 or current_combination == total_combinations:
                progress = (current_combination / total_combinations) * 100
                print(f"Progress: {current_combination}/{total_combinations} ({progress:.1f}%)")
            try:
                labels = DBSCAN(eps=eps, min_samples=min_samples, metric='cosine').fit_predict(data)
            except Exception:
                # Skip problematic parameter combinations, but show why.
                traceback.print_exc()
                continue

            noise_mask = labels == -1
            n_noise = int(noise_mask.sum())
            n_clusters = len(set(labels.tolist())) - (1 if n_noise else 0)
            noise_ratio = n_noise / len(labels)
            # 'silhouette_score' is always present so the savers and the
            # analysis below can rely on the key for every trial.
            result_info = {
                'eps': eps,
                'min_samples': min_samples,
                'n_clusters': n_clusters,
                'n_noise': n_noise,
                'noise_ratio': noise_ratio,
                'silhouette_score': None,
            }
            all_results.append(result_info)
            print(f"eps={eps:.4f}, min_samples={min_samples}: {n_clusters} clusters, {n_noise} noise ({noise_ratio:.1%})")

            # Only score trials with meaningful clusters (not mostly noise).
            if n_clusters < 2 or noise_ratio >= 0.9:
                continue
            clustered = ~noise_mask
            try:
                score = float(silhouette_score(data[clustered], labels[clustered], metric='cosine'))
            except Exception:
                continue
            result_info['silhouette_score'] = score
            if score > 0.1:  # Only show decent scores
                print(f"eps={eps:.4f}, min_samples={min_samples}: {n_clusters} clusters, {n_noise} noise ({noise_ratio:.1%}), silhouette={score:.4f}")
                print(value_counts(labels))
            if score > best_score:
                best_score = score
                best_params = (eps, min_samples)
                best_labels = labels

    # Analysis of results
    print("\n" + "="*50)
    print("DBSCAN GRID SEARCH ANALYSIS")
    print("="*50)
    if all_results:
        print(f"Total parameter combinations tested: {len(all_results)}")
        with_clusters = [r for r in all_results if r['n_clusters'] >= 2]
        print(f"Combinations that produced clusters: {len(with_clusters)}")
        if with_clusters:
            scored = [r for r in with_clusters if r['silhouette_score'] is not None]
            if scored:
                scores = [r['silhouette_score'] for r in scored]
                print(f"Combinations with valid silhouette scores: {len(scored)}")
                print(f"Best silhouette score: {max(scores):.4f}")
                print(f"Mean silhouette score: {sum(scores) / len(scores):.4f}")
                print("\nTop 5 parameter combinations:")
                top_results = sorted(scored, key=lambda r: r['silhouette_score'], reverse=True)[:5]
                for row in top_results:
                    # Plain dicts keep min_samples / n_clusters as ints here.
                    print(f" eps={row['eps']:.4f}, min_samples={row['min_samples']}: "
                          f"{row['n_clusters']} clusters, silhouette={row['silhouette_score']:.4f}")
            print("\nCluster count distribution:")
            cluster_counts = Counter(r['n_clusters'] for r in with_clusters)
            for n_clusters, count in sorted(cluster_counts.items()):
                print(f" {n_clusters} clusters: {count} parameter combinations")

    print("\n📁 SAVING DETAILED RESULTS...")
    print("="*30)
    self.save_dbscan_grid_search_results(all_results, best_params, best_score)

    if best_labels is None:
        print("DBSCAN could not find suitable clusters with the extensive grid search")
        print("Consider:")
        print("- Adjusting the embedding space (different model or preprocessing)")
        print("- Using different clustering algorithms")
        print("- Manual parameter tuning based on domain knowledge")
        return None

    n_noise = int((best_labels == -1).sum())
    n_clusters = len(set(best_labels.tolist())) - (1 if n_noise else 0)
    print("\nBest DBSCAN result:")
    print(f"Parameters: eps={best_params[0]:.4f}, min_samples={best_params[1]}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Number of noise points: {n_noise} ({n_noise/len(best_labels)*100:.1f}%)")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def save_dbscan_grid_search_results(self, all_results, best_params, best_score):
    """Save detailed DBSCAN grid search results to JSON and a CSV summary.

    Args:
        all_results: list of per-trial dicts with the keys 'eps',
            'min_samples', 'n_clusters', 'n_noise', 'noise_ratio' and
            'silhouette_score' (None marks a trial without a usable score).
        best_params: (eps, min_samples) of the best trial, or a falsy value
            when no trial qualified.
        best_score: silhouette score of the best trial; None or any value
            not greater than -1 is stored as null.

    Side effects:
        Writes "dbscan_grid_search_detailed.json" and, through
        save_grid_search_csv, "dbscan_grid_search_summary.csv" into the
        current working directory, overwriting files from earlier runs.
    """
    import datetime
    import statistics

    has_best_score = best_score is not None and best_score > -1
    grid_search_data = {
        "experiment_info": {
            "timestamp": datetime.datetime.now().isoformat(),
            "dataset_path": self.embeddings_path,
            "total_samples": len(self.file_paths),
            "embedding_dimension": self.embeddings.shape[1],
            "total_combinations_tested": len(all_results)
        },
        "best_result": {
            "eps": best_params[0] if best_params else None,
            "min_samples": best_params[1] if best_params else None,
            "silhouette_score": best_score if has_best_score else None
        },
        "all_trials": []
    }

    # One entry per trial; a trial counts as successful only when a
    # silhouette score could be computed for it.
    for trial_id, result in enumerate(all_results, start=1):
        score = result['silhouette_score']
        grid_search_data["all_trials"].append({
            "trial_id": trial_id,
            "parameters": {
                "eps": result['eps'],
                "min_samples": result['min_samples']
            },
            "results": {
                "n_clusters": result['n_clusters'],
                "n_noise": result['n_noise'],
                "noise_ratio": result['noise_ratio'],
                "silhouette_score": score
            },
            "status": "success" if score is not None else "failed"
        })

    # Summary sections exist only when at least one trial succeeded, so
    # consumers must treat these keys as optional.
    valid_trials = [t for t in grid_search_data["all_trials"] if t["status"] == "success"]
    if valid_trials:
        silhouette_scores = [t["results"]["silhouette_score"] for t in valid_trials]
        grid_search_data["summary_statistics"] = {
            "total_trials": len(all_results),
            "successful_trials": len(valid_trials),
            "success_rate": len(valid_trials) / len(all_results),
            "best_silhouette_score": max(silhouette_scores),
            "worst_silhouette_score": min(silhouette_scores),
            "mean_silhouette_score": sum(silhouette_scores) / len(silhouette_scores),
            # True median (mean of the two middle values for even counts).
            "median_silhouette_score": statistics.median(silhouette_scores)
        }

        # Top 10 results, best silhouette first (stable for ties).
        grid_search_data["top_10_results"] = sorted(
            valid_trials,
            key=lambda t: t["results"]["silhouette_score"],
            reverse=True,
        )[:10]

        # Parameter ranges covered by the successful trials.
        eps_values = [t["parameters"]["eps"] for t in valid_trials]
        min_samples_values = [t["parameters"]["min_samples"] for t in valid_trials]
        grid_search_data["parameter_analysis"] = {
            "eps_range": {
                "min": min(eps_values),
                "max": max(eps_values),
                "mean": sum(eps_values) / len(eps_values)
            },
            "min_samples_range": {
                "min": min(min_samples_values),
                "max": max(min_samples_values),
                "mean": sum(min_samples_values) / len(min_samples_values)
            }
        }

    # Fixed file names: each run overwrites the previous output.
    filename = "dbscan_grid_search_detailed.json"
    # ensure_ascii=False emits raw non-ASCII characters, so the file
    # encoding must be pinned instead of depending on the locale.
    with open(filename, 'w', encoding='utf-8') as f:
        json.dump(grid_search_data, f, indent=4, ensure_ascii=False)
    print(f"Detailed grid search results saved to: {filename}")

    # Also save a CSV summary for easy analysis
    csv_filename = "dbscan_grid_search_summary.csv"
    self.save_grid_search_csv(all_results, csv_filename)
    print(f"Grid search summary CSV saved to: {csv_filename}")
def save_grid_search_csv(self, all_results, filename):
    """Write one CSV row per grid-search trial to *filename*.

    Columns: trial_id (1-based position in *all_results*), the DBSCAN
    parameters, the cluster/noise counts, the silhouette score and a
    status column that is 'failed' when the silhouette score is None.
    """
    import csv

    copied_keys = ('eps', 'min_samples', 'n_clusters', 'n_noise',
                   'noise_ratio', 'silhouette_score')
    columns = ['trial_id', *copied_keys, 'status']

    def as_rows():
        # Lazily build the row dicts so rows are written as they are produced.
        for position, trial in enumerate(all_results, start=1):
            row = {'trial_id': position}
            for key in copied_keys:
                row[key] = trial[key]
            row['status'] = 'failed' if trial['silhouette_score'] is None else 'success'
            yield row

    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        out = csv.DictWriter(handle, fieldnames=columns)
        out.writeheader()
        out.writerows(as_rows())
def run_mean_shift(self):
    """Run Mean Shift over a few candidate bandwidths and keep the best one.

    Candidate bandwidths come from sklearn's estimate_bandwidth and from the
    mean cosine distance to the nearest neighbours, each scaled by 0.5, 1
    and 1.5. A candidate is scored only when it yields between 2 and
    n_samples // 3 clusters; the winner is the one with the highest
    cosine silhouette score.

    Returns:
        The label array of the best run, or None when no candidate produced
        an acceptable clustering.
    """
    from sklearn.cluster import estimate_bandwidth

    print("\n" + "="*50)
    print("RUNNING MEAN SHIFT CLUSTERING")
    print("="*50)

    # NOTE(review): the load_embeddings shown at the top of this file only
    # sets embeddings_scaled; confirm embeddings_normalized is assigned
    # before this method can be reached (e.g. with --method meanshift).
    data = self.embeddings_normalized
    n_samples = len(data)

    bandwidth_candidates = []

    # Method 1: sklearn's estimate_bandwidth (it does not take a metric
    # argument, so this estimate is not cosine based).
    try:
        bw_est = estimate_bandwidth(data, quantile=0.3, n_samples=min(500, n_samples))
    except Exception as exc:
        # Best effort: fall back to the neighbour-based candidates, but say so.
        print(f"estimate_bandwidth failed ({str(exc)[:50]}...), using neighbour distances only")
    else:
        if bw_est > 0:
            bandwidth_candidates.extend([bw_est * 0.5, bw_est, bw_est * 1.5])

    # Method 2: mean cosine distance to the nearest neighbours.
    # NOTE(review): these are cosine distances while MeanShift is fitted on
    # the raw vectors; confirm the two scales are meant to be comparable.
    if n_samples > 1:
        neighbors = NearestNeighbors(n_neighbors=min(10, n_samples), metric='cosine')
        distances, _ = neighbors.fit(data).kneighbors(data)
        mean_dist = np.mean(distances[:, 1:])  # Exclude self-distance
        bandwidth_candidates.extend([mean_dist * 0.5, mean_dist, mean_dist * 1.5])

    # Drop duplicates and non-positive/NaN values; sort so the search order
    # (and therefore tie-breaking) is the same on every run.
    bandwidth_candidates = sorted({float(bw) for bw in bandwidth_candidates if bw > 0})
    if not bandwidth_candidates:
        bandwidth_candidates = [0.5, 1.0, 1.5, 2.0]

    best_score = -1
    best_bandwidth = None
    best_labels = None

    print("Testing different bandwidth values...")
    for bandwidth in bandwidth_candidates:
        try:
            labels = MeanShift(bandwidth=bandwidth).fit_predict(data)
            n_clusters = len(set(labels))
            if 2 <= n_clusters <= n_samples // 3:
                score = silhouette_score(data, labels, metric='cosine')
                print(f"bandwidth={bandwidth:.4f}: {n_clusters} clusters, silhouette={score:.4f}")
                if score > best_score:
                    best_score = score
                    best_bandwidth = bandwidth
                    best_labels = labels
        except Exception as e:
            print(f"bandwidth={bandwidth:.4f}: failed ({str(e)[:50]}...)")
            continue

    if best_labels is None:
        print("Mean Shift could not find suitable clusters")
        return None

    n_clusters = len(set(best_labels))
    print("\nBest Mean Shift result:")
    print(f"Bandwidth: {best_bandwidth:.4f}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def run_affinity_propagation(self):
    """Run Affinity Propagation on cosine similarities and keep the best run.

    The pairwise cosine similarity matrix is passed to AffinityPropagation
    as a precomputed affinity, and the preference candidates are
    percentiles of the off-diagonal similarities, so preferences and
    affinities live on the same scale. Each (preference, damping) pair
    that yields between 2 and n_samples // 3 clusters is scored with the
    cosine silhouette; the highest score wins.

    Returns:
        The label array of the best run, or None when no combination
        produced an acceptable clustering.
    """
    from sklearn.metrics.pairwise import cosine_similarity

    print("\n" + "="*50)
    print("RUNNING AFFINITY PROPAGATION CLUSTERING")
    print("="*50)

    # NOTE(review): the load_embeddings shown at the top of this file only
    # sets embeddings_scaled; confirm embeddings_normalized is assigned
    # before this method can be reached (e.g. with --method affinity).
    data = self.embeddings_normalized
    n_samples = len(data)
    if n_samples < 2:
        # No pairwise similarities exist, so there is nothing to cluster.
        print("Affinity Propagation could not find suitable clusters")
        return None

    similarities = cosine_similarity(data)

    # The diagonal is each point's similarity to itself (always ~1) and
    # carries no information, so preferences are drawn from the
    # off-diagonal entries only. Duplicates are dropped to avoid re-running
    # identical configurations.
    off_diagonal = similarities[~np.eye(n_samples, dtype=bool)]
    preference_candidates = sorted(
        {float(p) for p in np.percentile(off_diagonal, [10, 25, 50, 75])}
    )
    damping_candidates = [0.5, 0.7, 0.8, 0.9]

    best_score = -1
    best_params = None
    best_labels = None

    print("Testing different parameter combinations...")
    for preference in preference_candidates:
        for damping in damping_candidates:
            try:
                affinity_prop = AffinityPropagation(
                    affinity='precomputed',
                    preference=preference,
                    damping=damping,
                    random_state=42,
                    max_iter=200
                )
                labels = affinity_prop.fit_predict(similarities)
                n_clusters = len(set(labels))
                if 2 <= n_clusters <= n_samples // 3:
                    score = silhouette_score(data, labels, metric='cosine')
                    print(f"preference={preference:.2f}, damping={damping:.1f}: {n_clusters} clusters, silhouette={score:.4f}")
                    if score > best_score:
                        best_score = score
                        best_params = (preference, damping)
                        best_labels = labels
            except Exception as e:
                print(f"preference={preference:.2f}, damping={damping:.1f}: failed ({str(e)[:30]}...)")
                continue

    if best_labels is None:
        print("Affinity Propagation could not find suitable clusters")
        return None

    n_clusters = len(set(best_labels))
    print("\nBest Affinity Propagation result:")
    print(f"Parameters: preference={best_params[0]:.2f}, damping={best_params[1]:.1f}")
    print(f"Number of clusters: {n_clusters}")
    print(f"Silhouette score: {best_score:.4f}")
    return best_labels
def visualize_results(self, results_dict):
    """Plot every method's clusters on a shared 2-D PCA projection.

    Args:
        results_dict: mapping of method name to its label array; the label
            -1 is drawn as black 'x' noise markers.

    One subplot is drawn per method. The figure is saved as
    'auto_clustering_results.png' in the working directory and then shown.
    Nothing is drawn when *results_dict* is empty.
    """
    if not results_dict:
        print("No results to visualize")
        return

    # Project the embeddings onto their first two principal components.
    projector = PCA(n_components=2, random_state=42)
    points = projector.fit_transform(self.embeddings_normalized)

    panel_count = len(results_dict)
    # squeeze=False always returns a 2-D axes array, so the single-method
    # case needs no special handling.
    fig, axes_grid = plt.subplots(1, panel_count, figsize=(5*panel_count, 4), squeeze=False)

    for ax, (method_name, labels) in zip(axes_grid[0], results_dict.items()):
        unique_labels = set(labels)
        palette = plt.cm.Set1(np.linspace(0, 1, len(unique_labels)))
        for label, color in zip(unique_labels, palette):
            mask = labels == label
            xs = points[mask, 0]
            ys = points[mask, 1]
            if label == -1:
                # Noise points in black
                ax.scatter(xs, ys, c='black', marker='x', s=20, alpha=0.5, label='Noise')
            else:
                ax.scatter(xs, ys, c=[color], s=50, alpha=0.7, label=f'Cluster {label}')

        cluster_total = len(set(labels)) - (1 if -1 in labels else 0)
        ax.set_title(f'{method_name}\n({cluster_total} clusters)')
        ax.set_xlabel('PCA Component 1')
        ax.set_ylabel('PCA Component 2')
        ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('auto_clustering_results.png', dpi=300, bbox_inches='tight')
    plt.show()
    print("\nVisualization saved as 'auto_clustering_results.png'")
def save_results(self, results_dict):
    """Save each method's cluster assignments to its own JSON file.

    Args:
        results_dict: mapping of method name to a sequence of integer
            labels, paired positionally with self.file_paths.

    For every method a file named "<method>_results_<timestamp>.json"
    (method lower-cased, spaces replaced by underscores) is written to the
    working directory. Each record holds the file path, the cluster id and
    an is_noise flag; the flag is only ever True for the 'DBSCAN' method
    with label -1.
    """
    # datetime is not imported at module level in this file, so bring it
    # into scope here.
    import datetime

    for method_name, labels in results_dict.items():
        is_dbscan = method_name == 'DBSCAN'
        method_results = [
            {
                "filepath": filepath,
                "cluster": int(label),
                # bool() keeps the flag JSON-serializable for numpy labels.
                "is_noise": bool(is_dbscan and label == -1)
            }
            for filepath, label in zip(self.file_paths, labels)
        ]

        # Save to file
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        filename = f"{method_name.lower().replace(' ', '_')}_results_{timestamp}.json"
        with open(filename, 'w', encoding='utf-8') as f:
            json.dump({
                "method": method_name,
                # The noise label -1 is not counted as a cluster.
                "n_clusters": len(set(labels)) - (1 if -1 in labels else 0),
                "n_samples": len(labels),
                "results": method_results
            }, f, indent=4)
        print(f"Results saved to {filename}")
def run_all_methods(self):
    """Run the enabled automatic clustering methods and report on them.

    Only DBSCAN is currently executed. Mean Shift and Affinity Propagation
    are disabled here; to re-enable them, call run_mean_shift() /
    run_affinity_propagation() and store non-None labels under the keys
    "Mean Shift" / "Affinity Propagation".

    Returns:
        dict mapping method name to its label array; empty when no method
        produced a clustering. When it is non-empty the results are also
        visualized and saved.
    """
    rule = "="*70
    print("\n" + rule)
    print("AUTOMATIC CLUSTERING ANALYSIS")
    print(rule)
    print(f"Dataset: {len(self.file_paths)} documents")
    print(f"Embedding dimension: {self.embeddings.shape[1]}")

    results = {}

    # Run DBSCAN
    labels_from_dbscan = self.run_dbscan()
    if labels_from_dbscan is not None:
        results["DBSCAN"] = labels_from_dbscan

    # Nothing succeeded: explain and hand back the empty dict.
    if not results:
        print("\nNo automatic clustering method found suitable clusters.")
        print("This might indicate:")
        print("- Data doesn't have clear cluster structure")
        print("- Embeddings need different preprocessing")
        print("- Different parameter ranges needed")
        return results

    # Summary
    print("\n" + rule)
    print("SUMMARY OF RESULTS")
    print(rule)
    for method, labels in results.items():
        noise_offset = 1 if -1 in labels else 0
        n_clusters = len(set(labels)) - noise_offset
        if method != "DBSCAN":
            print(f"{method}: {n_clusters} clusters")
            continue
        n_noise = list(labels).count(-1)
        print(f"{method}: {n_clusters} clusters, {n_noise} noise points")

    # Pairwise agreement is only meaningful with two or more methods.
    if len(results) > 1:
        from sklearn.metrics import adjusted_rand_score
        print("\nMethod Agreement (Adjusted Rand Index):")
        method_names = list(results.keys())
        for first_pos, first in enumerate(method_names):
            for second in method_names[first_pos + 1:]:
                ari = adjusted_rand_score(results[first], results[second])
                print(f"{first} vs {second}: {ari:.4f}")

    # Visualize and save results
    self.visualize_results(results)
    self.save_results(results)
    return results
def main():
    """Command-line entry point: load embeddings and run the chosen method(s).

    --embeddings_path is mandatory; without it the script used to fail
    later while trying to open a None path, so argparse now rejects the
    call up front with a usage message.

    --method selects 'all' (default), a single method, or 'None', which
    only loads the embeddings and runs nothing.
    """
    parser = argparse.ArgumentParser(description="Run automatic clustering methods on document embeddings")
    parser.add_argument("--embeddings_path", required=True, help="Path to embeddings JSON file")
    parser.add_argument("--method", choices=['None', 'dbscan', 'meanshift', 'affinity', 'all'], default='all',
                        help="Which automatic method to run")
    args = parser.parse_args()

    # Initialize clustering
    clustering = AutoClustering(args.embeddings_path)

    if args.method == 'all':
        clustering.run_all_methods()
        return

    # CLI choice -> (name used in plots and output files, method to call).
    single_methods = {
        'dbscan': ("DBSCAN", clustering.run_dbscan),
        'meanshift': ("Mean Shift", clustering.run_mean_shift),
        'affinity': ("Affinity Propagation", clustering.run_affinity_propagation),
    }
    if args.method not in single_methods:
        # 'None': nothing to run.
        return

    display_name, run_method = single_methods[args.method]
    labels = run_method()
    if labels is not None:
        clustering.visualize_results({display_name: labels})
        clustering.save_results({display_name: labels})


if __name__ == "__main__":
    main()

Binary file not shown.

After

Width:  |  Height:  |  Size: 364 KiB

View File

@@ -0,0 +1,2 @@
trial_id,eps,min_samples,n_clusters,n_noise,noise_ratio,silhouette_score,status
1,0.2,50,5,374,0.13357142857142856,0.6100894212722778,success
1 trial_id eps min_samples n_clusters n_noise noise_ratio silhouette_score status
2 1 0.2 50 5 374 0.13357142857142856 0.6100894212722778 success

View File

@@ -0,0 +1,23 @@
trial_id,eps,min_samples,n_clusters,n_noise,noise_ratio,silhouette_score,status
1,0.001,50,0,2800,1.0,,failed
2,0.002,50,0,2800,1.0,,failed
3,0.005,50,0,2800,1.0,,failed
4,0.01,50,2,2436,0.87,0.8994060754776001,success
5,0.02,50,2,2220,0.7928571428571428,0.7592437863349915,success
6,0.03,50,1,2168,0.7742857142857142,,failed
7,0.04,50,1,2157,0.7703571428571429,,failed
8,0.05,50,2,2089,0.7460714285714286,0.8926841616630554,success
9,0.1,50,6,1204,0.43,0.6831505298614502,success
10,0.15,50,4,645,0.23035714285714284,0.6648684740066528,success
11,0.2,50,5,374,0.13357142857142856,0.6100894212722778,success
12,0.25,50,3,258,0.09214285714285714,0.41854172945022583,success
13,0.3,50,1,210,0.075,,failed
14,0.35,50,1,163,0.05821428571428571,,failed
15,0.4,50,1,145,0.05178571428571429,,failed
16,0.45,50,1,123,0.04392857142857143,,failed
17,0.5,50,1,107,0.038214285714285715,,failed
18,0.6,50,1,23,0.008214285714285714,,failed
19,0.7,50,1,0,0.0,,failed
20,0.8,50,1,0,0.0,,failed
21,0.9,50,1,0,0.0,,failed
22,1.0,50,1,0,0.0,,failed
1 trial_id eps min_samples n_clusters n_noise noise_ratio silhouette_score status
2 1 0.001 50 0 2800 1.0 failed
3 2 0.002 50 0 2800 1.0 failed
4 3 0.005 50 0 2800 1.0 failed
5 4 0.01 50 2 2436 0.87 0.8994060754776001 success
6 5 0.02 50 2 2220 0.7928571428571428 0.7592437863349915 success
7 6 0.03 50 1 2168 0.7742857142857142 failed
8 7 0.04 50 1 2157 0.7703571428571429 failed
9 8 0.05 50 2 2089 0.7460714285714286 0.8926841616630554 success
10 9 0.1 50 6 1204 0.43 0.6831505298614502 success
11 10 0.15 50 4 645 0.23035714285714284 0.6648684740066528 success
12 11 0.2 50 5 374 0.13357142857142856 0.6100894212722778 success
13 12 0.25 50 3 258 0.09214285714285714 0.41854172945022583 success
14 13 0.3 50 1 210 0.075 failed
15 14 0.35 50 1 163 0.05821428571428571 failed
16 15 0.4 50 1 145 0.05178571428571429 failed
17 16 0.45 50 1 123 0.04392857142857143 failed
18 17 0.5 50 1 107 0.038214285714285715 failed
19 18 0.6 50 1 23 0.008214285714285714 failed
20 19 0.7 50 1 0 0.0 failed
21 20 0.8 50 1 0 0.0 failed
22 21 0.9 50 1 0 0.0 failed
23 22 1.0 50 1 0 0.0 failed

View File

@@ -0,0 +1,23 @@
trial_id,eps,min_samples,n_clusters,n_noise,noise_ratio,silhouette_score,status
1,0.001,50,0,2800,1.0,,failed
2,0.002,50,0,2800,1.0,,failed
3,0.005,50,0,2800,1.0,,failed
4,0.01,50,2,2436,0.87,0.8994060754776001,success
5,0.02,50,2,2220,0.7928571428571428,0.7592437863349915,success
6,0.03,50,1,2168,0.7742857142857142,,failed
7,0.04,50,1,2157,0.7703571428571429,,failed
8,0.05,50,2,2089,0.7460714285714286,0.8926841616630554,success
9,0.1,50,6,1204,0.43,0.6831505298614502,success
10,0.15,50,4,645,0.23035714285714284,0.6648684740066528,success
11,0.2,50,5,374,0.13357142857142856,0.6100894212722778,success
12,0.25,50,3,258,0.09214285714285714,0.41854172945022583,success
13,0.3,50,1,210,0.075,,failed
14,0.35,50,1,163,0.05821428571428571,,failed
15,0.4,50,1,145,0.05178571428571429,,failed
16,0.45,50,1,123,0.04392857142857143,,failed
17,0.5,50,1,107,0.038214285714285715,,failed
18,0.6,50,1,23,0.008214285714285714,,failed
19,0.7,50,1,0,0.0,,failed
20,0.8,50,1,0,0.0,,failed
21,0.9,50,1,0,0.0,,failed
22,1.0,50,1,0,0.0,,failed
1 trial_id eps min_samples n_clusters n_noise noise_ratio silhouette_score status
2 1 0.001 50 0 2800 1.0 failed
3 2 0.002 50 0 2800 1.0 failed
4 3 0.005 50 0 2800 1.0 failed
5 4 0.01 50 2 2436 0.87 0.8994060754776001 success
6 5 0.02 50 2 2220 0.7928571428571428 0.7592437863349915 success
7 6 0.03 50 1 2168 0.7742857142857142 failed
8 7 0.04 50 1 2157 0.7703571428571429 failed
9 8 0.05 50 2 2089 0.7460714285714286 0.8926841616630554 success
10 9 0.1 50 6 1204 0.43 0.6831505298614502 success
11 10 0.15 50 4 645 0.23035714285714284 0.6648684740066528 success
12 11 0.2 50 5 374 0.13357142857142856 0.6100894212722778 success
13 12 0.25 50 3 258 0.09214285714285714 0.41854172945022583 success
14 13 0.3 50 1 210 0.075 failed
15 14 0.35 50 1 163 0.05821428571428571 failed
16 15 0.4 50 1 145 0.05178571428571429 failed
17 16 0.45 50 1 123 0.04392857142857143 failed
18 17 0.5 50 1 107 0.038214285714285715 failed
19 18 0.6 50 1 23 0.008214285714285714 failed
20 19 0.7 50 1 0 0.0 failed
21 20 0.8 50 1 0 0.0 failed
22 21 0.9 50 1 0 0.0 failed
23 22 1.0 50 1 0 0.0 failed

Binary file not shown.

After

Width:  |  Height:  |  Size: 747 KiB

649
cluster/gmm_extensive.py Normal file
View File

@@ -0,0 +1,649 @@
#!/usr/bin/env python3
"""
Extensive Gaussian Mixture Model clustering with grid search for optimal parameters
Includes BIC and AIC metrics for model selection
"""
import json
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score, calinski_harabasz_score, davies_bouldin_score
from sklearn.decomposition import PCA
import datetime
import csv
import argparse
import warnings
warnings.filterwarnings('ignore')
class GMMExtensiveClustering:
def __init__(self, embeddings_path):
    """Remember the embeddings file location and load it immediately.

    Args:
        embeddings_path: path of the JSON embeddings file handed to
            load_embeddings().
    """
    # Placeholders; load_embeddings() below fills them in.
    self.file_paths = None
    self.embeddings = None
    self.embeddings_path = embeddings_path
    self.load_embeddings()
def load_embeddings(self):
    """Load file paths and embeddings from the JSON file and standardize them.

    The file must hold a non-empty list of records, each with a 'filepath'
    and an 'embedding' entry. Sets self.file_paths, self.embeddings
    (float32 array), self.scaler and self.embeddings_scaled.

    Raises:
        ValueError: if the file contains no records.
    """
    print(f"Loading embeddings from {self.embeddings_path}...")
    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(self.embeddings_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    if not data:
        # Fail with a clear message instead of an IndexError on shape[1].
        raise ValueError(f"No embeddings found in {self.embeddings_path}")

    self.file_paths = [item['filepath'] for item in data]
    self.embeddings = np.array([item['embedding'] for item in data], dtype=np.float32)
    print(f"Loaded {len(self.file_paths)} samples with embedding dimension {self.embeddings.shape[1]}")

    # Standardize embeddings for better clustering
    self.scaler = StandardScaler()
    self.embeddings_scaled = self.scaler.fit_transform(self.embeddings)
def run_gmm_grid_search(self):
    """Grid-search GaussianMixture hyper-parameters on the scaled embeddings.

    Every combination of covariance type, component count, init method,
    reg_covar, n_init and max_iter is fitted on self.embeddings_scaled
    (simpler settings first). For each recorded fit the BIC, AIC, mean
    log-likelihood, silhouette, Calinski-Harabasz and Davies-Bouldin scores
    are stored. Fits that did not converge with max_iter <= 100 are not
    recorded; fits that raise are counted and reported after the search.
    The search stops as soon as `max_good_results` fits have reached a
    silhouette score above `early_stopping_threshold`.

    All recorded trials are passed to save_gmm_grid_search_results.

    Returns:
        dict with the keys 'bic', 'aic' and 'silhouette'; each value is a
        (labels, params, score) tuple for the best fit under that
        criterion. labels and params are None when no fit qualified.
    """
    import itertools

    print("\n" + "="*70)
    print("RUNNING GAUSSIAN MIXTURE MODEL CLUSTERING WITH OPTIMIZED GRID SEARCH")
    print("="*70)

    data = self.embeddings_scaled

    # Component counts: every value up to 5, even values up to 10, then
    # every third value (11, 14, 17, ...), capped at min(50, n_samples // 20).
    max_components = min(50, len(data) // 20)
    n_components_candidates = [
        n for n in range(2, max_components + 1)
        if n <= 5 or (n <= 10 and n % 2 == 0) or (n > 10 and n % 3 == 2)
    ]

    # Only 'tied' and 'spherical' are searched; 'full' and 'diag' are
    # currently left out of the grid.
    covariance_types = ['tied', 'spherical']
    reg_covar_candidates = [1e-5, 1e-4, 1e-3]
    n_init_candidates = [1, 5]
    # NOTE(review): 'k-means++' is presumably only accepted by newer
    # scikit-learn releases; on older ones those fits would show up in the
    # failure count printed after the search. Confirm the pinned version.
    init_params_candidates = ['kmeans', 'k-means++']
    max_iter_candidates = [100, 300]

    print("Optimized parameter combinations:")
    print(f" - n_components: {len(n_components_candidates)} values {n_components_candidates}")
    print(f" - covariance_types: {len(covariance_types)} options {covariance_types}")
    print(f" - reg_covar: {len(reg_covar_candidates)} values {reg_covar_candidates}")
    print(f" - n_init: {len(n_init_candidates)} values {n_init_candidates}")
    print(f" - init_params: {len(init_params_candidates)} options {init_params_candidates}")
    print(f" - max_iter: {len(max_iter_candidates)} values {max_iter_candidates}")

    total_combinations = (len(n_components_candidates) * len(covariance_types) *
                          len(reg_covar_candidates) * len(n_init_candidates) *
                          len(init_params_candidates) * len(max_iter_candidates))
    print(f"Total combinations: {total_combinations} (optimized for speed)")

    # Rough runtime estimate
    estimated_time_per_combination = 0.5  # seconds (conservative estimate)
    estimated_total_time = total_combinations * estimated_time_per_combination
    print(f"Estimated runtime: {estimated_total_time/60:.1f} minutes")
    print("This should be much faster...\n")

    all_results = []
    bic_history = []      # BIC of every recorded trial, for the percentile check
    failed_fits = 0
    first_failure = None

    # Early stopping: stop once enough fits exceed the silhouette threshold.
    early_stopping_threshold = 0.7
    max_good_results = 5
    good_results_found = 0

    best_bic_score = float('inf')
    best_aic_score = float('inf')
    best_silhouette_score = -1
    best_params_bic = None
    best_params_aic = None
    best_params_silhouette = None
    best_labels_bic = None
    best_labels_aic = None
    best_labels_silhouette = None

    # product() iterates with the rightmost factor fastest, i.e. covariance
    # type is the outermost loop and max_iter the innermost.
    grid = itertools.product(covariance_types, n_components_candidates,
                             init_params_candidates, reg_covar_candidates,
                             n_init_candidates, max_iter_candidates)

    for current_combination, combo in enumerate(grid, start=1):
        covariance_type, n_components, init_params, reg_covar, n_init, max_iter = combo

        if current_combination % 50 == 0 or current_combination == total_combinations:
            progress = (current_combination / total_combinations) * 100
            print(f"Progress: {current_combination}/{total_combinations} ({progress:.1f}%) - "
                  f"Best scores so far: BIC={best_bic_score:.2f}, Silhouette={best_silhouette_score:.3f}")

        params = {
            'n_components': n_components,
            'covariance_type': covariance_type,
            'reg_covar': reg_covar,
            'n_init': n_init,
            'init_params': init_params,
            'max_iter': max_iter
        }
        # Looser convergence tolerance for the small models.
        tol = 1e-3 if n_components <= 5 else 1e-4

        try:
            gmm = GaussianMixture(tol=tol, random_state=42, **params)
            gmm.fit(data)
            labels = gmm.predict(data)

            # Skip fits that ran out of the small iteration budget.
            if not gmm.converged_ and max_iter <= 100:
                continue

            # float()/int() keep the stored values plain Python numbers so
            # they serialize to JSON regardless of the numpy dtype.
            bic_score = float(gmm.bic(data))
            aic_score = float(gmm.aic(data))
            log_likelihood = float(gmm.score(data))
            unique_clusters = int(np.unique(labels).size)

            # Clustering metrics need at least two clusters.
            if unique_clusters > 1:
                silhouette = float(silhouette_score(data, labels))
                calinski_harabasz = float(calinski_harabasz_score(data, labels))
                davies_bouldin = float(davies_bouldin_score(data, labels))
            else:
                silhouette = -1
                calinski_harabasz = 0
                davies_bouldin = float('inf')

            converged = bool(gmm.converged_)
            n_iter = int(gmm.n_iter_)
        except Exception as exc:
            # Keep the search going, but remember that (and why) it failed.
            failed_fits += 1
            if first_failure is None:
                first_failure = f"{params}: {exc}"
            continue

        if silhouette > early_stopping_threshold:
            good_results_found += 1
            print(f"🎯 Excellent result found: n_comp={n_components}, cov={covariance_type}, "
                  f"silhouette={silhouette:.4f}")

        all_results.append({
            **params,
            'bic_score': bic_score,
            'aic_score': aic_score,
            'log_likelihood': log_likelihood,
            'silhouette_score': silhouette,
            'calinski_harabasz_score': calinski_harabasz,
            'davies_bouldin_score': davies_bouldin,
            'converged': converged,
            'n_iter': n_iter,
            'unique_clusters': unique_clusters
        })
        bic_history.append(bic_score)

        # Print promising results: decent silhouette and a BIC in the best
        # quarter of everything recorded so far.
        if silhouette > 0.3 and bic_score < np.percentile(bic_history, 25):
            print(f"n_components={n_components}, cov={covariance_type}, init={init_params}: "
                  f"BIC={bic_score:.2f}, AIC={aic_score:.2f}, silhouette={silhouette:.4f}")

        # Track the best fit under each criterion (separate dict copies so
        # later changes to one cannot affect the others).
        if bic_score < best_bic_score:
            best_bic_score = bic_score
            best_params_bic = dict(params)
            best_labels_bic = labels
        if aic_score < best_aic_score:
            best_aic_score = aic_score
            best_params_aic = dict(params)
            best_labels_aic = labels
        if unique_clusters > 1 and silhouette > best_silhouette_score:
            best_silhouette_score = silhouette
            best_params_silhouette = dict(params)
            best_labels_silhouette = labels

        if good_results_found >= max_good_results:
            print(f"🛑 Early stopping triggered: Found {good_results_found} excellent results. "
                  f"Stopping at {current_combination}/{total_combinations} combinations.")
            break

    if failed_fits:
        print(f"{failed_fits} parameter combinations raised an error and were skipped "
              f"(first: {first_failure})")

    # Analysis of results
    print("\n" + "="*70)
    print("GAUSSIAN MIXTURE MODEL GRID SEARCH ANALYSIS")
    print("="*70)

    if all_results:
        import pandas as pd
        df_results = pd.DataFrame(all_results)
        print(f"Total parameter combinations tested: {len(df_results)}")

        # Valid clustering means more than one cluster was actually used.
        valid_results = df_results[df_results['unique_clusters'] > 1]
        print(f"Combinations with valid clustering: {len(valid_results)}")
        has_valid = len(valid_results) > 0

        # Model-selection metrics exist for every recorded fit.
        print("\nModel Selection Metrics:")
        print(f"Best BIC score: {df_results['bic_score'].min():.2f}")
        print(f"Best AIC score: {df_results['aic_score'].min():.2f}")
        print(f"Best Log-Likelihood: {df_results['log_likelihood'].max():.2f}")

        if has_valid:
            print("\nClustering Quality Metrics:")
            print(f"Best silhouette score: {valid_results['silhouette_score'].max():.4f}")
            print(f"Mean silhouette score: {valid_results['silhouette_score'].mean():.4f}")
            print(f"Best Calinski-Harabasz score: {valid_results['calinski_harabasz_score'].max():.2f}")
            print(f"Best Davies-Bouldin score: {valid_results['davies_bouldin_score'].min():.4f}")

        print("\nTop 5 results by BIC (lower is better):")
        for row in df_results.nsmallest(5, 'bic_score').itertuples(index=False):
            print(f" n_comp={row.n_components}, cov={row.covariance_type}: "
                  f"BIC={row.bic_score:.2f}, AIC={row.aic_score:.2f}")

        print("\nTop 5 results by AIC (lower is better):")
        for row in df_results.nsmallest(5, 'aic_score').itertuples(index=False):
            print(f" n_comp={row.n_components}, cov={row.covariance_type}: "
                  f"BIC={row.bic_score:.2f}, AIC={row.aic_score:.2f}")

        if has_valid:
            print("\nTop 5 results by Silhouette Score:")
            for row in valid_results.nlargest(5, 'silhouette_score').itertuples(index=False):
                print(f" n_comp={row.n_components}, cov={row.covariance_type}: "
                      f"silhouette={row.silhouette_score:.4f}")

        # Best score reached per component count.
        component_performance = df_results.groupby('n_components').agg({
            'bic_score': 'min',
            'aic_score': 'min',
            'silhouette_score': 'max'
        }).reset_index()
        print("\nComponent count analysis (top 10 by BIC):")
        # itertuples keeps n_components an integer (iterrows would upcast
        # the all-numeric row to float and print e.g. "5.0 components").
        for row in component_performance.nsmallest(10, 'bic_score').itertuples(index=False):
            print(f" {row.n_components} components: "
                  f"BIC={row.bic_score:.2f}, AIC={row.aic_score:.2f}, "
                  f"silhouette={row.silhouette_score:.4f}")

    print("\n📁 SAVING DETAILED RESULTS...")
    print("="*30)

    # Save detailed grid search results
    self.save_gmm_grid_search_results(all_results,
                                      best_params_bic, best_bic_score,
                                      best_params_aic, best_aic_score,
                                      best_params_silhouette, best_silhouette_score)

    # Best fit under each criterion.
    results = {
        'bic': (best_labels_bic, best_params_bic, best_bic_score),
        'aic': (best_labels_aic, best_params_aic, best_aic_score),
        'silhouette': (best_labels_silhouette, best_params_silhouette, best_silhouette_score)
    }

    if best_labels_bic is not None:
        print("\nBest GMM result by BIC:")
        print(f"Parameters: {best_params_bic}")
        print(f"BIC score: {best_bic_score:.2f}")
    if best_labels_aic is not None:
        print("\nBest GMM result by AIC:")
        print(f"Parameters: {best_params_aic}")
        print(f"AIC score: {best_aic_score:.2f}")
    if best_labels_silhouette is not None:
        print("\nBest GMM result by Silhouette:")
        print(f"Parameters: {best_params_silhouette}")
        print(f"Silhouette score: {best_silhouette_score:.4f}")

    return results
def save_gmm_grid_search_results(self, all_results,
                                 best_params_bic, best_bic_score,
                                 best_params_aic, best_aic_score,
                                 best_params_silhouette, best_silhouette_score):
    """Save detailed GMM grid search results to a JSON file and a CSV summary.

    The report contains experiment metadata, the best parameters per
    criterion, every trial, summary statistics and top-10 rankings. It is
    written to ``gmm_grid_search_detailed_<timestamp>.json`` in the current
    working directory; ``self.save_grid_search_csv`` is then called to write
    ``gmm_grid_search_summary_<timestamp>.csv``.

    Args:
        all_results: List of per-trial dicts. Each dict must provide the
            parameter and metric keys read below (``n_components``,
            ``covariance_type``, ``bic_score``, ``silhouette_score``, ...).
        best_params_bic: Parameters selected as best by BIC (stored as given).
        best_bic_score: Best BIC; ``float('inf')`` is recorded as ``None``.
        best_params_aic: Parameters selected as best by AIC (stored as given).
        best_aic_score: Best AIC; ``float('inf')`` is recorded as ``None``.
        best_params_silhouette: Parameters selected as best by silhouette.
        best_silhouette_score: Best silhouette; values not greater than -1
            are recorded as ``None``.

    Raises:
        KeyError: If a trial dict lacks one of the expected keys.
        TypeError: If the report contains a value json cannot encode even
            after numpy-style conversion.
    """
    import statistics  # local import: only needed for the summary medians

    def _to_builtin(value):
        """Convert numpy scalars/arrays (anything exposing ``tolist``) for json."""
        tolist = getattr(value, "tolist", None)
        if callable(tolist):
            return tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    def _ranked(trials, score_keys):
        """Build top-10 entries: rank, key parameters and the requested scores."""
        ranking = []
        for rank, trial in enumerate(trials[:10], start=1):
            entry = {
                "rank": rank,
                "parameters": {
                    "n_components": trial['n_components'],
                    "covariance_type": trial['covariance_type'],
                    "init_params": trial['init_params']
                }
            }
            for key in score_keys:
                entry[key] = trial[key]
            ranking.append(entry)
        return ranking

    # One clock reading keeps the metadata timestamp and the file names consistent.
    now = datetime.datetime.now()

    # Prepare comprehensive results data
    grid_search_data = {
        "experiment_info": {
            "timestamp": now.isoformat(),
            "dataset_path": self.embeddings_path,
            "total_samples": len(self.file_paths),
            "embedding_dimension": self.embeddings.shape[1],
            "total_combinations_tested": len(all_results),
            "method": "Gaussian Mixture Model"
        },
        "best_results": {
            # The "no result" sentinels (inf / -1) are stored as null.
            "by_bic": {
                "parameters": best_params_bic,
                "bic_score": best_bic_score if best_bic_score != float('inf') else None
            },
            "by_aic": {
                "parameters": best_params_aic,
                "aic_score": best_aic_score if best_aic_score != float('inf') else None
            },
            "by_silhouette": {
                "parameters": best_params_silhouette,
                "silhouette_score": best_silhouette_score if best_silhouette_score > -1 else None
            }
        },
        "all_trials": []
    }

    # Add all trial results
    for trial_id, result in enumerate(all_results, start=1):
        grid_search_data["all_trials"].append({
            "trial_id": trial_id,
            "parameters": {
                "n_components": result['n_components'],
                "covariance_type": result['covariance_type'],
                "reg_covar": result['reg_covar'],
                "n_init": result['n_init'],
                "init_params": result['init_params'],
                "max_iter": result['max_iter']
            },
            "results": {
                "bic_score": result['bic_score'],
                "aic_score": result['aic_score'],
                "log_likelihood": result['log_likelihood'],
                "silhouette_score": result['silhouette_score'],
                "calinski_harabasz_score": result['calinski_harabasz_score'],
                "davies_bouldin_score": result['davies_bouldin_score'],
                "converged": result['converged'],
                "n_iter": result['n_iter'],
                "unique_clusters": result['unique_clusters']
            }
        })

    # Trials whose silhouette is above the -1 sentinel count as valid clusterings.
    valid_results = [r for r in all_results if r['silhouette_score'] > -1]

    # Calculate summary statistics
    if all_results:
        bic_scores = [r['bic_score'] for r in all_results]
        aic_scores = [r['aic_score'] for r in all_results]
        log_likelihoods = [r['log_likelihood'] for r in all_results]
        valid_silhouette = [r['silhouette_score'] for r in valid_results]

        grid_search_data["summary_statistics"] = {
            "total_trials": len(all_results),
            "valid_clustering_trials": len(valid_silhouette),
            # Lower BIC/AIC is better, so "best" is the minimum.
            "bic_score": {
                "best": min(bic_scores),
                "worst": max(bic_scores),
                "mean": sum(bic_scores) / len(bic_scores),
                "median": statistics.median(bic_scores)
            },
            "aic_score": {
                "best": min(aic_scores),
                "worst": max(aic_scores),
                "mean": sum(aic_scores) / len(aic_scores),
                "median": statistics.median(aic_scores)
            },
            # Higher log-likelihood is better, so "best" is the maximum.
            "log_likelihood": {
                "best": max(log_likelihoods),
                "worst": min(log_likelihoods),
                "mean": sum(log_likelihoods) / len(log_likelihoods)
            }
        }

        if valid_silhouette:
            grid_search_data["summary_statistics"]["silhouette_score"] = {
                "best": max(valid_silhouette),
                "worst": min(valid_silhouette),
                "mean": sum(valid_silhouette) / len(valid_silhouette),
                "median": statistics.median(valid_silhouette)
            }

    # Top 10 results by different criteria (empty lists when there are no trials)
    sorted_by_bic = sorted(all_results, key=lambda x: x['bic_score'])
    sorted_by_aic = sorted(all_results, key=lambda x: x['aic_score'])
    sorted_by_silhouette = sorted(valid_results, key=lambda x: x['silhouette_score'], reverse=True)

    grid_search_data["top_10_results"] = {
        "by_bic": _ranked(sorted_by_bic, ("bic_score", "aic_score")),
        "by_aic": _ranked(sorted_by_aic, ("bic_score", "aic_score")),
        "by_silhouette": _ranked(sorted_by_silhouette, ("silhouette_score",))
    }

    # Save to file with timestamp
    timestamp = now.strftime("%Y%m%d_%H%M%S")
    filename = f"gmm_grid_search_detailed_{timestamp}.json"

    # Serialize first so an encoding failure cannot leave a truncated file behind.
    payload = json.dumps(grid_search_data, indent=4, ensure_ascii=False, default=_to_builtin)
    with open(filename, 'w', encoding='utf-8') as f:
        f.write(payload)

    print(f"Detailed grid search results saved to: {filename}")

    # Also save a CSV summary for easy analysis
    csv_filename = f"gmm_grid_search_summary_{timestamp}.csv"
    self.save_grid_search_csv(all_results, csv_filename)
    print(f"Grid search summary CSV saved to: {csv_filename}")
def save_grid_search_csv(self, all_results, filename):
    """Write one CSV row per grid-search trial to ``filename``.

    The first column, ``trial_id``, is the 1-based position of the trial in
    ``all_results``; the remaining columns are copied from the trial dict.

    Args:
        all_results: Iterable of per-trial dicts holding every column below.
        filename: Destination path; an existing file is overwritten.

    Raises:
        KeyError: If a trial dict lacks one of the expected columns.
    """
    # Columns taken verbatim from each trial dict, in output order.
    result_columns = [
        'n_components', 'covariance_type', 'reg_covar',
        'n_init', 'init_params', 'max_iter', 'bic_score', 'aic_score',
        'log_likelihood', 'silhouette_score', 'calinski_harabasz_score',
        'davies_bouldin_score', 'converged', 'n_iter', 'unique_clusters',
    ]

    # newline='' lets the csv module control line endings itself.
    with open(filename, 'w', newline='', encoding='utf-8') as handle:
        writer = csv.DictWriter(handle, fieldnames=['trial_id'] + result_columns)
        writer.writeheader()

        for trial_number, trial in enumerate(all_results, start=1):
            row = {column: trial[column] for column in result_columns}
            row['trial_id'] = trial_number
            writer.writerow(row)
def visualize_results(self, results):
    """Plot the selected clustering for each criterion in a 2-D PCA projection.

    Draws three side-by-side panels (BIC, AIC, silhouette), saves the figure
    as ``gmm_clustering_results.png`` and then shows it.

    Args:
        results: Mapping with keys ``'bic'``, ``'aic'`` and ``'silhouette'``;
            each value is a ``(labels, params, score)`` tuple. ``labels`` is
            ``None`` when that criterion produced no clustering; otherwise it
            is compared element-wise against each cluster id, so it is
            presumably a numpy array aligned with the embeddings.
    """
    figure, panels = plt.subplots(1, 3, figsize=(18, 6))

    # Project the scaled embeddings onto two principal components for plotting.
    projector = PCA(n_components=2, random_state=42)
    points_2d = projector.fit_transform(self.embeddings_scaled)

    criteria = (
        ('bic', 'Best by BIC'),
        ('aic', 'Best by AIC'),
        ('silhouette', 'Best by Silhouette'),
    )

    for panel, (method, title) in zip(panels, criteria):
        labels, params, score = results[method]

        if labels is None:
            # Nothing to draw for this criterion: show a placeholder message.
            panel.text(0.5, 0.5, 'No valid clustering', ha='center', va='center',
                       transform=panel.transAxes, fontsize=12)
            panel.set_title(f'{title}\n(Failed)')
        else:
            cluster_ids = set(labels)
            palette = plt.cm.Set3(np.linspace(0, 1, len(cluster_ids)))

            # One scatter call per cluster so each gets its own color and label.
            for cluster_id, color in zip(cluster_ids, palette):
                members = labels == cluster_id
                panel.scatter(points_2d[members, 0], points_2d[members, 1],
                              c=[color], s=50, alpha=0.7, label=f'Cluster {cluster_id}')

            panel.set_title(f'{title}\nn_components={params["n_components"]}, '
                            f'cov={params["covariance_type"]}')

        panel.set_xlabel('PCA Component 1')
        panel.set_ylabel('PCA Component 2')
        panel.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig('gmm_clustering_results.png', dpi=300, bbox_inches='tight')
    plt.show()
    print(f"Visualization saved as 'gmm_clustering_results.png'")
def save_clustering_results(self, results):
    """Save final clustering results to JSON files.

    For every criterion in ``results`` that has labels, writes
    ``gmm_final_results_<method>_<timestamp>.json`` in the current working
    directory with the parameters, the score and one
    ``{"filepath", "cluster"}`` entry per sample.

    Args:
        results: Mapping with keys ``'bic'``, ``'aic'`` and ``'silhouette'``;
            each value is a ``(labels, params, score)`` tuple. Criteria whose
            ``labels`` is ``None`` are skipped. ``labels`` is paired
            positionally with ``self.file_paths``.

    Raises:
        TypeError: If ``params`` or ``score`` hold a value json cannot encode
            even after numpy-style conversion.
    """
    def _to_builtin(value):
        """Convert numpy scalars/arrays (anything exposing ``tolist``) for json."""
        tolist = getattr(value, "tolist", None)
        if callable(tolist):
            return tolist()
        raise TypeError(f"Object of type {type(value).__name__} is not JSON serializable")

    timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")

    for method in ['bic', 'aic', 'silhouette']:
        labels, params, score = results[method]
        if labels is None:
            continue

        clustering_results = [
            {"filepath": filepath, "cluster": int(label)}
            for filepath, label in zip(self.file_paths, labels)
        ]

        filename = f"gmm_final_results_{method}_{timestamp}.json"
        # Scores and parameters may be numpy scalars (e.g. float32), which the
        # json module rejects; serialize before opening the file so a failure
        # cannot leave a truncated file behind.
        payload = json.dumps({
            "method": f"GMM (best by {method.upper()})",
            "parameters": params,
            "n_components": params['n_components'],
            "n_samples": len(labels),
            f"{method}_score": score,
            "results": clustering_results
        }, indent=4, default=_to_builtin)

        with open(filename, 'w') as f:
            f.write(payload)

        print(f"Final clustering results ({method}) saved to: {filename}")
def main():
    """Command-line entry point: run the GMM grid search on an embeddings file.

    Parses ``--embeddings_path`` (required), runs the grid search and, when at
    least one criterion yielded labels, visualizes and saves the results.
    """
    cli = argparse.ArgumentParser(description="Run extensive Gaussian Mixture Model clustering on document embeddings")
    cli.add_argument("--embeddings_path", required=True, help="Path to embeddings JSON file")
    options = cli.parse_args()

    # Initialize clustering and run the extensive grid search
    runner = GMMExtensiveClustering(options.embeddings_path)
    outcome = runner.run_gmm_grid_search()

    # Each value is a (labels, params, score) tuple; labels is None on failure.
    found_clusters = any(labels is not None for labels, _, _ in outcome.values())
    if not found_clusters:
        print("\nGMM extensive clustering did not find suitable clusters.")
        return

    # Visualize and save results
    runner.visualize_results(outcome)
    runner.save_clustering_results(outcome)
    print("\nGMM extensive clustering completed successfully!")
# Script entry point: invoke the CLI only when this file is executed directly,
# not when it is imported as a module.
if __name__ == "__main__":
main()

View File

@@ -0,0 +1,414 @@
trial_id,n_components,covariance_type,reg_covar,n_init,init_params,max_iter,bic_score,aic_score,log_likelihood,silhouette_score,calinski_harabasz_score,davies_bouldin_score,converged,n_iter,unique_clusters
1,2,full,0.0001,1,kmeans,100,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
2,2,full,0.0001,1,kmeans,300,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
3,2,full,0.0001,5,kmeans,100,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
4,2,full,0.0001,5,kmeans,300,17260132.605124418,-7679507.0,2871.501,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
5,2,full,0.001,1,kmeans,100,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
6,2,full,0.001,1,kmeans,300,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
7,2,full,0.001,5,kmeans,100,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
8,2,full,0.001,5,kmeans,300,20844797.605124418,-4094842.0,2231.382,0.36924269795417786,1331.6861572265625,1.080913887973297,True,2,2
9,2,full,0.0001,1,k-means++,100,17370120.605124418,-7569519.0,2851.86,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
10,2,full,0.0001,1,k-means++,300,17370120.605124418,-7569519.0,2851.86,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
11,2,full,0.0001,5,k-means++,100,17270534.605124418,-7669105.0,2869.6433,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
12,2,full,0.0001,5,k-means++,300,17270534.605124418,-7669105.0,2869.6433,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
13,2,full,0.001,1,k-means++,100,20919727.605124418,-4019912.0,2218.0017,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
14,2,full,0.001,1,k-means++,300,20919727.605124418,-4019912.0,2218.0017,0.3683019280433655,1320.3240966796875,1.0772816604479254,True,3,2
15,2,full,0.001,5,k-means++,100,20851959.605124418,-4087680.0,2230.1033,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
16,2,full,0.001,5,k-means++,300,20851959.605124418,-4087680.0,2230.1033,0.3693194091320038,1331.4493408203125,1.0799693510874797,True,3,2
17,3,full,0.0001,1,kmeans,100,33833558.37637398,-3575904.0,2888.795,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
18,3,full,0.0001,1,kmeans,300,33833558.37637398,-3575904.0,2888.795,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
19,3,full,0.0001,5,kmeans,100,26462676.376373976,-10946786.0,4205.024,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
20,3,full,0.0001,5,kmeans,300,26462676.376373976,-10946786.0,4205.024,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
21,3,full,0.001,1,kmeans,100,37452100.37637398,42638.0,2242.6267,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
22,3,full,0.001,1,kmeans,300,37452100.37637398,42638.0,2242.6267,0.37564584612846375,781.5426635742188,0.7905502894819209,True,2,3
23,3,full,0.001,5,kmeans,100,33411843.376373976,-3997619.0,2964.1013,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
24,3,full,0.001,5,kmeans,300,33411843.376373976,-3997619.0,2964.1013,0.2200196236371994,918.8184814453125,1.8325651201939497,True,2,3
25,3,full,0.001,1,k-means++,100,37089716.37637398,-319746.0,2307.3381,0.3671606779098511,710.0538940429688,1.7711288790751016,True,3,3
26,3,full,0.001,1,k-means++,300,37089716.37637398,-319746.0,2307.3381,0.3671606779098511,710.0538940429688,1.7711288790751016,True,3,3
27,3,full,0.001,5,k-means++,100,33366158.376373976,-4043304.0,2972.2593,0.19688381254673004,834.6746215820312,2.265938022603405,True,3,3
28,3,full,0.001,5,k-means++,300,33366158.376373976,-4043304.0,2972.2593,0.19688381254673004,834.6746215820312,2.265938022603405,True,3,3
29,4,full,0.0001,1,kmeans,100,44240183.14762353,-5639102.0,4007.3035,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
30,4,full,0.0001,1,kmeans,300,44240183.14762353,-5639102.0,4007.3035,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
31,4,full,0.0001,5,kmeans,100,38759701.14762353,-11119584.0,4985.961,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
32,4,full,0.0001,5,kmeans,300,38759701.14762353,-11119584.0,4985.961,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
33,4,full,0.001,1,kmeans,100,50532988.14762353,653703.0,2883.5884,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
34,4,full,0.001,1,kmeans,300,50532988.14762353,653703.0,2883.5884,0.310958594083786,829.0513916015625,1.0906616124036408,True,2,4
35,4,full,0.001,5,kmeans,100,47456739.14762353,-2422546.0,3432.9185,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
36,4,full,0.001,5,kmeans,300,47456739.14762353,-2422546.0,3432.9185,0.2639731168746948,873.2352905273438,1.6723937598752374,True,2,4
37,4,full,0.001,1,k-means++,100,50369343.14762353,490058.0,2912.8108,0.16492997109889984,579.3992309570312,2.14603204385876,True,3,4
38,4,full,0.001,1,k-means++,300,50369343.14762353,490058.0,2912.8108,0.16492997109889984,579.3992309570312,2.14603204385876,True,3,4
39,4,full,0.001,5,k-means++,100,48104059.14762353,-1775226.0,3317.3257,0.19116489589214325,729.421630859375,2.391271825318095,True,3,4
40,4,full,0.001,5,k-means++,300,48104059.14762353,-1775226.0,3317.3257,0.19116489589214325,729.421630859375,2.391271825318095,True,3,4
41,5,full,0.0001,1,kmeans,100,60034171.91887309,-2314936.0,4163.7827,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
42,5,full,0.0001,1,kmeans,300,60034171.91887309,-2314936.0,4163.7827,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
43,5,full,0.0001,5,kmeans,100,54230057.91887309,-8119050.0,5200.232,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
44,5,full,0.0001,5,kmeans,300,54230057.91887309,-8119050.0,5200.232,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
45,5,full,0.001,1,kmeans,100,66698922.91887309,4349815.0,2973.6487,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
46,5,full,0.001,1,kmeans,300,66698922.91887309,4349815.0,2973.6487,0.3162730038166046,780.908935546875,1.0143329238161003,True,2,5
47,5,full,0.001,5,kmeans,100,63375271.91887309,1026164.0,3567.158,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
48,5,full,0.001,5,kmeans,300,63375271.91887309,1026164.0,3567.158,0.25592249631881714,721.9691162109375,1.585169064053077,True,2,5
49,5,full,0.001,1,k-means++,100,66517075.91887309,4167968.0,3006.1213,0.16880138218402863,549.265625,1.8881643569801063,True,3,5
50,5,full,0.001,1,k-means++,300,66517075.91887309,4167968.0,3006.1213,0.16880138218402863,549.265625,1.8881643569801063,True,3,5
51,5,full,0.001,5,k-means++,100,63364071.91887309,1014964.0,3569.158,0.25286975502967834,715.984619140625,1.6093410042807197,True,3,5
52,5,full,0.001,5,k-means++,300,63364071.91887309,1014964.0,3569.158,0.25286975502967834,715.984619140625,1.6093410042807197,True,3,5
53,6,full,0.0001,1,kmeans,100,73062550.69012265,-1756380.0,4814.121,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
54,6,full,0.0001,1,kmeans,300,73062550.69012265,-1756380.0,4814.121,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
55,6,full,0.0001,5,kmeans,100,69890932.69012265,-4927998.0,5380.4814,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
56,6,full,0.0001,5,kmeans,300,69890932.69012265,-4927998.0,5380.4814,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
57,6,full,0.001,1,kmeans,100,81179056.69012265,6360126.0,3364.745,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
58,6,full,0.001,1,kmeans,300,81179056.69012265,6360126.0,3364.745,0.24853873252868652,668.8661499023438,1.646429379523011,True,2,6
59,6,full,0.001,5,kmeans,100,79356620.69012265,4537690.0,3690.18,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
60,6,full,0.001,5,kmeans,300,79356620.69012265,4537690.0,3690.18,0.27074411511421204,655.6273193359375,1.6294192539951549,True,2,6
61,6,full,1e-05,1,k-means++,100,68753328.69012265,-6065602.0,5583.625,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
62,6,full,1e-05,1,k-means++,300,68753328.69012265,-6065602.0,5583.625,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
63,6,full,0.0001,1,k-means++,100,75948732.69012265,1129802.0,4298.7314,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
64,6,full,0.0001,1,k-means++,300,75948732.69012265,1129802.0,4298.7314,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
65,6,full,0.0001,5,k-means++,100,69381502.69012265,-5437428.0,5471.451,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
66,6,full,0.0001,5,k-means++,300,69381502.69012265,-5437428.0,5471.451,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
67,6,full,0.001,1,k-means++,100,83185656.69012265,8366726.0,3006.4236,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
68,6,full,0.001,1,k-means++,300,83185656.69012265,8366726.0,3006.4236,0.17110876739025116,480.8229675292969,1.5572656008570327,True,3,6
69,6,full,0.001,5,k-means++,100,79079084.69012265,4260154.0,3739.74,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
70,6,full,0.001,5,k-means++,300,79079084.69012265,4260154.0,3739.74,0.21281521022319794,580.2518920898438,2.180165862436555,True,3,6
71,8,full,0.0001,1,kmeans,100,101322900.23262176,1564326.0,5721.299,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
72,8,full,0.0001,1,kmeans,300,101322900.23262176,1564326.0,5721.299,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
73,8,full,0.0001,5,kmeans,100,100810002.23262176,1051428.0,5812.8877,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
74,8,full,0.0001,5,kmeans,300,100810002.23262176,1051428.0,5812.8877,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
75,8,full,0.001,1,kmeans,100,111448618.23262176,11690044.0,3913.135,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
76,8,full,0.001,1,kmeans,300,111448618.23262176,11690044.0,3913.135,0.2680038809776306,557.3214721679688,1.5652706740038278,True,2,8
77,8,full,0.001,5,kmeans,100,111172686.23262176,11414112.0,3962.4087,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
78,8,full,0.001,5,kmeans,300,111172686.23262176,11414112.0,3962.4087,0.27481919527053833,642.9092407226562,1.6967101819134367,True,2,8
79,8,full,0.001,1,k-means++,100,111979964.23262176,12221390.0,3818.2517,0.2020130306482315,465.200927734375,1.9463124697846808,True,3,8
80,8,full,0.001,1,k-means++,300,111979964.23262176,12221390.0,3818.2517,0.2020130306482315,465.200927734375,1.9463124697846808,True,3,8
81,8,full,0.001,5,k-means++,100,111327662.23262176,11569088.0,3934.7344,0.2736768126487732,617.4371948242188,1.7398856934277325,True,3,8
82,8,full,0.001,5,k-means++,300,111327662.23262176,11569088.0,3934.7344,0.2736768126487732,617.4371948242188,1.7398856934277325,True,3,8
83,10,full,0.0001,1,kmeans,100,133265705.77512088,8567482.0,5970.8955,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
84,10,full,0.0001,1,kmeans,300,133265705.77512088,8567482.0,5970.8955,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
85,10,full,0.0001,5,kmeans,100,132892239.77512088,8194016.0,6037.586,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
86,10,full,0.0001,5,kmeans,300,132892239.77512088,8194016.0,6037.586,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
87,10,full,0.001,1,kmeans,100,143970687.77512088,19272464.0,4059.2915,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
88,10,full,0.001,1,kmeans,300,143970687.77512088,19272464.0,4059.2915,0.24388161301612854,576.40185546875,1.5385559411472558,True,2,10
89,10,full,0.001,5,kmeans,100,143652495.77512088,18954272.0,4116.1113,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
90,10,full,0.001,5,kmeans,300,143652495.77512088,18954272.0,4116.1113,0.28627628087997437,557.144775390625,1.6716653781194553,True,2,10
91,10,full,0.001,1,k-means++,100,144482919.77512088,19784696.0,3967.8215,0.17508849501609802,474.4588928222656,1.846488092509191,True,3,10
92,10,full,0.001,1,k-means++,300,144482919.77512088,19784696.0,3967.8215,0.17508849501609802,474.4588928222656,1.846488092509191,True,3,10
93,10,full,0.001,5,k-means++,100,144071547.77512088,19373324.0,4041.2808,0.22849640250205994,521.3035278320312,1.9523215129883376,True,3,10
94,10,full,0.001,5,k-means++,300,144071547.77512088,19373324.0,4041.2808,0.22849640250205994,521.3035278320312,1.9523215129883376,True,3,10
95,11,full,0.0001,1,kmeans,100,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
96,11,full,0.0001,1,kmeans,300,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
97,11,full,0.0001,5,kmeans,100,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
98,11,full,0.0001,5,kmeans,300,149128048.54637042,11960004.0,6115.1685,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
99,11,full,0.001,1,kmeans,100,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
100,11,full,0.001,1,kmeans,300,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
101,11,full,0.001,5,kmeans,100,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
102,11,full,0.001,5,kmeans,300,160074600.54637042,22906556.0,4160.4272,0.2577499449253082,598.6676635742188,1.5193188313170118,True,2,11
103,11,full,0.001,1,k-means++,100,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
104,11,full,0.001,1,k-means++,300,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
105,11,full,0.001,5,k-means++,100,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
106,11,full,0.001,5,k-means++,300,160636770.54637042,23468726.0,4060.0396,0.18649740517139435,485.63348388671875,1.8009971426865101,True,3,11
107,14,full,0.0001,1,kmeans,100,198149922.8601191,23572408.0,6291.7656,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
108,14,full,0.0001,1,kmeans,300,198149922.8601191,23572408.0,6291.7656,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
109,14,full,0.0001,5,kmeans,100,197540314.8601191,22962800.0,6400.6245,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
110,14,full,0.0001,5,kmeans,300,197540314.8601191,22962800.0,6400.6245,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
111,14,full,0.001,1,kmeans,100,209401674.8601191,34824160.0,4282.5244,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
112,14,full,0.001,1,kmeans,300,209401674.8601191,34824160.0,4282.5244,0.21188320219516754,491.18792724609375,1.7082811638393387,True,2,14
113,14,full,0.001,5,kmeans,100,208994740.8601191,34417224.0,4355.191,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
114,14,full,0.001,5,kmeans,300,208994740.8601191,34417224.0,4355.191,0.20964229106903076,496.4472351074219,1.950038464238459,True,2,14
115,14,full,0.0001,1,k-means++,100,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
116,14,full,0.0001,1,k-means++,300,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
117,14,full,0.0001,5,k-means++,100,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
118,14,full,0.0001,5,k-means++,300,197987434.8601191,23409920.0,6320.7812,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
119,14,full,0.001,1,k-means++,100,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
120,14,full,0.001,1,k-means++,300,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
121,14,full,0.001,5,k-means++,100,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
122,14,full,0.001,5,k-means++,300,209335602.8601191,34758090.0,4294.3228,0.16694776713848114,449.8548889160156,1.8331922544784534,True,3,14
123,17,full,0.0001,1,kmeans,100,247173509.17386776,35186530.0,6468.057,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
124,17,full,0.0001,1,kmeans,300,247173509.17386776,35186530.0,6468.057,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
125,17,full,0.0001,5,kmeans,100,246850361.17386776,34863380.0,6525.762,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
126,17,full,0.0001,5,kmeans,300,246850361.17386776,34863380.0,6525.762,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
127,17,full,0.001,1,kmeans,100,258736973.17386776,46749990.0,4403.153,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
128,17,full,0.001,1,kmeans,300,258736973.17386776,46749990.0,4403.153,0.1834743171930313,427.35272216796875,1.8956740098304399,True,2,17
129,17,full,0.001,5,kmeans,100,258504801.17386776,46517820.0,4444.6123,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
130,17,full,0.001,5,kmeans,300,258504801.17386776,46517820.0,4444.6123,0.2085043042898178,427.7763977050781,1.9588585142518828,True,2,17
131,17,full,0.0001,1,k-means++,100,247607397.17386776,35620416.0,6390.577,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
132,17,full,0.0001,1,k-means++,300,247607397.17386776,35620416.0,6390.577,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
133,17,full,0.0001,5,k-means++,100,246784997.17386776,34798016.0,6537.434,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
134,17,full,0.0001,5,k-means++,300,246784997.17386776,34798016.0,6537.434,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
135,17,full,0.001,1,k-means++,100,259055585.17386776,47068604.0,4346.258,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
136,17,full,0.001,1,k-means++,300,259055585.17386776,47068604.0,4346.258,0.14455115795135498,384.99053955078125,2.108500185002096,True,3,17
137,17,full,0.001,5,k-means++,100,258522869.17386776,46535890.0,4441.3857,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
138,17,full,0.001,5,k-means++,300,258522869.17386776,46535890.0,4441.3857,0.13458234071731567,386.3608093261719,2.3614049227531075,True,3,17
139,20,full,0.0001,1,kmeans,100,296473639.4876164,47077190.0,6594.966,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
140,20,full,0.0001,1,kmeans,300,296473639.4876164,47077190.0,6594.966,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
141,20,full,0.001,1,kmeans,100,308235301.4876164,58838856.0,4494.669,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
142,20,full,0.001,1,kmeans,300,308235301.4876164,58838856.0,4494.669,0.1770476996898651,382.437744140625,1.8608292401058428,True,2,20
143,20,full,0.001,5,kmeans,100,307947927.4876164,58551480.0,4545.986,0.12884767353534698,377.9795227050781,2.0180962938149367,True,2,20
144,20,full,0.001,5,kmeans,300,307947927.4876164,58551480.0,4545.986,0.12884767353534698,377.9795227050781,2.0180962938149367,True,2,20
145,20,full,0.0001,1,k-means++,100,297139767.4876164,47743320.0,6476.014,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
146,20,full,0.0001,1,k-means++,300,297139767.4876164,47743320.0,6476.014,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
147,20,full,0.001,1,k-means++,100,308712155.4876164,59315708.0,4409.5166,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
148,20,full,0.001,1,k-means++,300,308712155.4876164,59315708.0,4409.5166,0.13996723294258118,336.5575866699219,2.2953358196957456,True,3,20
149,20,full,0.001,5,k-means++,100,308599855.4876164,59203410.0,4429.57,0.15204866230487823,341.3536376953125,2.231048217195437,True,3,20
150,20,full,0.001,5,k-means++,300,308599855.4876164,59203410.0,4429.57,0.15204866230487823,341.3536376953125,2.231048217195437,True,3,20
151,2,diag,1e-05,1,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
152,2,diag,1e-05,1,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
153,2,diag,1e-05,5,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
154,2,diag,1e-05,5,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
155,2,diag,0.0001,1,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
156,2,diag,0.0001,1,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
157,2,diag,0.0001,5,kmeans,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
158,2,diag,0.0001,5,kmeans,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
159,2,diag,0.001,1,kmeans,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
160,2,diag,0.001,1,kmeans,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
161,2,diag,0.001,5,kmeans,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
162,2,diag,0.001,5,kmeans,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,4,2
163,2,diag,1e-05,1,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
164,2,diag,1e-05,1,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
165,2,diag,1e-05,5,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
166,2,diag,1e-05,5,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,6,2
167,2,diag,0.0001,1,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
168,2,diag,0.0001,1,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
169,2,diag,0.0001,5,k-means++,100,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
170,2,diag,0.0001,5,k-means++,300,13089173.910885666,13040529.0,-2325.7397,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
171,2,diag,0.001,1,k-means++,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
172,2,diag,0.001,1,k-means++,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
173,2,diag,0.001,5,k-means++,100,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
174,2,diag,0.001,5,k-means++,300,13089203.910885666,13040559.0,-2325.745,0.36971479654312134,1327.397216796875,1.073152783729392,True,5,2
175,3,diag,1e-05,1,kmeans,100,12693850.335015846,12620880.0,-2249.3394,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
176,3,diag,1e-05,1,kmeans,300,12693850.335015846,12620880.0,-2249.3394,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
177,3,diag,1e-05,5,kmeans,100,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,19,3
178,3,diag,1e-05,5,kmeans,300,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,19,3
179,3,diag,0.0001,1,kmeans,100,12699627.335015846,12626657.0,-2250.3708,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
180,3,diag,0.0001,1,kmeans,300,12699627.335015846,12626657.0,-2250.3708,0.3760926127433777,779.2965087890625,0.7860265546274455,True,6,3
181,3,diag,0.0001,5,kmeans,100,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,20,3
182,3,diag,0.0001,5,kmeans,300,11770626.335015846,11697656.0,-2084.4778,0.1531982570886612,837.4287719726562,1.6999940518251055,True,20,3
183,3,diag,0.001,1,kmeans,100,12718245.335015846,12645275.0,-2253.6956,0.3760926127433777,779.2965087890625,0.7860265546274455,True,7,3
184,3,diag,0.001,1,kmeans,300,12718245.335015846,12645275.0,-2253.6956,0.3760926127433777,779.2965087890625,0.7860265546274455,True,7,3
185,3,diag,0.001,5,kmeans,100,11770859.335015846,11697889.0,-2084.5195,0.15369778871536255,838.0372924804688,1.7007544548321498,True,19,3
186,3,diag,0.001,5,kmeans,300,11770859.335015846,11697889.0,-2084.5195,0.15369778871536255,838.0372924804688,1.7007544548321498,True,19,3
187,3,diag,1e-05,1,k-means++,100,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
188,3,diag,1e-05,1,k-means++,300,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
189,3,diag,1e-05,5,k-means++,100,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
190,3,diag,1e-05,5,k-means++,300,11686081.335015846,11613111.0,-2069.3806,0.2351498007774353,882.5064086914062,2.071681816869212,True,19,3
191,3,diag,0.0001,1,k-means++,100,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
192,3,diag,0.0001,1,k-means++,300,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
193,3,diag,0.0001,5,k-means++,100,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
194,3,diag,0.0001,5,k-means++,300,11686083.335015846,11613113.0,-2069.3809,0.2351498007774353,882.5064086914062,2.071681816869212,True,21,3
195,3,diag,0.001,1,k-means++,100,11686162.335015846,11613192.0,-2069.395,0.2351498007774353,882.5064086914062,2.071681816869212,True,22,3
196,3,diag,0.001,1,k-means++,300,11686162.335015846,11613192.0,-2069.395,0.2351498007774353,882.5064086914062,2.071681816869212,True,22,3
197,3,diag,0.001,5,k-means++,100,11686154.335015846,11613184.0,-2069.3936,0.2351498007774353,882.5064697265625,2.071681816869212,True,13,3
198,3,diag,0.001,5,k-means++,300,11686154.335015846,11613184.0,-2069.3936,0.2351498007774353,882.5064697265625,2.071681816869212,True,13,3
199,4,diag,1e-05,1,kmeans,100,11525150.759146027,11427855.0,-2034.8359,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
200,4,diag,1e-05,1,kmeans,300,11525150.759146027,11427855.0,-2034.8359,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
201,4,diag,1e-05,5,kmeans,100,10872145.759146027,10774850.0,-1918.2279,0.22238799929618835,665.7476806640625,2.503304023275595,True,17,4
202,4,diag,1e-05,5,kmeans,300,10872145.759146027,10774850.0,-1918.2279,0.22238799929618835,665.7476806640625,2.503304023275595,True,17,4
203,4,diag,0.0001,1,kmeans,100,11530927.759146027,11433632.0,-2035.8676,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
204,4,diag,0.0001,1,kmeans,300,11530927.759146027,11433632.0,-2035.8676,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
205,4,diag,0.0001,5,kmeans,100,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,16,4
206,4,diag,0.0001,5,kmeans,300,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,16,4
207,4,diag,0.001,1,kmeans,100,11549555.759146027,11452260.0,-2039.194,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
208,4,diag,0.001,1,kmeans,300,11549555.759146027,11452260.0,-2039.194,0.3090108335018158,828.0037841796875,1.0965690514458653,True,4,4
209,4,diag,0.001,5,kmeans,100,10872103.759146027,10774808.0,-1918.2203,0.2233457714319229,667.0189208984375,2.500263810780557,True,18,4
210,4,diag,0.001,5,kmeans,300,10872103.759146027,10774808.0,-1918.2203,0.2233457714319229,667.0189208984375,2.500263810780557,True,18,4
211,4,diag,1e-05,1,k-means++,100,10871971.759146027,10774676.0,-1918.1968,0.22509750723838806,669.1204223632812,2.4944239618747446,True,17,4
212,4,diag,1e-05,1,k-means++,300,10871971.759146027,10774676.0,-1918.1968,0.22509750723838806,669.1204223632812,2.4944239618747446,True,17,4
213,4,diag,1e-05,5,k-means++,100,10865268.759146027,10767973.0,-1916.9999,0.18924137949943542,715.3873291015625,1.9068310882928445,True,23,4
214,4,diag,1e-05,5,k-means++,300,10865268.759146027,10767973.0,-1916.9999,0.18924137949943542,715.3873291015625,1.9068310882928445,True,23,4
215,4,diag,0.0001,1,k-means++,100,11379467.759146027,11282172.0,-2008.821,0.23593758046627045,682.464599609375,1.5970337323460135,True,12,4
216,4,diag,0.0001,1,k-means++,300,11379467.759146027,11282172.0,-2008.821,0.23593758046627045,682.464599609375,1.5970337323460135,True,12,4
217,4,diag,0.0001,5,k-means++,100,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,26,4
218,4,diag,0.0001,5,k-means++,300,10872147.759146027,10774852.0,-1918.2283,0.22238799929618835,665.7476806640625,2.503304023275595,True,26,4
219,4,diag,0.001,1,k-means++,100,11398228.759146027,11300933.0,-2012.1713,0.23644769191741943,683.3140869140625,1.5953322750132166,True,13,4
220,4,diag,0.001,1,k-means++,300,11398228.759146027,11300933.0,-2012.1713,0.23644769191741943,683.3140869140625,1.5953322750132166,True,13,4
221,4,diag,0.001,5,k-means++,100,10872234.759146027,10774939.0,-1918.2438,0.22265465557575226,666.09033203125,2.5026321909350457,True,24,4
222,4,diag,0.001,5,k-means++,300,10872234.759146027,10774939.0,-1918.2438,0.22265465557575226,666.09033203125,2.5026321909350457,True,24,4
223,5,diag,1e-05,1,kmeans,100,10641753.183276208,10520132.0,-1871.2793,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
224,5,diag,1e-05,1,kmeans,300,10641753.183276208,10520132.0,-1871.2793,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
225,5,diag,1e-05,5,kmeans,100,10324953.183276208,10203332.0,-1814.7079,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
226,5,diag,1e-05,5,kmeans,300,10324953.183276208,10203332.0,-1814.7079,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
227,5,diag,0.0001,1,kmeans,100,10647529.183276208,10525908.0,-1872.3107,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
228,5,diag,0.0001,1,kmeans,300,10647529.183276208,10525908.0,-1872.3107,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
229,5,diag,0.0001,5,kmeans,100,10324954.183276208,10203333.0,-1814.708,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
230,5,diag,0.0001,5,kmeans,300,10324954.183276208,10203333.0,-1814.708,0.21296893060207367,639.6068115234375,1.5943357881476847,True,20,5
231,5,diag,0.001,1,kmeans,100,10666196.183276208,10544575.0,-1875.6442,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
232,5,diag,0.001,1,kmeans,300,10666196.183276208,10544575.0,-1875.6442,0.3118983507156372,774.8809814453125,1.0195520325095044,True,5,5
233,5,diag,0.001,5,kmeans,100,10327782.183276208,10206161.0,-1815.213,0.2155037522315979,645.0463256835938,1.5864905576853905,True,23,5
234,5,diag,0.001,5,kmeans,300,10327782.183276208,10206161.0,-1815.213,0.2155037522315979,645.0463256835938,1.5864905576853905,True,23,5
235,5,diag,1e-05,1,k-means++,100,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
236,5,diag,1e-05,1,k-means++,300,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
237,5,diag,1e-05,5,k-means++,100,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
238,5,diag,1e-05,5,k-means++,300,9931250.183276208,9809629.0,-1744.4038,0.2225552350282669,602.316162109375,2.189639810006293,True,17,5
239,5,diag,0.0001,1,k-means++,100,10466296.183276208,10344675.0,-1839.9476,0.24029850959777832,631.74609375,1.4320268333089838,True,14,5
240,5,diag,0.0001,1,k-means++,300,10466296.183276208,10344675.0,-1839.9476,0.24029850959777832,631.74609375,1.4320268333089838,True,14,5
241,5,diag,0.0001,5,k-means++,100,9948819.183276208,9827198.0,-1747.541,0.19564315676689148,662.848876953125,1.6781877684435718,True,17,5
242,5,diag,0.0001,5,k-means++,300,9948819.183276208,9827198.0,-1747.541,0.19564315676689148,662.848876953125,1.6781877684435718,True,17,5
243,5,diag,0.001,1,k-means++,100,10485035.183276208,10363414.0,-1843.294,0.24058617651462555,631.865478515625,1.432154375281304,True,17,5
244,5,diag,0.001,1,k-means++,300,10485035.183276208,10363414.0,-1843.294,0.24058617651462555,631.865478515625,1.432154375281304,True,17,5
245,5,diag,0.001,5,k-means++,100,9949011.183276208,9827390.0,-1747.5753,0.19609026610851288,663.4921264648438,1.6774180513139325,True,19,5
246,5,diag,0.001,5,k-means++,300,9949011.183276208,9827390.0,-1747.5753,0.19609026610851288,663.4921264648438,1.6774180513139325,True,19,5
247,6,diag,1e-05,1,kmeans,100,9799004.60740639,9653058.0,-1714.9814,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
248,6,diag,1e-05,1,kmeans,300,9799004.60740639,9653058.0,-1714.9814,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
249,6,diag,1e-05,5,kmeans,100,9102218.60740639,8956272.0,-1590.5553,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
250,6,diag,1e-05,5,kmeans,300,9102218.60740639,8956272.0,-1590.5553,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
251,6,diag,0.0001,1,kmeans,100,9804784.60740639,9658838.0,-1716.0135,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
252,6,diag,0.0001,1,kmeans,300,9804784.60740639,9658838.0,-1716.0135,0.26129186153411865,629.4482421875,1.9915981688116708,True,15,6
253,6,diag,0.0001,5,kmeans,100,9102220.60740639,8956274.0,-1590.5557,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
254,6,diag,0.0001,5,kmeans,300,9102220.60740639,8956274.0,-1590.5557,0.21352295577526093,566.8534545898438,2.13510416879353,True,19,6
255,6,diag,0.001,1,kmeans,100,9823540.60740639,9677594.0,-1719.3629,0.26235219836235046,629.54638671875,1.991657340604508,True,16,6
256,6,diag,0.001,1,kmeans,300,9823540.60740639,9677594.0,-1719.3629,0.26235219836235046,629.54638671875,1.991657340604508,True,16,6
257,6,diag,0.001,5,kmeans,100,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,28,6
258,6,diag,0.001,5,kmeans,300,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,28,6
259,6,diag,1e-05,1,k-means++,100,9646063.60740639,9500117.0,-1687.6705,0.20277422666549683,612.2411499023438,1.4227889323530194,True,20,6
260,6,diag,1e-05,1,k-means++,300,9646063.60740639,9500117.0,-1687.6705,0.20277422666549683,612.2411499023438,1.4227889323530194,True,20,6
261,6,diag,1e-05,5,k-means++,100,9102296.60740639,8956350.0,-1590.5693,0.21240639686584473,565.2139282226562,2.136853531967839,True,21,6
262,6,diag,1e-05,5,k-means++,300,9102296.60740639,8956350.0,-1590.5693,0.21240639686584473,565.2139282226562,2.136853531967839,True,21,6
263,6,diag,0.0001,1,k-means++,100,9682908.60740639,9536962.0,-1694.25,0.17942221462726593,527.6393432617188,1.9590089629656866,True,34,6
264,6,diag,0.0001,1,k-means++,300,9682908.60740639,9536962.0,-1694.25,0.17942221462726593,527.6393432617188,1.9590089629656866,True,34,6
265,6,diag,0.0001,5,k-means++,100,9102298.60740639,8956352.0,-1590.5697,0.21240639686584473,565.2139282226562,2.136853531967839,True,24,6
266,6,diag,0.0001,5,k-means++,300,9102298.60740639,8956352.0,-1590.5697,0.21240639686584473,565.2139282226562,2.136853531967839,True,24,6
267,6,diag,0.001,1,k-means++,100,9701922.60740639,9555976.0,-1697.6454,0.18013142049312592,529.0560913085938,1.9546049777626981,True,31,6
268,6,diag,0.001,1,k-means++,300,9701922.60740639,9555976.0,-1697.6454,0.18013142049312592,529.0560913085938,1.9546049777626981,True,31,6
269,6,diag,0.001,5,k-means++,100,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,31,6
270,6,diag,0.001,5,k-means++,300,9102520.60740639,8956574.0,-1590.6093,0.21336553990840912,567.1337890625,2.137562111985855,True,31,6
271,8,diag,1e-05,1,kmeans,100,9403674.455666753,9209077.0,-1632.7727,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
272,8,diag,1e-05,1,kmeans,300,9403674.455666753,9209077.0,-1632.7727,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
273,8,diag,1e-05,5,kmeans,100,8401628.455666753,8207031.0,-1453.8359,0.24377639591693878,568.4773559570312,2.056540191275295,True,13,8
274,8,diag,1e-05,5,kmeans,300,8401628.455666753,8207031.0,-1453.8359,0.24377639591693878,568.4773559570312,2.056540191275295,True,13,8
275,8,diag,0.0001,1,kmeans,100,9409411.455666753,9214814.0,-1633.7971,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
276,8,diag,0.0001,1,kmeans,300,9409411.455666753,9214814.0,-1633.7971,0.25722020864486694,545.548095703125,1.7082735900456691,True,9,8
277,8,diag,0.0001,5,kmeans,100,8401766.955666753,8207169.5,-1453.8606,0.24412217736244202,568.5999755859375,2.0555351393061194,True,13,8
278,8,diag,0.0001,5,kmeans,300,8401766.955666753,8207169.5,-1453.8606,0.24412217736244202,568.5999755859375,2.0555351393061194,True,13,8
279,8,diag,0.001,1,kmeans,100,9428169.455666753,9233572.0,-1637.1467,0.2572267949581146,545.4515991210938,1.707896480449314,True,12,8
280,8,diag,0.001,1,kmeans,300,9428169.455666753,9233572.0,-1637.1467,0.2572267949581146,545.4515991210938,1.707896480449314,True,12,8
281,8,diag,0.001,5,kmeans,100,8402030.455666753,8207433.0,-1453.9077,0.24412217736244202,568.5999755859375,2.0555351393061194,True,19,8
282,8,diag,0.001,5,kmeans,300,8402030.455666753,8207433.0,-1453.9077,0.24412217736244202,568.5999755859375,2.0555351393061194,True,19,8
283,8,diag,1e-05,1,k-means++,100,9222857.455666753,9028260.0,-1600.4839,0.20888392627239227,437.5425109863281,2.1599292696306343,True,22,8
284,8,diag,1e-05,1,k-means++,300,9222857.455666753,9028260.0,-1600.4839,0.20888392627239227,437.5425109863281,2.1599292696306343,True,22,8
285,8,diag,1e-05,5,k-means++,100,8425934.455666753,8231337.0,-1458.1763,0.240326389670372,491.6292724609375,2.0912175194979867,True,18,8
286,8,diag,1e-05,5,k-means++,300,8425934.455666753,8231337.0,-1458.1763,0.240326389670372,491.6292724609375,2.0912175194979867,True,18,8
287,8,diag,0.0001,1,k-means++,100,9489607.455666753,9295010.0,-1648.1178,0.214387446641922,498.1647033691406,1.8502738691794258,True,15,8
288,8,diag,0.0001,1,k-means++,300,9489607.455666753,9295010.0,-1648.1178,0.214387446641922,498.1647033691406,1.8502738691794258,True,15,8
289,8,diag,0.0001,5,k-means++,100,8401862.455666753,8207265.0,-1453.8777,0.24387237429618835,568.502197265625,2.056662610079275,True,14,8
290,8,diag,0.0001,5,k-means++,300,8401862.455666753,8207265.0,-1453.8777,0.24387237429618835,568.502197265625,2.056662610079275,True,14,8
291,8,diag,0.001,1,k-means++,100,9508329.455666753,9313732.0,-1651.461,0.21473833918571472,498.2613830566406,1.850583105515141,True,13,8
292,8,diag,0.001,1,k-means++,300,9508329.455666753,9313732.0,-1651.461,0.21473833918571472,498.2613830566406,1.850583105515141,True,13,8
293,8,diag,0.001,5,k-means++,100,8402078.455666753,8207481.0,-1453.9163,0.2442118227481842,568.6036987304688,2.0567161321422094,True,17,8
294,8,diag,0.001,5,k-means++,300,8402078.455666753,8207481.0,-1453.9163,0.2442118227481842,568.6036987304688,2.0567161321422094,True,17,8
295,10,diag,1e-05,1,kmeans,100,7888954.303927114,7645706.0,-1350.6729,0.20840129256248474,479.23870849609375,1.9209118556333535,True,40,10
296,10,diag,1e-05,1,kmeans,300,7888954.303927114,7645706.0,-1350.6729,0.20840129256248474,479.23870849609375,1.9209118556333535,True,40,10
297,10,diag,1e-05,5,kmeans,100,7737961.303927114,7494713.0,-1323.7098,0.21908442676067352,571.03955078125,1.8024606257110887,True,14,10
298,10,diag,1e-05,5,kmeans,300,7737961.303927114,7494713.0,-1323.7098,0.21908442676067352,571.03955078125,1.8024606257110887,True,14,10
299,10,diag,0.0001,1,kmeans,100,7894699.303927114,7651451.0,-1351.6987,0.20837311446666718,479.2680969238281,1.921187996397482,True,39,10
300,10,diag,0.0001,1,kmeans,300,7894699.303927114,7651451.0,-1351.6987,0.20837311446666718,479.2680969238281,1.921187996397482,True,39,10
301,10,diag,0.0001,5,kmeans,100,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,15,10
302,10,diag,0.0001,5,kmeans,300,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,15,10
303,10,diag,0.001,1,kmeans,100,7913842.303927114,7670594.0,-1355.1172,0.20897234976291656,479.4935302734375,1.9199507888474652,True,32,10
304,10,diag,0.001,1,kmeans,300,7913842.303927114,7670594.0,-1355.1172,0.20897234976291656,479.4935302734375,1.9199507888474652,True,32,10
305,10,diag,0.001,5,kmeans,100,7762934.303927114,7519686.0,-1328.1693,0.2199474722146988,571.3079833984375,1.801659072375885,True,12,10
306,10,diag,0.001,5,kmeans,300,7762934.303927114,7519686.0,-1328.1693,0.2199474722146988,571.3079833984375,1.801659072375885,True,12,10
307,10,diag,1e-05,1,k-means++,100,7924116.803927114,7680868.5,-1356.9519,0.18109233677387238,451.2889099121094,2.0573889908400558,True,21,10
308,10,diag,1e-05,1,k-means++,300,7924116.803927114,7680868.5,-1356.9519,0.18109233677387238,451.2889099121094,2.0573889908400558,True,21,10
309,10,diag,1e-05,5,k-means++,100,7738147.803927114,7494899.5,-1323.7432,0.2197750359773636,571.7454223632812,1.7996143618833955,True,15,10
310,10,diag,1e-05,5,k-means++,300,7738147.803927114,7494899.5,-1323.7432,0.2197750359773636,571.7454223632812,1.7996143618833955,True,15,10
311,10,diag,0.0001,1,k-means++,100,8187378.303927114,7944130.0,-1403.9629,0.18437595665454865,504.9218444824219,1.8422731699234043,True,15,10
312,10,diag,0.0001,1,k-means++,300,8187378.303927114,7944130.0,-1403.9629,0.18437595665454865,504.9218444824219,1.8422731699234043,True,15,10
313,10,diag,0.0001,5,k-means++,100,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,19,10
314,10,diag,0.0001,5,k-means++,300,7743709.303927114,7500461.0,-1324.7362,0.21908442676067352,571.03955078125,1.8024606257110887,True,19,10
315,10,diag,0.001,1,k-means++,100,8206320.303927114,7963072.0,-1407.3453,0.18497711420059204,505.0896301269531,1.8418734003624215,True,12,10
316,10,diag,0.001,1,k-means++,300,8206320.303927114,7963072.0,-1407.3453,0.18497711420059204,505.0896301269531,1.8418734003624215,True,12,10
317,10,diag,0.001,5,k-means++,100,7762911.303927114,7519663.0,-1328.1652,0.21994036436080933,571.3048706054688,1.8012326594817665,True,14,10
318,10,diag,0.001,5,k-means++,300,7762911.303927114,7519663.0,-1328.1652,0.21994036436080933,571.3048706054688,1.8012326594817665,True,14,10
319,11,diag,1e-05,1,kmeans,100,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
320,11,diag,1e-05,1,kmeans,300,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
321,11,diag,1e-05,5,kmeans,100,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
322,11,diag,1e-05,5,kmeans,300,7579813.728057295,7312240.0,-1289.6621,0.23397988080978394,564.2086791992188,1.7291402394614084,True,11,11
323,11,diag,0.0001,1,kmeans,100,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
324,11,diag,0.0001,1,kmeans,300,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
325,11,diag,0.0001,5,kmeans,100,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
326,11,diag,0.0001,5,kmeans,300,7585561.228057295,7317987.5,-1290.6885,0.23397988080978394,564.2086791992188,1.7291402394614084,True,13,11
327,11,diag,0.001,1,kmeans,100,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
328,11,diag,0.001,1,kmeans,300,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
329,11,diag,0.001,5,kmeans,100,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
330,11,diag,0.001,5,kmeans,300,7604425.728057295,7336852.0,-1294.0571,0.2341206818819046,564.5838012695312,1.7292947793505737,True,23,11
331,11,diag,1e-05,1,k-means++,100,7791839.228057295,7524265.5,-1327.5238,0.19072884321212769,512.7921752929688,1.8151284796024916,True,12,11
332,11,diag,1e-05,1,k-means++,300,7791839.228057295,7524265.5,-1327.5238,0.19072884321212769,512.7921752929688,1.8151284796024916,True,12,11
333,11,diag,1e-05,5,k-means++,100,7590459.228057295,7322885.5,-1291.5631,0.23250937461853027,557.0549926757812,1.6956773285989237,True,15,11
334,11,diag,1e-05,5,k-means++,300,7590459.228057295,7322885.5,-1291.5631,0.23250937461853027,557.0549926757812,1.6956773285989237,True,15,11
335,11,diag,0.0001,1,k-means++,100,7673894.728057295,7406321.0,-1306.4623,0.1943679302930832,520.76953125,1.7964734396608426,True,18,11
336,11,diag,0.0001,1,k-means++,300,7673894.728057295,7406321.0,-1306.4623,0.1943679302930832,520.76953125,1.7964734396608426,True,18,11
337,11,diag,0.0001,5,k-means++,100,7595473.228057295,7327899.5,-1292.4585,0.23414196074008942,558.0660400390625,1.7058883755528695,True,22,11
338,11,diag,0.0001,5,k-means++,300,7595473.228057295,7327899.5,-1292.4585,0.23414196074008942,558.0660400390625,1.7058883755528695,True,22,11
339,11,diag,0.001,1,k-means++,100,7694915.228057295,7427341.5,-1310.216,0.1941366195678711,518.650634765625,1.8003890716705255,True,19,11
340,11,diag,0.001,1,k-means++,300,7694915.228057295,7427341.5,-1310.216,0.1941366195678711,518.650634765625,1.8003890716705255,True,19,11
341,11,diag,0.001,5,k-means++,100,7603135.728057295,7335562.0,-1293.8268,0.23057277500629425,554.763427734375,1.7732846393460022,True,61,11
342,11,diag,0.001,5,k-means++,300,7603135.728057295,7335562.0,-1293.8268,0.23057277500629425,554.763427734375,1.7732846393460022,True,61,11
343,14,diag,1e-05,1,kmeans,100,7223238.000447838,6882688.0,-1208.5668,0.189774751663208,445.0321044921875,2.405689156966965,True,35,14
344,14,diag,1e-05,1,kmeans,300,7223238.000447838,6882688.0,-1208.5668,0.189774751663208,445.0321044921875,2.405689156966965,True,35,14
345,14,diag,1e-05,5,kmeans,100,7179637.000447838,6839087.0,-1200.7809,0.19881103932857513,443.0815124511719,2.3498009106287974,True,37,14
346,14,diag,1e-05,5,kmeans,300,7179637.000447838,6839087.0,-1200.7809,0.19881103932857513,443.0815124511719,2.3498009106287974,True,37,14
347,14,diag,0.0001,1,kmeans,100,7403717.000447838,7063167.0,-1240.7952,0.19351521134376526,467.3027038574219,1.8745209391776043,True,9,14
348,14,diag,0.0001,1,kmeans,300,7403717.000447838,7063167.0,-1240.7952,0.19351521134376526,467.3027038574219,1.8745209391776043,True,9,14
349,14,diag,0.0001,5,kmeans,100,7185399.000447838,6844849.0,-1201.8098,0.1989377737045288,443.1646423339844,2.3487992155318165,True,36,14
350,14,diag,0.0001,5,kmeans,300,7185399.000447838,6844849.0,-1201.8098,0.1989377737045288,443.1646423339844,2.3487992155318165,True,36,14
351,14,diag,0.001,1,kmeans,100,7249591.000447838,6909041.0,-1213.2727,0.1921805739402771,446.1286315917969,2.4012027743509816,True,32,14
352,14,diag,0.001,1,kmeans,300,7249591.000447838,6909041.0,-1213.2727,0.1921805739402771,446.1286315917969,2.4012027743509816,True,32,14
353,14,diag,0.001,5,kmeans,100,7206289.000447838,6865739.0,-1205.5402,0.2005954384803772,446.14300537109375,2.3618214198344596,True,35,14
354,14,diag,0.001,5,kmeans,300,7206289.000447838,6865739.0,-1205.5402,0.2005954384803772,446.14300537109375,2.3618214198344596,True,35,14
355,14,diag,1e-05,1,k-means++,100,7407422.000447838,7066872.0,-1241.4568,0.17794980108737946,461.5752868652344,1.8973642909602886,True,16,14
356,14,diag,1e-05,1,k-means++,300,7407422.000447838,7066872.0,-1241.4568,0.17794980108737946,461.5752868652344,1.8973642909602886,True,16,14
357,14,diag,1e-05,5,k-means++,100,7225380.500447838,6884830.5,-1208.9493,0.21065233647823334,449.29351806640625,2.369687246645459,True,41,14
358,14,diag,1e-05,5,k-means++,300,7225380.500447838,6884830.5,-1208.9493,0.21065233647823334,449.29351806640625,2.369687246645459,True,41,14
359,14,diag,0.0001,1,k-means++,100,7409969.000447838,7069419.0,-1241.9116,0.18569153547286987,462.6240539550781,1.8868216012452776,True,15,14
360,14,diag,0.0001,1,k-means++,300,7409969.000447838,7069419.0,-1241.9116,0.18569153547286987,462.6240539550781,1.8868216012452776,True,15,14
361,14,diag,0.0001,5,k-means++,100,7197481.000447838,6856931.0,-1203.9673,0.1839146614074707,442.7921142578125,2.4198138057845995,True,34,14
362,14,diag,0.0001,5,k-means++,300,7197481.000447838,6856931.0,-1203.9673,0.1839146614074707,442.7921142578125,2.4198138057845995,True,34,14
363,14,diag,0.001,1,k-means++,300,7428971.000447838,7088421.0,-1245.3048,0.18628861010074615,462.73419189453125,1.8868677406736207,False,300,14
364,14,diag,0.001,5,k-means++,100,7217251.500447838,6876701.5,-1207.4978,0.18614986538887024,443.1617431640625,2.4207333709907854,True,30,14
365,14,diag,0.001,5,k-means++,300,7217251.500447838,6876701.5,-1207.4978,0.18614986538887024,443.1617431640625,2.4207333709907854,True,30,14
366,17,diag,1e-05,1,kmeans,100,7157834.272838381,6744308.0,-1179.4664,0.15990924835205078,403.0197448730469,2.0343843124532546,True,24,17
367,17,diag,1e-05,1,kmeans,300,7157834.272838381,6744308.0,-1179.4664,0.15990924835205078,403.0197448730469,2.0343843124532546,True,24,17
368,17,diag,1e-05,5,kmeans,100,6988291.272838381,6574765.0,-1149.1909,0.18784816563129425,396.8871154785156,2.38212018534803,True,20,17
369,17,diag,1e-05,5,kmeans,300,6988291.272838381,6574765.0,-1149.1909,0.18784816563129425,396.8871154785156,2.38212018534803,True,20,17
370,17,diag,0.0001,1,kmeans,100,7164777.772838381,6751251.5,-1180.7063,0.16118811070919037,403.645263671875,2.0161663571062354,True,12,17
371,17,diag,0.0001,1,kmeans,300,7164777.772838381,6751251.5,-1180.7063,0.16118811070919037,403.645263671875,2.0161663571062354,True,12,17
372,17,diag,0.0001,5,kmeans,100,6993985.272838381,6580459.0,-1150.2076,0.18836656212806702,396.94464111328125,2.382098456335865,True,20,17
373,17,diag,0.0001,5,kmeans,300,6993985.272838381,6580459.0,-1150.2076,0.18836656212806702,396.94464111328125,2.382098456335865,True,20,17
374,17,diag,0.001,1,kmeans,100,7020208.272838381,6606682.0,-1154.8904,0.16110706329345703,392.0225830078125,2.466771067136851,True,32,17
375,17,diag,0.001,1,kmeans,300,7020208.272838381,6606682.0,-1154.8904,0.16110706329345703,392.0225830078125,2.466771067136851,True,32,17
376,17,diag,0.001,5,kmeans,100,7014078.772838381,6600552.5,-1153.7958,0.19070318341255188,397.37750244140625,2.3799724457371485,True,20,17
377,17,diag,0.001,5,kmeans,300,7014078.772838381,6600552.5,-1153.7958,0.19070318341255188,397.37750244140625,2.3799724457371485,True,20,17
378,17,diag,1e-05,1,k-means++,100,7121674.772838381,6708148.5,-1173.0094,0.15012019872665405,375.2821350097656,2.4197980533663803,True,31,17
379,17,diag,1e-05,1,k-means++,300,7121674.772838381,6708148.5,-1173.0094,0.15012019872665405,375.2821350097656,2.4197980533663803,True,31,17
380,17,diag,1e-05,5,k-means++,100,7005072.772838381,6591546.5,-1152.1876,0.14115209877490997,384.4289245605469,2.4847770953101596,True,29,17
381,17,diag,1e-05,5,k-means++,300,7005072.772838381,6591546.5,-1152.1876,0.14115209877490997,384.4289245605469,2.4847770953101596,True,29,17
382,17,diag,0.0001,1,k-means++,100,7293509.772838381,6879983.5,-1203.6942,0.15238241851329803,397.42816162109375,2.107055060535422,True,15,17
383,17,diag,0.0001,1,k-means++,300,7293509.772838381,6879983.5,-1203.6942,0.15238241851329803,397.42816162109375,2.107055060535422,True,15,17
384,17,diag,0.0001,5,k-means++,100,7015674.772838381,6602148.5,-1154.0808,0.1819005310535431,394.13629150390625,2.4964933433175283,True,18,17
385,17,diag,0.0001,5,k-means++,300,7015674.772838381,6602148.5,-1154.0808,0.1819005310535431,394.13629150390625,2.4964933433175283,True,18,17
386,17,diag,0.001,1,k-means++,100,7312575.772838381,6899049.5,-1207.0989,0.15248946845531464,397.60723876953125,2.1086064619099547,True,17,17
387,17,diag,0.001,1,k-means++,300,7312575.772838381,6899049.5,-1207.0989,0.15248946845531464,397.60723876953125,2.1086064619099547,True,17,17
388,17,diag,0.001,5,k-means++,100,7034312.772838381,6620786.5,-1157.409,0.18249236047267914,394.5459289550781,2.4918179246451175,True,23,17
389,17,diag,0.001,5,k-means++,300,7034312.772838381,6620786.5,-1157.409,0.18249236047267914,394.5459289550781,2.4918179246451175,True,23,17
390,20,diag,1e-05,1,kmeans,100,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
391,20,diag,1e-05,1,kmeans,300,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
392,20,diag,1e-05,5,kmeans,100,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
393,20,diag,1e-05,5,kmeans,300,6849987.045228925,6363484.5,-1107.0726,0.1538863182067871,351.1917419433594,2.4313421881484762,True,30,20
394,20,diag,0.0001,1,kmeans,100,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
395,20,diag,0.0001,1,kmeans,300,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
396,20,diag,0.0001,5,kmeans,100,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
397,20,diag,0.0001,5,kmeans,300,6855879.045228925,6369376.5,-1108.1248,0.15445564687252045,351.1902160644531,2.4330055346823083,True,25,20
398,20,diag,0.001,1,kmeans,100,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
399,20,diag,0.001,1,kmeans,300,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
400,20,diag,0.001,5,kmeans,100,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
401,20,diag,0.001,5,kmeans,300,6875191.545228925,6388689.0,-1111.5734,0.15459507703781128,351.5787658691406,2.432923325373909,True,36,20
402,20,diag,1e-05,1,k-means++,100,6978855.045228925,6492352.5,-1130.0847,0.13519038259983063,338.322509765625,2.5026143875581077,True,24,20
403,20,diag,1e-05,1,k-means++,300,6978855.045228925,6492352.5,-1130.0847,0.13519038259983063,338.322509765625,2.5026143875581077,True,24,20
404,20,diag,1e-05,5,k-means++,100,6897127.045228925,6410624.5,-1115.4905,0.13251666724681854,352.5394592285156,2.4669189695674225,True,42,20
405,20,diag,1e-05,5,k-means++,300,6897127.045228925,6410624.5,-1115.4905,0.13251666724681854,352.5394592285156,2.4669189695674225,True,42,20
406,20,diag,0.0001,1,k-means++,100,7011968.045228925,6525465.5,-1135.9978,0.14400699734687805,344.567138671875,2.517887865440349,True,30,20
407,20,diag,0.0001,1,k-means++,300,7011968.045228925,6525465.5,-1135.9978,0.14400699734687805,344.567138671875,2.517887865440349,True,30,20
408,20,diag,0.0001,5,k-means++,100,6905988.545228925,6419486.0,-1117.0729,0.13107705116271973,351.8740234375,2.4956842864961937,True,36,20
409,20,diag,0.0001,5,k-means++,300,6905988.545228925,6419486.0,-1117.0729,0.13107705116271973,351.8740234375,2.4956842864961937,True,36,20
410,20,diag,0.001,1,k-means++,100,7031180.545228925,6544678.0,-1139.4286,0.14613750576972961,345.2534484863281,2.516432567197497,True,27,20
411,20,diag,0.001,1,k-means++,300,7031180.545228925,6544678.0,-1139.4286,0.14613750576972961,345.2534484863281,2.516432567197497,True,27,20
412,20,diag,0.001,5,k-means++,100,6918391.545228925,6431889.0,-1119.2877,0.13308578729629517,351.49005126953125,2.474649164658472,True,35,20
413,20,diag,0.001,5,k-means++,300,6918391.545228925,6431889.0,-1119.2877,0.13308578729629517,351.49005126953125,2.474649164658472,True,35,20
1 trial_id n_components covariance_type reg_covar n_init init_params max_iter bic_score aic_score log_likelihood silhouette_score calinski_harabasz_score davies_bouldin_score converged n_iter unique_clusters
2 1 2 full 0.0001 1 kmeans 100 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
3 2 2 full 0.0001 1 kmeans 300 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
4 3 2 full 0.0001 5 kmeans 100 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
5 4 2 full 0.0001 5 kmeans 300 17260132.605124418 -7679507.0 2871.501 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
6 5 2 full 0.001 1 kmeans 100 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
7 6 2 full 0.001 1 kmeans 300 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
8 7 2 full 0.001 5 kmeans 100 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
9 8 2 full 0.001 5 kmeans 300 20844797.605124418 -4094842.0 2231.382 0.36924269795417786 1331.6861572265625 1.080913887973297 True 2 2
10 9 2 full 0.0001 1 k-means++ 100 17370120.605124418 -7569519.0 2851.86 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
11 10 2 full 0.0001 1 k-means++ 300 17370120.605124418 -7569519.0 2851.86 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
12 11 2 full 0.0001 5 k-means++ 100 17270534.605124418 -7669105.0 2869.6433 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
13 12 2 full 0.0001 5 k-means++ 300 17270534.605124418 -7669105.0 2869.6433 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
14 13 2 full 0.001 1 k-means++ 100 20919727.605124418 -4019912.0 2218.0017 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
15 14 2 full 0.001 1 k-means++ 300 20919727.605124418 -4019912.0 2218.0017 0.3683019280433655 1320.3240966796875 1.0772816604479254 True 3 2
16 15 2 full 0.001 5 k-means++ 100 20851959.605124418 -4087680.0 2230.1033 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
17 16 2 full 0.001 5 k-means++ 300 20851959.605124418 -4087680.0 2230.1033 0.3693194091320038 1331.4493408203125 1.0799693510874797 True 3 2
18 17 3 full 0.0001 1 kmeans 100 33833558.37637398 -3575904.0 2888.795 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
19 18 3 full 0.0001 1 kmeans 300 33833558.37637398 -3575904.0 2888.795 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
20 19 3 full 0.0001 5 kmeans 100 26462676.376373976 -10946786.0 4205.024 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
21 20 3 full 0.0001 5 kmeans 300 26462676.376373976 -10946786.0 4205.024 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
22 21 3 full 0.001 1 kmeans 100 37452100.37637398 42638.0 2242.6267 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
23 22 3 full 0.001 1 kmeans 300 37452100.37637398 42638.0 2242.6267 0.37564584612846375 781.5426635742188 0.7905502894819209 True 2 3
24 23 3 full 0.001 5 kmeans 100 33411843.376373976 -3997619.0 2964.1013 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
25 24 3 full 0.001 5 kmeans 300 33411843.376373976 -3997619.0 2964.1013 0.2200196236371994 918.8184814453125 1.8325651201939497 True 2 3
26 25 3 full 0.001 1 k-means++ 100 37089716.37637398 -319746.0 2307.3381 0.3671606779098511 710.0538940429688 1.7711288790751016 True 3 3
27 26 3 full 0.001 1 k-means++ 300 37089716.37637398 -319746.0 2307.3381 0.3671606779098511 710.0538940429688 1.7711288790751016 True 3 3
28 27 3 full 0.001 5 k-means++ 100 33366158.376373976 -4043304.0 2972.2593 0.19688381254673004 834.6746215820312 2.265938022603405 True 3 3
29 28 3 full 0.001 5 k-means++ 300 33366158.376373976 -4043304.0 2972.2593 0.19688381254673004 834.6746215820312 2.265938022603405 True 3 3
30 29 4 full 0.0001 1 kmeans 100 44240183.14762353 -5639102.0 4007.3035 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
31 30 4 full 0.0001 1 kmeans 300 44240183.14762353 -5639102.0 4007.3035 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
32 31 4 full 0.0001 5 kmeans 100 38759701.14762353 -11119584.0 4985.961 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
33 32 4 full 0.0001 5 kmeans 300 38759701.14762353 -11119584.0 4985.961 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
34 33 4 full 0.001 1 kmeans 100 50532988.14762353 653703.0 2883.5884 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
35 34 4 full 0.001 1 kmeans 300 50532988.14762353 653703.0 2883.5884 0.310958594083786 829.0513916015625 1.0906616124036408 True 2 4
36 35 4 full 0.001 5 kmeans 100 47456739.14762353 -2422546.0 3432.9185 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
37 36 4 full 0.001 5 kmeans 300 47456739.14762353 -2422546.0 3432.9185 0.2639731168746948 873.2352905273438 1.6723937598752374 True 2 4
38 37 4 full 0.001 1 k-means++ 100 50369343.14762353 490058.0 2912.8108 0.16492997109889984 579.3992309570312 2.14603204385876 True 3 4
39 38 4 full 0.001 1 k-means++ 300 50369343.14762353 490058.0 2912.8108 0.16492997109889984 579.3992309570312 2.14603204385876 True 3 4
40 39 4 full 0.001 5 k-means++ 100 48104059.14762353 -1775226.0 3317.3257 0.19116489589214325 729.421630859375 2.391271825318095 True 3 4
41 40 4 full 0.001 5 k-means++ 300 48104059.14762353 -1775226.0 3317.3257 0.19116489589214325 729.421630859375 2.391271825318095 True 3 4
42 41 5 full 0.0001 1 kmeans 100 60034171.91887309 -2314936.0 4163.7827 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
43 42 5 full 0.0001 1 kmeans 300 60034171.91887309 -2314936.0 4163.7827 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
44 43 5 full 0.0001 5 kmeans 100 54230057.91887309 -8119050.0 5200.232 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
45 44 5 full 0.0001 5 kmeans 300 54230057.91887309 -8119050.0 5200.232 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
46 45 5 full 0.001 1 kmeans 100 66698922.91887309 4349815.0 2973.6487 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
47 46 5 full 0.001 1 kmeans 300 66698922.91887309 4349815.0 2973.6487 0.3162730038166046 780.908935546875 1.0143329238161003 True 2 5
48 47 5 full 0.001 5 kmeans 100 63375271.91887309 1026164.0 3567.158 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
49 48 5 full 0.001 5 kmeans 300 63375271.91887309 1026164.0 3567.158 0.25592249631881714 721.9691162109375 1.585169064053077 True 2 5
50 49 5 full 0.001 1 k-means++ 100 66517075.91887309 4167968.0 3006.1213 0.16880138218402863 549.265625 1.8881643569801063 True 3 5
51 50 5 full 0.001 1 k-means++ 300 66517075.91887309 4167968.0 3006.1213 0.16880138218402863 549.265625 1.8881643569801063 True 3 5
52 51 5 full 0.001 5 k-means++ 100 63364071.91887309 1014964.0 3569.158 0.25286975502967834 715.984619140625 1.6093410042807197 True 3 5
53 52 5 full 0.001 5 k-means++ 300 63364071.91887309 1014964.0 3569.158 0.25286975502967834 715.984619140625 1.6093410042807197 True 3 5
54 53 6 full 0.0001 1 kmeans 100 73062550.69012265 -1756380.0 4814.121 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
55 54 6 full 0.0001 1 kmeans 300 73062550.69012265 -1756380.0 4814.121 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
56 55 6 full 0.0001 5 kmeans 100 69890932.69012265 -4927998.0 5380.4814 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
57 56 6 full 0.0001 5 kmeans 300 69890932.69012265 -4927998.0 5380.4814 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
58 57 6 full 0.001 1 kmeans 100 81179056.69012265 6360126.0 3364.745 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
59 58 6 full 0.001 1 kmeans 300 81179056.69012265 6360126.0 3364.745 0.24853873252868652 668.8661499023438 1.646429379523011 True 2 6
60 59 6 full 0.001 5 kmeans 100 79356620.69012265 4537690.0 3690.18 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
61 60 6 full 0.001 5 kmeans 300 79356620.69012265 4537690.0 3690.18 0.27074411511421204 655.6273193359375 1.6294192539951549 True 2 6
62 61 6 full 1e-05 1 k-means++ 100 68753328.69012265 -6065602.0 5583.625 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
63 62 6 full 1e-05 1 k-means++ 300 68753328.69012265 -6065602.0 5583.625 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
64 63 6 full 0.0001 1 k-means++ 100 75948732.69012265 1129802.0 4298.7314 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
65 64 6 full 0.0001 1 k-means++ 300 75948732.69012265 1129802.0 4298.7314 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
66 65 6 full 0.0001 5 k-means++ 100 69381502.69012265 -5437428.0 5471.451 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
67 66 6 full 0.0001 5 k-means++ 300 69381502.69012265 -5437428.0 5471.451 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
68 67 6 full 0.001 1 k-means++ 100 83185656.69012265 8366726.0 3006.4236 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
69 68 6 full 0.001 1 k-means++ 300 83185656.69012265 8366726.0 3006.4236 0.17110876739025116 480.8229675292969 1.5572656008570327 True 3 6
70 69 6 full 0.001 5 k-means++ 100 79079084.69012265 4260154.0 3739.74 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
71 70 6 full 0.001 5 k-means++ 300 79079084.69012265 4260154.0 3739.74 0.21281521022319794 580.2518920898438 2.180165862436555 True 3 6
72 71 8 full 0.0001 1 kmeans 100 101322900.23262176 1564326.0 5721.299 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
73 72 8 full 0.0001 1 kmeans 300 101322900.23262176 1564326.0 5721.299 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
74 73 8 full 0.0001 5 kmeans 100 100810002.23262176 1051428.0 5812.8877 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
75 74 8 full 0.0001 5 kmeans 300 100810002.23262176 1051428.0 5812.8877 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
76 75 8 full 0.001 1 kmeans 100 111448618.23262176 11690044.0 3913.135 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
77 76 8 full 0.001 1 kmeans 300 111448618.23262176 11690044.0 3913.135 0.2680038809776306 557.3214721679688 1.5652706740038278 True 2 8
78 77 8 full 0.001 5 kmeans 100 111172686.23262176 11414112.0 3962.4087 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
79 78 8 full 0.001 5 kmeans 300 111172686.23262176 11414112.0 3962.4087 0.27481919527053833 642.9092407226562 1.6967101819134367 True 2 8
80 79 8 full 0.001 1 k-means++ 100 111979964.23262176 12221390.0 3818.2517 0.2020130306482315 465.200927734375 1.9463124697846808 True 3 8
81 80 8 full 0.001 1 k-means++ 300 111979964.23262176 12221390.0 3818.2517 0.2020130306482315 465.200927734375 1.9463124697846808 True 3 8
82 81 8 full 0.001 5 k-means++ 100 111327662.23262176 11569088.0 3934.7344 0.2736768126487732 617.4371948242188 1.7398856934277325 True 3 8
83 82 8 full 0.001 5 k-means++ 300 111327662.23262176 11569088.0 3934.7344 0.2736768126487732 617.4371948242188 1.7398856934277325 True 3 8
84 83 10 full 0.0001 1 kmeans 100 133265705.77512088 8567482.0 5970.8955 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
85 84 10 full 0.0001 1 kmeans 300 133265705.77512088 8567482.0 5970.8955 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
86 85 10 full 0.0001 5 kmeans 100 132892239.77512088 8194016.0 6037.586 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
87 86 10 full 0.0001 5 kmeans 300 132892239.77512088 8194016.0 6037.586 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
88 87 10 full 0.001 1 kmeans 100 143970687.77512088 19272464.0 4059.2915 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
89 88 10 full 0.001 1 kmeans 300 143970687.77512088 19272464.0 4059.2915 0.24388161301612854 576.40185546875 1.5385559411472558 True 2 10
90 89 10 full 0.001 5 kmeans 100 143652495.77512088 18954272.0 4116.1113 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
91 90 10 full 0.001 5 kmeans 300 143652495.77512088 18954272.0 4116.1113 0.28627628087997437 557.144775390625 1.6716653781194553 True 2 10
92 91 10 full 0.001 1 k-means++ 100 144482919.77512088 19784696.0 3967.8215 0.17508849501609802 474.4588928222656 1.846488092509191 True 3 10
93 92 10 full 0.001 1 k-means++ 300 144482919.77512088 19784696.0 3967.8215 0.17508849501609802 474.4588928222656 1.846488092509191 True 3 10
94 93 10 full 0.001 5 k-means++ 100 144071547.77512088 19373324.0 4041.2808 0.22849640250205994 521.3035278320312 1.9523215129883376 True 3 10
95 94 10 full 0.001 5 k-means++ 300 144071547.77512088 19373324.0 4041.2808 0.22849640250205994 521.3035278320312 1.9523215129883376 True 3 10
96 95 11 full 0.0001 1 kmeans 100 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
97 96 11 full 0.0001 1 kmeans 300 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
98 97 11 full 0.0001 5 kmeans 100 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
99 98 11 full 0.0001 5 kmeans 300 149128048.54637042 11960004.0 6115.1685 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
100 99 11 full 0.001 1 kmeans 100 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
101 100 11 full 0.001 1 kmeans 300 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
102 101 11 full 0.001 5 kmeans 100 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
103 102 11 full 0.001 5 kmeans 300 160074600.54637042 22906556.0 4160.4272 0.2577499449253082 598.6676635742188 1.5193188313170118 True 2 11
104 103 11 full 0.001 1 k-means++ 100 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
105 104 11 full 0.001 1 k-means++ 300 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
106 105 11 full 0.001 5 k-means++ 100 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
107 106 11 full 0.001 5 k-means++ 300 160636770.54637042 23468726.0 4060.0396 0.18649740517139435 485.63348388671875 1.8009971426865101 True 3 11
108 107 14 full 0.0001 1 kmeans 100 198149922.8601191 23572408.0 6291.7656 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
109 108 14 full 0.0001 1 kmeans 300 198149922.8601191 23572408.0 6291.7656 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
110 109 14 full 0.0001 5 kmeans 100 197540314.8601191 22962800.0 6400.6245 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
111 110 14 full 0.0001 5 kmeans 300 197540314.8601191 22962800.0 6400.6245 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
112 111 14 full 0.001 1 kmeans 100 209401674.8601191 34824160.0 4282.5244 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
113 112 14 full 0.001 1 kmeans 300 209401674.8601191 34824160.0 4282.5244 0.21188320219516754 491.18792724609375 1.7082811638393387 True 2 14
114 113 14 full 0.001 5 kmeans 100 208994740.8601191 34417224.0 4355.191 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
115 114 14 full 0.001 5 kmeans 300 208994740.8601191 34417224.0 4355.191 0.20964229106903076 496.4472351074219 1.950038464238459 True 2 14
116 115 14 full 0.0001 1 k-means++ 100 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
117 116 14 full 0.0001 1 k-means++ 300 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
118 117 14 full 0.0001 5 k-means++ 100 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
119 118 14 full 0.0001 5 k-means++ 300 197987434.8601191 23409920.0 6320.7812 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
120 119 14 full 0.001 1 k-means++ 100 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
121 120 14 full 0.001 1 k-means++ 300 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
122 121 14 full 0.001 5 k-means++ 100 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
123 122 14 full 0.001 5 k-means++ 300 209335602.8601191 34758090.0 4294.3228 0.16694776713848114 449.8548889160156 1.8331922544784534 True 3 14
124 123 17 full 0.0001 1 kmeans 100 247173509.17386776 35186530.0 6468.057 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
125 124 17 full 0.0001 1 kmeans 300 247173509.17386776 35186530.0 6468.057 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
126 125 17 full 0.0001 5 kmeans 100 246850361.17386776 34863380.0 6525.762 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
127 126 17 full 0.0001 5 kmeans 300 246850361.17386776 34863380.0 6525.762 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
128 127 17 full 0.001 1 kmeans 100 258736973.17386776 46749990.0 4403.153 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
129 128 17 full 0.001 1 kmeans 300 258736973.17386776 46749990.0 4403.153 0.1834743171930313 427.35272216796875 1.8956740098304399 True 2 17
130 129 17 full 0.001 5 kmeans 100 258504801.17386776 46517820.0 4444.6123 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
131 130 17 full 0.001 5 kmeans 300 258504801.17386776 46517820.0 4444.6123 0.2085043042898178 427.7763977050781 1.9588585142518828 True 2 17
132 131 17 full 0.0001 1 k-means++ 100 247607397.17386776 35620416.0 6390.577 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
133 132 17 full 0.0001 1 k-means++ 300 247607397.17386776 35620416.0 6390.577 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
134 133 17 full 0.0001 5 k-means++ 100 246784997.17386776 34798016.0 6537.434 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
135 134 17 full 0.0001 5 k-means++ 300 246784997.17386776 34798016.0 6537.434 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
136 135 17 full 0.001 1 k-means++ 100 259055585.17386776 47068604.0 4346.258 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
137 136 17 full 0.001 1 k-means++ 300 259055585.17386776 47068604.0 4346.258 0.14455115795135498 384.99053955078125 2.108500185002096 True 3 17
138 137 17 full 0.001 5 k-means++ 100 258522869.17386776 46535890.0 4441.3857 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
139 138 17 full 0.001 5 k-means++ 300 258522869.17386776 46535890.0 4441.3857 0.13458234071731567 386.3608093261719 2.3614049227531075 True 3 17
140 139 20 full 0.0001 1 kmeans 100 296473639.4876164 47077190.0 6594.966 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
141 140 20 full 0.0001 1 kmeans 300 296473639.4876164 47077190.0 6594.966 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
142 141 20 full 0.001 1 kmeans 100 308235301.4876164 58838856.0 4494.669 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
143 142 20 full 0.001 1 kmeans 300 308235301.4876164 58838856.0 4494.669 0.1770476996898651 382.437744140625 1.8608292401058428 True 2 20
144 143 20 full 0.001 5 kmeans 100 307947927.4876164 58551480.0 4545.986 0.12884767353534698 377.9795227050781 2.0180962938149367 True 2 20
145 144 20 full 0.001 5 kmeans 300 307947927.4876164 58551480.0 4545.986 0.12884767353534698 377.9795227050781 2.0180962938149367 True 2 20
146 145 20 full 0.0001 1 k-means++ 100 297139767.4876164 47743320.0 6476.014 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
147 146 20 full 0.0001 1 k-means++ 300 297139767.4876164 47743320.0 6476.014 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
148 147 20 full 0.001 1 k-means++ 100 308712155.4876164 59315708.0 4409.5166 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
149 148 20 full 0.001 1 k-means++ 300 308712155.4876164 59315708.0 4409.5166 0.13996723294258118 336.5575866699219 2.2953358196957456 True 3 20
150 149 20 full 0.001 5 k-means++ 100 308599855.4876164 59203410.0 4429.57 0.15204866230487823 341.3536376953125 2.231048217195437 True 3 20
151 150 20 full 0.001 5 k-means++ 300 308599855.4876164 59203410.0 4429.57 0.15204866230487823 341.3536376953125 2.231048217195437 True 3 20
152 151 2 diag 1e-05 1 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
153 152 2 diag 1e-05 1 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
154 153 2 diag 1e-05 5 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
155 154 2 diag 1e-05 5 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
156 155 2 diag 0.0001 1 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
157 156 2 diag 0.0001 1 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
158 157 2 diag 0.0001 5 kmeans 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
159 158 2 diag 0.0001 5 kmeans 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
160 159 2 diag 0.001 1 kmeans 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
161 160 2 diag 0.001 1 kmeans 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
162 161 2 diag 0.001 5 kmeans 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
163 162 2 diag 0.001 5 kmeans 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 4 2
164 163 2 diag 1e-05 1 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
165 164 2 diag 1e-05 1 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
166 165 2 diag 1e-05 5 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
167 166 2 diag 1e-05 5 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 6 2
168 167 2 diag 0.0001 1 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
169 168 2 diag 0.0001 1 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
170 169 2 diag 0.0001 5 k-means++ 100 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
171 170 2 diag 0.0001 5 k-means++ 300 13089173.910885666 13040529.0 -2325.7397 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
172 171 2 diag 0.001 1 k-means++ 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
173 172 2 diag 0.001 1 k-means++ 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
174 173 2 diag 0.001 5 k-means++ 100 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
175 174 2 diag 0.001 5 k-means++ 300 13089203.910885666 13040559.0 -2325.745 0.36971479654312134 1327.397216796875 1.073152783729392 True 5 2
176 175 3 diag 1e-05 1 kmeans 100 12693850.335015846 12620880.0 -2249.3394 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
177 176 3 diag 1e-05 1 kmeans 300 12693850.335015846 12620880.0 -2249.3394 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
178 177 3 diag 1e-05 5 kmeans 100 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 19 3
179 178 3 diag 1e-05 5 kmeans 300 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 19 3
180 179 3 diag 0.0001 1 kmeans 100 12699627.335015846 12626657.0 -2250.3708 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
181 180 3 diag 0.0001 1 kmeans 300 12699627.335015846 12626657.0 -2250.3708 0.3760926127433777 779.2965087890625 0.7860265546274455 True 6 3
182 181 3 diag 0.0001 5 kmeans 100 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 20 3
183 182 3 diag 0.0001 5 kmeans 300 11770626.335015846 11697656.0 -2084.4778 0.1531982570886612 837.4287719726562 1.6999940518251055 True 20 3
184 183 3 diag 0.001 1 kmeans 100 12718245.335015846 12645275.0 -2253.6956 0.3760926127433777 779.2965087890625 0.7860265546274455 True 7 3
185 184 3 diag 0.001 1 kmeans 300 12718245.335015846 12645275.0 -2253.6956 0.3760926127433777 779.2965087890625 0.7860265546274455 True 7 3
186 185 3 diag 0.001 5 kmeans 100 11770859.335015846 11697889.0 -2084.5195 0.15369778871536255 838.0372924804688 1.7007544548321498 True 19 3
187 186 3 diag 0.001 5 kmeans 300 11770859.335015846 11697889.0 -2084.5195 0.15369778871536255 838.0372924804688 1.7007544548321498 True 19 3
188 187 3 diag 1e-05 1 k-means++ 100 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
189 188 3 diag 1e-05 1 k-means++ 300 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
190 189 3 diag 1e-05 5 k-means++ 100 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
191 190 3 diag 1e-05 5 k-means++ 300 11686081.335015846 11613111.0 -2069.3806 0.2351498007774353 882.5064086914062 2.071681816869212 True 19 3
192 191 3 diag 0.0001 1 k-means++ 100 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
193 192 3 diag 0.0001 1 k-means++ 300 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
194 193 3 diag 0.0001 5 k-means++ 100 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
195 194 3 diag 0.0001 5 k-means++ 300 11686083.335015846 11613113.0 -2069.3809 0.2351498007774353 882.5064086914062 2.071681816869212 True 21 3
196 195 3 diag 0.001 1 k-means++ 100 11686162.335015846 11613192.0 -2069.395 0.2351498007774353 882.5064086914062 2.071681816869212 True 22 3
197 196 3 diag 0.001 1 k-means++ 300 11686162.335015846 11613192.0 -2069.395 0.2351498007774353 882.5064086914062 2.071681816869212 True 22 3
198 197 3 diag 0.001 5 k-means++ 100 11686154.335015846 11613184.0 -2069.3936 0.2351498007774353 882.5064697265625 2.071681816869212 True 13 3
199 198 3 diag 0.001 5 k-means++ 300 11686154.335015846 11613184.0 -2069.3936 0.2351498007774353 882.5064697265625 2.071681816869212 True 13 3
200 199 4 diag 1e-05 1 kmeans 100 11525150.759146027 11427855.0 -2034.8359 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
201 200 4 diag 1e-05 1 kmeans 300 11525150.759146027 11427855.0 -2034.8359 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
202 201 4 diag 1e-05 5 kmeans 100 10872145.759146027 10774850.0 -1918.2279 0.22238799929618835 665.7476806640625 2.503304023275595 True 17 4
203 202 4 diag 1e-05 5 kmeans 300 10872145.759146027 10774850.0 -1918.2279 0.22238799929618835 665.7476806640625 2.503304023275595 True 17 4
204 203 4 diag 0.0001 1 kmeans 100 11530927.759146027 11433632.0 -2035.8676 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
205 204 4 diag 0.0001 1 kmeans 300 11530927.759146027 11433632.0 -2035.8676 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
206 205 4 diag 0.0001 5 kmeans 100 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 16 4
207 206 4 diag 0.0001 5 kmeans 300 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 16 4
208 207 4 diag 0.001 1 kmeans 100 11549555.759146027 11452260.0 -2039.194 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
209 208 4 diag 0.001 1 kmeans 300 11549555.759146027 11452260.0 -2039.194 0.3090108335018158 828.0037841796875 1.0965690514458653 True 4 4
210 209 4 diag 0.001 5 kmeans 100 10872103.759146027 10774808.0 -1918.2203 0.2233457714319229 667.0189208984375 2.500263810780557 True 18 4
211 210 4 diag 0.001 5 kmeans 300 10872103.759146027 10774808.0 -1918.2203 0.2233457714319229 667.0189208984375 2.500263810780557 True 18 4
212 211 4 diag 1e-05 1 k-means++ 100 10871971.759146027 10774676.0 -1918.1968 0.22509750723838806 669.1204223632812 2.4944239618747446 True 17 4
213 212 4 diag 1e-05 1 k-means++ 300 10871971.759146027 10774676.0 -1918.1968 0.22509750723838806 669.1204223632812 2.4944239618747446 True 17 4
214 213 4 diag 1e-05 5 k-means++ 100 10865268.759146027 10767973.0 -1916.9999 0.18924137949943542 715.3873291015625 1.9068310882928445 True 23 4
215 214 4 diag 1e-05 5 k-means++ 300 10865268.759146027 10767973.0 -1916.9999 0.18924137949943542 715.3873291015625 1.9068310882928445 True 23 4
216 215 4 diag 0.0001 1 k-means++ 100 11379467.759146027 11282172.0 -2008.821 0.23593758046627045 682.464599609375 1.5970337323460135 True 12 4
217 216 4 diag 0.0001 1 k-means++ 300 11379467.759146027 11282172.0 -2008.821 0.23593758046627045 682.464599609375 1.5970337323460135 True 12 4
218 217 4 diag 0.0001 5 k-means++ 100 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 26 4
219 218 4 diag 0.0001 5 k-means++ 300 10872147.759146027 10774852.0 -1918.2283 0.22238799929618835 665.7476806640625 2.503304023275595 True 26 4
220 219 4 diag 0.001 1 k-means++ 100 11398228.759146027 11300933.0 -2012.1713 0.23644769191741943 683.3140869140625 1.5953322750132166 True 13 4
221 220 4 diag 0.001 1 k-means++ 300 11398228.759146027 11300933.0 -2012.1713 0.23644769191741943 683.3140869140625 1.5953322750132166 True 13 4
222 221 4 diag 0.001 5 k-means++ 100 10872234.759146027 10774939.0 -1918.2438 0.22265465557575226 666.09033203125 2.5026321909350457 True 24 4
223 222 4 diag 0.001 5 k-means++ 300 10872234.759146027 10774939.0 -1918.2438 0.22265465557575226 666.09033203125 2.5026321909350457 True 24 4
224 223 5 diag 1e-05 1 kmeans 100 10641753.183276208 10520132.0 -1871.2793 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
225 224 5 diag 1e-05 1 kmeans 300 10641753.183276208 10520132.0 -1871.2793 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
226 225 5 diag 1e-05 5 kmeans 100 10324953.183276208 10203332.0 -1814.7079 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
227 226 5 diag 1e-05 5 kmeans 300 10324953.183276208 10203332.0 -1814.7079 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
228 227 5 diag 0.0001 1 kmeans 100 10647529.183276208 10525908.0 -1872.3107 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
229 228 5 diag 0.0001 1 kmeans 300 10647529.183276208 10525908.0 -1872.3107 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
230 229 5 diag 0.0001 5 kmeans 100 10324954.183276208 10203333.0 -1814.708 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
231 230 5 diag 0.0001 5 kmeans 300 10324954.183276208 10203333.0 -1814.708 0.21296893060207367 639.6068115234375 1.5943357881476847 True 20 5
232 231 5 diag 0.001 1 kmeans 100 10666196.183276208 10544575.0 -1875.6442 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
233 232 5 diag 0.001 1 kmeans 300 10666196.183276208 10544575.0 -1875.6442 0.3118983507156372 774.8809814453125 1.0195520325095044 True 5 5
234 233 5 diag 0.001 5 kmeans 100 10327782.183276208 10206161.0 -1815.213 0.2155037522315979 645.0463256835938 1.5864905576853905 True 23 5
235 234 5 diag 0.001 5 kmeans 300 10327782.183276208 10206161.0 -1815.213 0.2155037522315979 645.0463256835938 1.5864905576853905 True 23 5
236 235 5 diag 1e-05 1 k-means++ 100 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
237 236 5 diag 1e-05 1 k-means++ 300 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
238 237 5 diag 1e-05 5 k-means++ 100 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
239 238 5 diag 1e-05 5 k-means++ 300 9931250.183276208 9809629.0 -1744.4038 0.2225552350282669 602.316162109375 2.189639810006293 True 17 5
240 239 5 diag 0.0001 1 k-means++ 100 10466296.183276208 10344675.0 -1839.9476 0.24029850959777832 631.74609375 1.4320268333089838 True 14 5
241 240 5 diag 0.0001 1 k-means++ 300 10466296.183276208 10344675.0 -1839.9476 0.24029850959777832 631.74609375 1.4320268333089838 True 14 5
242 241 5 diag 0.0001 5 k-means++ 100 9948819.183276208 9827198.0 -1747.541 0.19564315676689148 662.848876953125 1.6781877684435718 True 17 5
243 242 5 diag 0.0001 5 k-means++ 300 9948819.183276208 9827198.0 -1747.541 0.19564315676689148 662.848876953125 1.6781877684435718 True 17 5
244 243 5 diag 0.001 1 k-means++ 100 10485035.183276208 10363414.0 -1843.294 0.24058617651462555 631.865478515625 1.432154375281304 True 17 5
245 244 5 diag 0.001 1 k-means++ 300 10485035.183276208 10363414.0 -1843.294 0.24058617651462555 631.865478515625 1.432154375281304 True 17 5
246 245 5 diag 0.001 5 k-means++ 100 9949011.183276208 9827390.0 -1747.5753 0.19609026610851288 663.4921264648438 1.6774180513139325 True 19 5
247 246 5 diag 0.001 5 k-means++ 300 9949011.183276208 9827390.0 -1747.5753 0.19609026610851288 663.4921264648438 1.6774180513139325 True 19 5
248 247 6 diag 1e-05 1 kmeans 100 9799004.60740639 9653058.0 -1714.9814 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
249 248 6 diag 1e-05 1 kmeans 300 9799004.60740639 9653058.0 -1714.9814 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
250 249 6 diag 1e-05 5 kmeans 100 9102218.60740639 8956272.0 -1590.5553 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
251 250 6 diag 1e-05 5 kmeans 300 9102218.60740639 8956272.0 -1590.5553 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
252 251 6 diag 0.0001 1 kmeans 100 9804784.60740639 9658838.0 -1716.0135 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
253 252 6 diag 0.0001 1 kmeans 300 9804784.60740639 9658838.0 -1716.0135 0.26129186153411865 629.4482421875 1.9915981688116708 True 15 6
254 253 6 diag 0.0001 5 kmeans 100 9102220.60740639 8956274.0 -1590.5557 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
255 254 6 diag 0.0001 5 kmeans 300 9102220.60740639 8956274.0 -1590.5557 0.21352295577526093 566.8534545898438 2.13510416879353 True 19 6
256 255 6 diag 0.001 1 kmeans 100 9823540.60740639 9677594.0 -1719.3629 0.26235219836235046 629.54638671875 1.991657340604508 True 16 6
257 256 6 diag 0.001 1 kmeans 300 9823540.60740639 9677594.0 -1719.3629 0.26235219836235046 629.54638671875 1.991657340604508 True 16 6
258 257 6 diag 0.001 5 kmeans 100 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 28 6
259 258 6 diag 0.001 5 kmeans 300 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 28 6
260 259 6 diag 1e-05 1 k-means++ 100 9646063.60740639 9500117.0 -1687.6705 0.20277422666549683 612.2411499023438 1.4227889323530194 True 20 6
261 260 6 diag 1e-05 1 k-means++ 300 9646063.60740639 9500117.0 -1687.6705 0.20277422666549683 612.2411499023438 1.4227889323530194 True 20 6
262 261 6 diag 1e-05 5 k-means++ 100 9102296.60740639 8956350.0 -1590.5693 0.21240639686584473 565.2139282226562 2.136853531967839 True 21 6
263 262 6 diag 1e-05 5 k-means++ 300 9102296.60740639 8956350.0 -1590.5693 0.21240639686584473 565.2139282226562 2.136853531967839 True 21 6
264 263 6 diag 0.0001 1 k-means++ 100 9682908.60740639 9536962.0 -1694.25 0.17942221462726593 527.6393432617188 1.9590089629656866 True 34 6
265 264 6 diag 0.0001 1 k-means++ 300 9682908.60740639 9536962.0 -1694.25 0.17942221462726593 527.6393432617188 1.9590089629656866 True 34 6
266 265 6 diag 0.0001 5 k-means++ 100 9102298.60740639 8956352.0 -1590.5697 0.21240639686584473 565.2139282226562 2.136853531967839 True 24 6
267 266 6 diag 0.0001 5 k-means++ 300 9102298.60740639 8956352.0 -1590.5697 0.21240639686584473 565.2139282226562 2.136853531967839 True 24 6
268 267 6 diag 0.001 1 k-means++ 100 9701922.60740639 9555976.0 -1697.6454 0.18013142049312592 529.0560913085938 1.9546049777626981 True 31 6
269 268 6 diag 0.001 1 k-means++ 300 9701922.60740639 9555976.0 -1697.6454 0.18013142049312592 529.0560913085938 1.9546049777626981 True 31 6
270 269 6 diag 0.001 5 k-means++ 100 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 31 6
271 270 6 diag 0.001 5 k-means++ 300 9102520.60740639 8956574.0 -1590.6093 0.21336553990840912 567.1337890625 2.137562111985855 True 31 6
272 271 8 diag 1e-05 1 kmeans 100 9403674.455666753 9209077.0 -1632.7727 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
273 272 8 diag 1e-05 1 kmeans 300 9403674.455666753 9209077.0 -1632.7727 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
274 273 8 diag 1e-05 5 kmeans 100 8401628.455666753 8207031.0 -1453.8359 0.24377639591693878 568.4773559570312 2.056540191275295 True 13 8
275 274 8 diag 1e-05 5 kmeans 300 8401628.455666753 8207031.0 -1453.8359 0.24377639591693878 568.4773559570312 2.056540191275295 True 13 8
276 275 8 diag 0.0001 1 kmeans 100 9409411.455666753 9214814.0 -1633.7971 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
277 276 8 diag 0.0001 1 kmeans 300 9409411.455666753 9214814.0 -1633.7971 0.25722020864486694 545.548095703125 1.7082735900456691 True 9 8
278 277 8 diag 0.0001 5 kmeans 100 8401766.955666753 8207169.5 -1453.8606 0.24412217736244202 568.5999755859375 2.0555351393061194 True 13 8
279 278 8 diag 0.0001 5 kmeans 300 8401766.955666753 8207169.5 -1453.8606 0.24412217736244202 568.5999755859375 2.0555351393061194 True 13 8
280 279 8 diag 0.001 1 kmeans 100 9428169.455666753 9233572.0 -1637.1467 0.2572267949581146 545.4515991210938 1.707896480449314 True 12 8
281 280 8 diag 0.001 1 kmeans 300 9428169.455666753 9233572.0 -1637.1467 0.2572267949581146 545.4515991210938 1.707896480449314 True 12 8
282 281 8 diag 0.001 5 kmeans 100 8402030.455666753 8207433.0 -1453.9077 0.24412217736244202 568.5999755859375 2.0555351393061194 True 19 8
283 282 8 diag 0.001 5 kmeans 300 8402030.455666753 8207433.0 -1453.9077 0.24412217736244202 568.5999755859375 2.0555351393061194 True 19 8
284 283 8 diag 1e-05 1 k-means++ 100 9222857.455666753 9028260.0 -1600.4839 0.20888392627239227 437.5425109863281 2.1599292696306343 True 22 8
285 284 8 diag 1e-05 1 k-means++ 300 9222857.455666753 9028260.0 -1600.4839 0.20888392627239227 437.5425109863281 2.1599292696306343 True 22 8
286 285 8 diag 1e-05 5 k-means++ 100 8425934.455666753 8231337.0 -1458.1763 0.240326389670372 491.6292724609375 2.0912175194979867 True 18 8
287 286 8 diag 1e-05 5 k-means++ 300 8425934.455666753 8231337.0 -1458.1763 0.240326389670372 491.6292724609375 2.0912175194979867 True 18 8
288 287 8 diag 0.0001 1 k-means++ 100 9489607.455666753 9295010.0 -1648.1178 0.214387446641922 498.1647033691406 1.8502738691794258 True 15 8
289 288 8 diag 0.0001 1 k-means++ 300 9489607.455666753 9295010.0 -1648.1178 0.214387446641922 498.1647033691406 1.8502738691794258 True 15 8
290 289 8 diag 0.0001 5 k-means++ 100 8401862.455666753 8207265.0 -1453.8777 0.24387237429618835 568.502197265625 2.056662610079275 True 14 8
291 290 8 diag 0.0001 5 k-means++ 300 8401862.455666753 8207265.0 -1453.8777 0.24387237429618835 568.502197265625 2.056662610079275 True 14 8
292 291 8 diag 0.001 1 k-means++ 100 9508329.455666753 9313732.0 -1651.461 0.21473833918571472 498.2613830566406 1.850583105515141 True 13 8
293 292 8 diag 0.001 1 k-means++ 300 9508329.455666753 9313732.0 -1651.461 0.21473833918571472 498.2613830566406 1.850583105515141 True 13 8
294 293 8 diag 0.001 5 k-means++ 100 8402078.455666753 8207481.0 -1453.9163 0.2442118227481842 568.6036987304688 2.0567161321422094 True 17 8
295 294 8 diag 0.001 5 k-means++ 300 8402078.455666753 8207481.0 -1453.9163 0.2442118227481842 568.6036987304688 2.0567161321422094 True 17 8
296 295 10 diag 1e-05 1 kmeans 100 7888954.303927114 7645706.0 -1350.6729 0.20840129256248474 479.23870849609375 1.9209118556333535 True 40 10
297 296 10 diag 1e-05 1 kmeans 300 7888954.303927114 7645706.0 -1350.6729 0.20840129256248474 479.23870849609375 1.9209118556333535 True 40 10
298 297 10 diag 1e-05 5 kmeans 100 7737961.303927114 7494713.0 -1323.7098 0.21908442676067352 571.03955078125 1.8024606257110887 True 14 10
299 298 10 diag 1e-05 5 kmeans 300 7737961.303927114 7494713.0 -1323.7098 0.21908442676067352 571.03955078125 1.8024606257110887 True 14 10
300 299 10 diag 0.0001 1 kmeans 100 7894699.303927114 7651451.0 -1351.6987 0.20837311446666718 479.2680969238281 1.921187996397482 True 39 10
301 300 10 diag 0.0001 1 kmeans 300 7894699.303927114 7651451.0 -1351.6987 0.20837311446666718 479.2680969238281 1.921187996397482 True 39 10
302 301 10 diag 0.0001 5 kmeans 100 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 15 10
303 302 10 diag 0.0001 5 kmeans 300 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 15 10
304 303 10 diag 0.001 1 kmeans 100 7913842.303927114 7670594.0 -1355.1172 0.20897234976291656 479.4935302734375 1.9199507888474652 True 32 10
305 304 10 diag 0.001 1 kmeans 300 7913842.303927114 7670594.0 -1355.1172 0.20897234976291656 479.4935302734375 1.9199507888474652 True 32 10
306 305 10 diag 0.001 5 kmeans 100 7762934.303927114 7519686.0 -1328.1693 0.2199474722146988 571.3079833984375 1.801659072375885 True 12 10
307 306 10 diag 0.001 5 kmeans 300 7762934.303927114 7519686.0 -1328.1693 0.2199474722146988 571.3079833984375 1.801659072375885 True 12 10
308 307 10 diag 1e-05 1 k-means++ 100 7924116.803927114 7680868.5 -1356.9519 0.18109233677387238 451.2889099121094 2.0573889908400558 True 21 10
309 308 10 diag 1e-05 1 k-means++ 300 7924116.803927114 7680868.5 -1356.9519 0.18109233677387238 451.2889099121094 2.0573889908400558 True 21 10
310 309 10 diag 1e-05 5 k-means++ 100 7738147.803927114 7494899.5 -1323.7432 0.2197750359773636 571.7454223632812 1.7996143618833955 True 15 10
311 310 10 diag 1e-05 5 k-means++ 300 7738147.803927114 7494899.5 -1323.7432 0.2197750359773636 571.7454223632812 1.7996143618833955 True 15 10
312 311 10 diag 0.0001 1 k-means++ 100 8187378.303927114 7944130.0 -1403.9629 0.18437595665454865 504.9218444824219 1.8422731699234043 True 15 10
313 312 10 diag 0.0001 1 k-means++ 300 8187378.303927114 7944130.0 -1403.9629 0.18437595665454865 504.9218444824219 1.8422731699234043 True 15 10
314 313 10 diag 0.0001 5 k-means++ 100 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 19 10
315 314 10 diag 0.0001 5 k-means++ 300 7743709.303927114 7500461.0 -1324.7362 0.21908442676067352 571.03955078125 1.8024606257110887 True 19 10
316 315 10 diag 0.001 1 k-means++ 100 8206320.303927114 7963072.0 -1407.3453 0.18497711420059204 505.0896301269531 1.8418734003624215 True 12 10
317 316 10 diag 0.001 1 k-means++ 300 8206320.303927114 7963072.0 -1407.3453 0.18497711420059204 505.0896301269531 1.8418734003624215 True 12 10
318 317 10 diag 0.001 5 k-means++ 100 7762911.303927114 7519663.0 -1328.1652 0.21994036436080933 571.3048706054688 1.8012326594817665 True 14 10
319 318 10 diag 0.001 5 k-means++ 300 7762911.303927114 7519663.0 -1328.1652 0.21994036436080933 571.3048706054688 1.8012326594817665 True 14 10
320 319 11 diag 1e-05 1 kmeans 100 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
321 320 11 diag 1e-05 1 kmeans 300 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
322 321 11 diag 1e-05 5 kmeans 100 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
323 322 11 diag 1e-05 5 kmeans 300 7579813.728057295 7312240.0 -1289.6621 0.23397988080978394 564.2086791992188 1.7291402394614084 True 11 11
324 323 11 diag 0.0001 1 kmeans 100 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
325 324 11 diag 0.0001 1 kmeans 300 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
326 325 11 diag 0.0001 5 kmeans 100 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
327 326 11 diag 0.0001 5 kmeans 300 7585561.228057295 7317987.5 -1290.6885 0.23397988080978394 564.2086791992188 1.7291402394614084 True 13 11
328 327 11 diag 0.001 1 kmeans 100 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
329 328 11 diag 0.001 1 kmeans 300 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
330 329 11 diag 0.001 5 kmeans 100 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
331 330 11 diag 0.001 5 kmeans 300 7604425.728057295 7336852.0 -1294.0571 0.2341206818819046 564.5838012695312 1.7292947793505737 True 23 11
332 331 11 diag 1e-05 1 k-means++ 100 7791839.228057295 7524265.5 -1327.5238 0.19072884321212769 512.7921752929688 1.8151284796024916 True 12 11
333 332 11 diag 1e-05 1 k-means++ 300 7791839.228057295 7524265.5 -1327.5238 0.19072884321212769 512.7921752929688 1.8151284796024916 True 12 11
334 333 11 diag 1e-05 5 k-means++ 100 7590459.228057295 7322885.5 -1291.5631 0.23250937461853027 557.0549926757812 1.6956773285989237 True 15 11
335 334 11 diag 1e-05 5 k-means++ 300 7590459.228057295 7322885.5 -1291.5631 0.23250937461853027 557.0549926757812 1.6956773285989237 True 15 11
336 335 11 diag 0.0001 1 k-means++ 100 7673894.728057295 7406321.0 -1306.4623 0.1943679302930832 520.76953125 1.7964734396608426 True 18 11
337 336 11 diag 0.0001 1 k-means++ 300 7673894.728057295 7406321.0 -1306.4623 0.1943679302930832 520.76953125 1.7964734396608426 True 18 11
338 337 11 diag 0.0001 5 k-means++ 100 7595473.228057295 7327899.5 -1292.4585 0.23414196074008942 558.0660400390625 1.7058883755528695 True 22 11
339 338 11 diag 0.0001 5 k-means++ 300 7595473.228057295 7327899.5 -1292.4585 0.23414196074008942 558.0660400390625 1.7058883755528695 True 22 11
340 339 11 diag 0.001 1 k-means++ 100 7694915.228057295 7427341.5 -1310.216 0.1941366195678711 518.650634765625 1.8003890716705255 True 19 11
341 340 11 diag 0.001 1 k-means++ 300 7694915.228057295 7427341.5 -1310.216 0.1941366195678711 518.650634765625 1.8003890716705255 True 19 11
342 341 11 diag 0.001 5 k-means++ 100 7603135.728057295 7335562.0 -1293.8268 0.23057277500629425 554.763427734375 1.7732846393460022 True 61 11
343 342 11 diag 0.001 5 k-means++ 300 7603135.728057295 7335562.0 -1293.8268 0.23057277500629425 554.763427734375 1.7732846393460022 True 61 11
344 343 14 diag 1e-05 1 kmeans 100 7223238.000447838 6882688.0 -1208.5668 0.189774751663208 445.0321044921875 2.405689156966965 True 35 14
345 344 14 diag 1e-05 1 kmeans 300 7223238.000447838 6882688.0 -1208.5668 0.189774751663208 445.0321044921875 2.405689156966965 True 35 14
346 345 14 diag 1e-05 5 kmeans 100 7179637.000447838 6839087.0 -1200.7809 0.19881103932857513 443.0815124511719 2.3498009106287974 True 37 14
347 346 14 diag 1e-05 5 kmeans 300 7179637.000447838 6839087.0 -1200.7809 0.19881103932857513 443.0815124511719 2.3498009106287974 True 37 14
348 347 14 diag 0.0001 1 kmeans 100 7403717.000447838 7063167.0 -1240.7952 0.19351521134376526 467.3027038574219 1.8745209391776043 True 9 14
349 348 14 diag 0.0001 1 kmeans 300 7403717.000447838 7063167.0 -1240.7952 0.19351521134376526 467.3027038574219 1.8745209391776043 True 9 14
350 349 14 diag 0.0001 5 kmeans 100 7185399.000447838 6844849.0 -1201.8098 0.1989377737045288 443.1646423339844 2.3487992155318165 True 36 14
351 350 14 diag 0.0001 5 kmeans 300 7185399.000447838 6844849.0 -1201.8098 0.1989377737045288 443.1646423339844 2.3487992155318165 True 36 14
352 351 14 diag 0.001 1 kmeans 100 7249591.000447838 6909041.0 -1213.2727 0.1921805739402771 446.1286315917969 2.4012027743509816 True 32 14
353 352 14 diag 0.001 1 kmeans 300 7249591.000447838 6909041.0 -1213.2727 0.1921805739402771 446.1286315917969 2.4012027743509816 True 32 14
354 353 14 diag 0.001 5 kmeans 100 7206289.000447838 6865739.0 -1205.5402 0.2005954384803772 446.14300537109375 2.3618214198344596 True 35 14
355 354 14 diag 0.001 5 kmeans 300 7206289.000447838 6865739.0 -1205.5402 0.2005954384803772 446.14300537109375 2.3618214198344596 True 35 14
356 355 14 diag 1e-05 1 k-means++ 100 7407422.000447838 7066872.0 -1241.4568 0.17794980108737946 461.5752868652344 1.8973642909602886 True 16 14
357 356 14 diag 1e-05 1 k-means++ 300 7407422.000447838 7066872.0 -1241.4568 0.17794980108737946 461.5752868652344 1.8973642909602886 True 16 14
358 357 14 diag 1e-05 5 k-means++ 100 7225380.500447838 6884830.5 -1208.9493 0.21065233647823334 449.29351806640625 2.369687246645459 True 41 14
359 358 14 diag 1e-05 5 k-means++ 300 7225380.500447838 6884830.5 -1208.9493 0.21065233647823334 449.29351806640625 2.369687246645459 True 41 14
360 359 14 diag 0.0001 1 k-means++ 100 7409969.000447838 7069419.0 -1241.9116 0.18569153547286987 462.6240539550781 1.8868216012452776 True 15 14
361 360 14 diag 0.0001 1 k-means++ 300 7409969.000447838 7069419.0 -1241.9116 0.18569153547286987 462.6240539550781 1.8868216012452776 True 15 14
362 361 14 diag 0.0001 5 k-means++ 100 7197481.000447838 6856931.0 -1203.9673 0.1839146614074707 442.7921142578125 2.4198138057845995 True 34 14
363 362 14 diag 0.0001 5 k-means++ 300 7197481.000447838 6856931.0 -1203.9673 0.1839146614074707 442.7921142578125 2.4198138057845995 True 34 14
364 363 14 diag 0.001 1 k-means++ 300 7428971.000447838 7088421.0 -1245.3048 0.18628861010074615 462.73419189453125 1.8868677406736207 False 300 14
365 364 14 diag 0.001 5 k-means++ 100 7217251.500447838 6876701.5 -1207.4978 0.18614986538887024 443.1617431640625 2.4207333709907854 True 30 14
366 365 14 diag 0.001 5 k-means++ 300 7217251.500447838 6876701.5 -1207.4978 0.18614986538887024 443.1617431640625 2.4207333709907854 True 30 14
367 366 17 diag 1e-05 1 kmeans 100 7157834.272838381 6744308.0 -1179.4664 0.15990924835205078 403.0197448730469 2.0343843124532546 True 24 17
368 367 17 diag 1e-05 1 kmeans 300 7157834.272838381 6744308.0 -1179.4664 0.15990924835205078 403.0197448730469 2.0343843124532546 True 24 17
369 368 17 diag 1e-05 5 kmeans 100 6988291.272838381 6574765.0 -1149.1909 0.18784816563129425 396.8871154785156 2.38212018534803 True 20 17
370 369 17 diag 1e-05 5 kmeans 300 6988291.272838381 6574765.0 -1149.1909 0.18784816563129425 396.8871154785156 2.38212018534803 True 20 17
371 370 17 diag 0.0001 1 kmeans 100 7164777.772838381 6751251.5 -1180.7063 0.16118811070919037 403.645263671875 2.0161663571062354 True 12 17
372 371 17 diag 0.0001 1 kmeans 300 7164777.772838381 6751251.5 -1180.7063 0.16118811070919037 403.645263671875 2.0161663571062354 True 12 17
373 372 17 diag 0.0001 5 kmeans 100 6993985.272838381 6580459.0 -1150.2076 0.18836656212806702 396.94464111328125 2.382098456335865 True 20 17
374 373 17 diag 0.0001 5 kmeans 300 6993985.272838381 6580459.0 -1150.2076 0.18836656212806702 396.94464111328125 2.382098456335865 True 20 17
375 374 17 diag 0.001 1 kmeans 100 7020208.272838381 6606682.0 -1154.8904 0.16110706329345703 392.0225830078125 2.466771067136851 True 32 17
376 375 17 diag 0.001 1 kmeans 300 7020208.272838381 6606682.0 -1154.8904 0.16110706329345703 392.0225830078125 2.466771067136851 True 32 17
377 376 17 diag 0.001 5 kmeans 100 7014078.772838381 6600552.5 -1153.7958 0.19070318341255188 397.37750244140625 2.3799724457371485 True 20 17
378 377 17 diag 0.001 5 kmeans 300 7014078.772838381 6600552.5 -1153.7958 0.19070318341255188 397.37750244140625 2.3799724457371485 True 20 17
379 378 17 diag 1e-05 1 k-means++ 100 7121674.772838381 6708148.5 -1173.0094 0.15012019872665405 375.2821350097656 2.4197980533663803 True 31 17
380 379 17 diag 1e-05 1 k-means++ 300 7121674.772838381 6708148.5 -1173.0094 0.15012019872665405 375.2821350097656 2.4197980533663803 True 31 17
381 380 17 diag 1e-05 5 k-means++ 100 7005072.772838381 6591546.5 -1152.1876 0.14115209877490997 384.4289245605469 2.4847770953101596 True 29 17
382 381 17 diag 1e-05 5 k-means++ 300 7005072.772838381 6591546.5 -1152.1876 0.14115209877490997 384.4289245605469 2.4847770953101596 True 29 17
383 382 17 diag 0.0001 1 k-means++ 100 7293509.772838381 6879983.5 -1203.6942 0.15238241851329803 397.42816162109375 2.107055060535422 True 15 17
384 383 17 diag 0.0001 1 k-means++ 300 7293509.772838381 6879983.5 -1203.6942 0.15238241851329803 397.42816162109375 2.107055060535422 True 15 17
385 384 17 diag 0.0001 5 k-means++ 100 7015674.772838381 6602148.5 -1154.0808 0.1819005310535431 394.13629150390625 2.4964933433175283 True 18 17
386 385 17 diag 0.0001 5 k-means++ 300 7015674.772838381 6602148.5 -1154.0808 0.1819005310535431 394.13629150390625 2.4964933433175283 True 18 17
387 386 17 diag 0.001 1 k-means++ 100 7312575.772838381 6899049.5 -1207.0989 0.15248946845531464 397.60723876953125 2.1086064619099547 True 17 17
388 387 17 diag 0.001 1 k-means++ 300 7312575.772838381 6899049.5 -1207.0989 0.15248946845531464 397.60723876953125 2.1086064619099547 True 17 17
389 388 17 diag 0.001 5 k-means++ 100 7034312.772838381 6620786.5 -1157.409 0.18249236047267914 394.5459289550781 2.4918179246451175 True 23 17
390 389 17 diag 0.001 5 k-means++ 300 7034312.772838381 6620786.5 -1157.409 0.18249236047267914 394.5459289550781 2.4918179246451175 True 23 17
391 390 20 diag 1e-05 1 kmeans 100 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
392 391 20 diag 1e-05 1 kmeans 300 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
393 392 20 diag 1e-05 5 kmeans 100 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
394 393 20 diag 1e-05 5 kmeans 300 6849987.045228925 6363484.5 -1107.0726 0.1538863182067871 351.1917419433594 2.4313421881484762 True 30 20
395 394 20 diag 0.0001 1 kmeans 100 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
396 395 20 diag 0.0001 1 kmeans 300 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
397 396 20 diag 0.0001 5 kmeans 100 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
398 397 20 diag 0.0001 5 kmeans 300 6855879.045228925 6369376.5 -1108.1248 0.15445564687252045 351.1902160644531 2.4330055346823083 True 25 20
399 398 20 diag 0.001 1 kmeans 100 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
400 399 20 diag 0.001 1 kmeans 300 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
401 400 20 diag 0.001 5 kmeans 100 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
402 401 20 diag 0.001 5 kmeans 300 6875191.545228925 6388689.0 -1111.5734 0.15459507703781128 351.5787658691406 2.432923325373909 True 36 20
403 402 20 diag 1e-05 1 k-means++ 100 6978855.045228925 6492352.5 -1130.0847 0.13519038259983063 338.322509765625 2.5026143875581077 True 24 20
404 403 20 diag 1e-05 1 k-means++ 300 6978855.045228925 6492352.5 -1130.0847 0.13519038259983063 338.322509765625 2.5026143875581077 True 24 20
405 404 20 diag 1e-05 5 k-means++ 100 6897127.045228925 6410624.5 -1115.4905 0.13251666724681854 352.5394592285156 2.4669189695674225 True 42 20
406 405 20 diag 1e-05 5 k-means++ 300 6897127.045228925 6410624.5 -1115.4905 0.13251666724681854 352.5394592285156 2.4669189695674225 True 42 20
407 406 20 diag 0.0001 1 k-means++ 100 7011968.045228925 6525465.5 -1135.9978 0.14400699734687805 344.567138671875 2.517887865440349 True 30 20
408 407 20 diag 0.0001 1 k-means++ 300 7011968.045228925 6525465.5 -1135.9978 0.14400699734687805 344.567138671875 2.517887865440349 True 30 20
409 408 20 diag 0.0001 5 k-means++ 100 6905988.545228925 6419486.0 -1117.0729 0.13107705116271973 351.8740234375 2.4956842864961937 True 36 20
410 409 20 diag 0.0001 5 k-means++ 300 6905988.545228925 6419486.0 -1117.0729 0.13107705116271973 351.8740234375 2.4956842864961937 True 36 20
411 410 20 diag 0.001 1 k-means++ 100 7031180.545228925 6544678.0 -1139.4286 0.14613750576972961 345.2534484863281 2.516432567197497 True 27 20
412 411 20 diag 0.001 1 k-means++ 300 7031180.545228925 6544678.0 -1139.4286 0.14613750576972961 345.2534484863281 2.516432567197497 True 27 20
413 412 20 diag 0.001 5 k-means++ 100 6918391.545228925 6431889.0 -1119.2877 0.13308578729629517 351.49005126953125 2.474649164658472 True 35 20
414 413 20 diag 0.001 5 k-means++ 300 6918391.545228925 6431889.0 -1119.2877 0.13308578729629517 351.49005126953125 2.474649164658472 True 35 20

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,158 @@
nohup: ignoring input
Loading embeddings from /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json...
Loaded 2800 samples with embedding dimension 2048
======================================================================
RUNNING GAUSSIAN MIXTURE MODEL CLUSTERING WITH OPTIMIZED GRID SEARCH
======================================================================
Optimized parameter combinations:
- n_components: 11 values [2, 3, 4, 5, 6, 8, 10, 11, 14, 17, 20]
- covariance_types: 2 options ['full', 'diag']
- reg_covar: 3 values [1e-05, 0.0001, 0.001]
- n_init: 2 values [1, 5]
- init_params: 2 options ['kmeans', 'k-means++']
- max_iter: 2 values [100, 300]
Total combinations: 528 (optimized for speed)
Estimated runtime: 4.4 minutes
This should be much faster...
Progress: 50/528 (9.5%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 100/528 (18.9%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 150/528 (28.4%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 200/528 (37.9%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
Progress: 250/528 (47.3%) - Best scores so far: BIC=17260132.61, Silhouette=0.376
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=kmeans: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089173.91, AIC=13040529.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=2, cov=diag, init=k-means++: BIC=13089203.91, AIC=13040559.00, silhouette=0.3697
n_components=3, cov=diag, init=kmeans: BIC=12693850.34, AIC=12620880.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12693850.34, AIC=12620880.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12699627.34, AIC=12626657.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12699627.34, AIC=12626657.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12718245.34, AIC=12645275.00, silhouette=0.3761
n_components=3, cov=diag, init=kmeans: BIC=12718245.34, AIC=12645275.00, silhouette=0.3761
Progress: 300/528 (56.8%) - Best scores so far: BIC=11770626.34, Silhouette=0.376
n_components=4, cov=diag, init=kmeans: BIC=11525150.76, AIC=11427855.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11525150.76, AIC=11427855.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11530927.76, AIC=11433632.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11530927.76, AIC=11433632.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11549555.76, AIC=11452260.00, silhouette=0.3090
n_components=4, cov=diag, init=kmeans: BIC=11549555.76, AIC=11452260.00, silhouette=0.3090
n_components=5, cov=diag, init=kmeans: BIC=10641753.18, AIC=10520132.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10641753.18, AIC=10520132.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10647529.18, AIC=10525908.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10647529.18, AIC=10525908.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10666196.18, AIC=10544575.00, silhouette=0.3119
n_components=5, cov=diag, init=kmeans: BIC=10666196.18, AIC=10544575.00, silhouette=0.3119
Progress: 350/528 (66.3%) - Best scores so far: BIC=9931250.18, Silhouette=0.376
Progress: 400/528 (75.8%) - Best scores so far: BIC=8401628.46, Silhouette=0.376
Progress: 450/528 (85.2%) - Best scores so far: BIC=7579813.73, Silhouette=0.376
Progress: 500/528 (94.7%) - Best scores so far: BIC=6988291.27, Silhouette=0.376
Progress: 528/528 (100.0%) - Best scores so far: BIC=6849987.05, Silhouette=0.376
======================================================================
GAUSSIAN MIXTURE MODEL GRID SEARCH ANALYSIS
======================================================================
Total parameter combinations tested: 413
Combinations with valid clustering: 413
Model Selection Metrics:
Best BIC score: 6849987.05
Best AIC score: -11119584.00
Best Log-Likelihood: 6594.97
Clustering Quality Metrics:
Best silhouette score: 0.3761
Mean silhouette score: 0.2317
Best Calinski-Harabasz score: 1331.69
Best Davies-Bouldin score: 0.7860
Top 5 results by BIC (lower is better):
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6849987.05, AIC=6363484.50
n_comp=20, cov=diag: BIC=6855879.05, AIC=6369376.50
Top 5 results by AIC (lower is better):
n_comp=4, cov=full: BIC=38759701.15, AIC=-11119584.00
n_comp=4, cov=full: BIC=38759701.15, AIC=-11119584.00
n_comp=3, cov=full: BIC=26462676.38, AIC=-10946786.00
n_comp=3, cov=full: BIC=26462676.38, AIC=-10946786.00
n_comp=5, cov=full: BIC=54230057.92, AIC=-8119050.00
Top 5 results by Silhouette Score:
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
n_comp=3, cov=diag: silhouette=0.3761
Component count analysis (top 10 by BIC):
20.0 components: BIC=6849987.05, AIC=6363484.50, silhouette=0.1770
17.0 components: BIC=6988291.27, AIC=6574765.00, silhouette=0.2085
14.0 components: BIC=7179637.00, AIC=6839087.00, silhouette=0.2119
11.0 components: BIC=7579813.73, AIC=7312240.00, silhouette=0.2577
10.0 components: BIC=7737961.30, AIC=7494713.00, silhouette=0.2863
8.0 components: BIC=8401628.46, AIC=1051428.00, silhouette=0.2748
6.0 components: BIC=9102218.61, AIC=-6065602.00, silhouette=0.2707
5.0 components: BIC=9931250.18, AIC=-8119050.00, silhouette=0.3163
4.0 components: BIC=10865268.76, AIC=-11119584.00, silhouette=0.3110
3.0 components: BIC=11686081.34, AIC=-10946786.00, silhouette=0.3761
📁 SAVING DETAILED RESULTS...
==============================
Detailed grid search results saved to: gmm_grid_search_detailed_20250801_015245.json
Grid search summary CSV saved to: gmm_grid_search_summary_20250801_015245.csv
Best GMM result by BIC:
Parameters: {'n_components': 20, 'covariance_type': 'diag', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
BIC score: 6849987.05
Best GMM result by AIC:
Parameters: {'n_components': 4, 'covariance_type': 'full', 'reg_covar': 0.0001, 'n_init': 5, 'init_params': 'kmeans', 'max_iter': 100}
AIC score: -11119584.00
Best GMM result by Silhouette:
Parameters: {'n_components': 3, 'covariance_type': 'diag', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
Silhouette score: 0.3761
Visualization saved as 'gmm_clustering_results.png'
Final clustering results (bic) saved to: gmm_final_results_bic_20250801_015247.json
Traceback (most recent call last):
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 646, in <module>
main()
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 640, in main
clustering.save_clustering_results(results)
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 614, in save_clustering_results
json.dump({
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/__init__.py", line 179, in dump
for chunk in iterable:
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/home/nguyendc/miniconda3/envs/cluster/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type float32 is not JSON serializable

View File

@@ -0,0 +1,140 @@
nohup: ignoring input
Loading embeddings from /home/nguyendc/sonnh/embedding-clustering/extract/embeddings_factures_osteopathie_1k_qwen.json...
Loaded 2800 samples with embedding dimension 2048
======================================================================
RUNNING GAUSSIAN MIXTURE MODEL CLUSTERING WITH OPTIMIZED GRID SEARCH
======================================================================
Optimized parameter combinations:
- n_components: 21 values [2, 3, 4, 5, 6, 8, 10, 11, 14, 17, 20, 23, 26, 29, 32, 35, 38, 41, 44, 47, 50]
- covariance_types: 2 options ['tied', 'spherical']
- reg_covar: 3 values [1e-05, 0.0001, 0.001]
- n_init: 2 values [1, 5]
- init_params: 2 options ['kmeans', 'k-means++']
- max_iter: 2 values [100, 300]
Total combinations: 1008 (optimized for speed)
Estimated runtime: 8.4 minutes
This should be much faster...
n_components=2, cov=tied, init=kmeans: BIC=6521812.14, AIC=-5960170.38, silhouette=0.3692
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=3, cov=tied, init=kmeans: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3756
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
Progress: 50/1008 (5.0%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
n_components=4, cov=tied, init=kmeans: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
n_components=5, cov=tied, init=kmeans: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
Progress: 100/1008 (9.9%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 150/1008 (14.9%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 200/1008 (19.8%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 250/1008 (24.8%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 300/1008 (29.8%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 350/1008 (34.7%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 400/1008 (39.7%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 450/1008 (44.6%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 500/1008 (49.6%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 550/1008 (54.6%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 600/1008 (59.5%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 650/1008 (64.5%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 700/1008 (69.4%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 750/1008 (74.4%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 800/1008 (79.4%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 850/1008 (84.3%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 900/1008 (89.3%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 950/1008 (94.2%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 1000/1008 (99.2%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
Progress: 1008/1008 (100.0%) - Best scores so far: BIC=6511443.85, Silhouette=0.376
======================================================================
GAUSSIAN MIXTURE MODEL GRID SEARCH ANALYSIS
======================================================================
Total parameter combinations tested: 1008
Combinations with valid clustering: 1008
Model Selection Metrics:
Best BIC score: 6511443.85
Best AIC score: -6295231.48
Best Log-Likelihood: 1910.09
Clustering Quality Metrics:
Best silhouette score: 0.3757
Mean silhouette score: 0.0287
Best Calinski-Harabasz score: 1331.69
Best Davies-Bouldin score: 0.6762
Top 5 results by BIC (lower is better):
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=3, cov=tied: BIC=6511443.85, AIC=-5982704.34
n_comp=4, cov=tied: BIC=6514783.32, AIC=-5991530.55
Top 5 results by AIC (lower is better):
n_comp=50, cov=tied: BIC=6770703.71, AIC=-6295231.48
n_comp=50, cov=tied: BIC=6770703.71, AIC=-6295231.48
n_comp=50, cov=tied: BIC=6779928.76, AIC=-6286006.43
n_comp=50, cov=tied: BIC=6779928.76, AIC=-6286006.43
n_comp=47, cov=tied: BIC=6755535.12, AIC=-6273903.03
Top 5 results by Silhouette Score:
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
n_comp=3, cov=spherical: silhouette=0.3757
Component count analysis (top 10 by BIC):
3.0 components: BIC=6511443.85, AIC=-5982704.34, silhouette=0.3757
4.0 components: BIC=6514783.32, AIC=-5991530.55, silhouette=0.3110
5.0 components: BIC=6520503.08, AIC=-5997976.48, silhouette=0.3163
2.0 components: BIC=6521812.14, AIC=-5960170.38, silhouette=0.3693
6.0 components: BIC=6526215.27, AIC=-6004429.97, silhouette=0.2485
8.0 components: BIC=6529704.08, AIC=-6025272.52, silhouette=0.2680
10.0 components: BIC=6538644.29, AIC=-6040663.67, silhouette=0.2706
11.0 components: BIC=6546208.81, AIC=-6045264.84, silhouette=0.2580
14.0 components: BIC=6563001.35, AIC=-6064969.34, silhouette=0.2241
17.0 components: BIC=6580862.17, AIC=-6083605.55, silhouette=0.2109
📁 SAVING DETAILED RESULTS...
==============================
Detailed grid search results saved to: gmm_grid_search_detailed_20250805_150635.json
Grid search summary CSV saved to: gmm_grid_search_summary_20250805_150635.csv
Best GMM result by BIC:
Parameters: {'n_components': 3, 'covariance_type': 'tied', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
BIC score: 6511443.85
Best GMM result by AIC:
Parameters: {'n_components': 50, 'covariance_type': 'tied', 'reg_covar': 1e-05, 'n_init': 5, 'init_params': 'kmeans', 'max_iter': 100}
AIC score: -6295231.48
Best GMM result by Silhouette:
Parameters: {'n_components': 3, 'covariance_type': 'spherical', 'reg_covar': 1e-05, 'n_init': 1, 'init_params': 'kmeans', 'max_iter': 100}
Silhouette score: 0.3757
Visualization saved as 'gmm_clustering_results.png'
Final clustering results (bic) saved to: gmm_final_results_bic_20250805_150636.json
Final clustering results (aic) saved to: gmm_final_results_aic_20250805_150636.json
Traceback (most recent call last):
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 649, in <module>
main()
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 643, in main
clustering.save_clustering_results(results)
File "/home/nguyendc/sonnh/embedding-clustering/cluster/gmm_extensive.py", line 617, in save_clustering_results
json.dump({
File "/usr/lib/python3.10/json/__init__.py", line 179, in dump
for chunk in iterable:
File "/usr/lib/python3.10/json/encoder.py", line 431, in _iterencode
yield from _iterencode_dict(o, _current_indent_level)
File "/usr/lib/python3.10/json/encoder.py", line 405, in _iterencode_dict
yield from chunks
File "/usr/lib/python3.10/json/encoder.py", line 438, in _iterencode
o = _default(o)
File "/usr/lib/python3.10/json/encoder.py", line 179, in default
raise TypeError(f'Object of type {o.__class__.__name__} '
TypeError: Object of type float32 is not JSON serializable