add OCRBench v2
This commit is contained in:
184
OCRBench_v2/eval_scripts/spotting_metric.py
Normal file
184
OCRBench_v2/eval_scripts/spotting_metric.py
Normal file
@@ -0,0 +1,184 @@
|
||||
import re
|
||||
import os
|
||||
import ast
|
||||
import ipdb
|
||||
import shutil
|
||||
import zipfile
|
||||
import subprocess
|
||||
import spotting_eval.rrc_evaluation_funcs_1_1 as rrc_evaluation_funcs
|
||||
from spotting_eval.script import default_evaluation_params,validate_data,evaluate_method
|
||||
|
||||
|
||||
def extract_bounding_boxes_robust(predict_str):
|
||||
"""
|
||||
Extract coordinates and text content from the given prediction string,
|
||||
handling potential format issues.
|
||||
|
||||
Args:
|
||||
predict_str (str): Model prediction output as a string.
|
||||
|
||||
Returns:
|
||||
list: Extracted data in the format [[x1, y1, x2, y2, text_content], ...].
|
||||
Returns None if no valid data is extracted.
|
||||
"""
|
||||
results = []
|
||||
seen = set()
|
||||
|
||||
# try parsing with ast.literal_eval
|
||||
try:
|
||||
data = ast.literal_eval(predict_str)
|
||||
except Exception:
|
||||
data = None
|
||||
|
||||
if data is not None:
|
||||
if isinstance(data, (list, tuple)):
|
||||
for item in data:
|
||||
if isinstance(item, (list, tuple)) and len(item) >= 5:
|
||||
x1_str, y1_str, x2_str, y2_str = item[:4]
|
||||
text_content = item[4]
|
||||
|
||||
x1_str = str(x1_str).strip()
|
||||
y1_str = str(y1_str).strip()
|
||||
x2_str = str(x2_str).strip()
|
||||
y2_str = str(y2_str).strip()
|
||||
text_content = str(text_content).replace("\n", "").strip().strip('"').strip("'")
|
||||
|
||||
try:
|
||||
x1 = int(x1_str)
|
||||
y1 = int(y1_str)
|
||||
x2 = int(x2_str)
|
||||
y2 = int(y2_str)
|
||||
|
||||
if not (0 <= x1 <= 1000 and 0 <= y1 <= 1000 and 0 <= x2 <= 1000 and 0 <= y2 <= 1000):
|
||||
continue
|
||||
|
||||
key = (x1, y1, x2, y2, text_content)
|
||||
if key in seen:
|
||||
continue
|
||||
|
||||
seen.add(key)
|
||||
results.append([x1, y1, x2, y2, text_content])
|
||||
except ValueError:
|
||||
continue
|
||||
else:
|
||||
# try parsing with regular expression
|
||||
|
||||
list_content = predict_str
|
||||
items = re.findall(r'[\[\(]\s*([^\[\]\(\)]*?)\s*[\]\)]', list_content)
|
||||
|
||||
if not items:
|
||||
return None
|
||||
|
||||
for item in items:
|
||||
parts = item.split(',', 4)
|
||||
if len(parts) < 5:
|
||||
continue
|
||||
|
||||
x1_str, y1_str, x2_str, y2_str, text_content = parts
|
||||
|
||||
x1_str = x1_str.strip()
|
||||
y1_str = y1_str.strip()
|
||||
x2_str = x2_str.strip()
|
||||
y2_str = y2_str.strip()
|
||||
text_content = text_content.replace("\n", "").strip().strip('"').strip("'")
|
||||
|
||||
try:
|
||||
x1 = int(x1_str)
|
||||
y1 = int(y1_str)
|
||||
x2 = int(x2_str)
|
||||
y2 = int(y2_str)
|
||||
|
||||
if not (0 <= x1 <= 1000 and 0 <= y1 <= 1000 and 0 <= x2 <= 1000 and 0 <= y2 <= 1000):
|
||||
continue
|
||||
|
||||
key = (x1, y1, x2, y2, text_content)
|
||||
if key in seen:
|
||||
continue
|
||||
|
||||
seen.add(key)
|
||||
results.append([x1, y1, x2, y2, text_content])
|
||||
except ValueError:
|
||||
continue
|
||||
|
||||
if not results:
|
||||
return None
|
||||
|
||||
return results
|
||||
|
||||
|
||||
def zip_folder(source_folder, destination_zip):
|
||||
abs_source = os.path.abspath(source_folder)
|
||||
abs_destination = os.path.abspath(destination_zip)
|
||||
|
||||
with zipfile.ZipFile(abs_destination, 'w', zipfile.ZIP_DEFLATED) as zf:
|
||||
for root, _, files in os.walk(abs_source):
|
||||
for file in files:
|
||||
abs_file_path = os.path.join(root, file)
|
||||
|
||||
relative_path = os.path.relpath(abs_file_path, abs_source)
|
||||
zf.write(abs_file_path, relative_path)
|
||||
|
||||
|
||||
def spotting_evaluation(prediction_list, img_metas):
|
||||
score = 0
|
||||
|
||||
submit_path = "./eval_scripts/spotting_eval/submit"
|
||||
gt_path = "./eval_scripts/spotting_eval/gt"
|
||||
submit_zip_path = "./eval_scripts/spotting_eval/submit.zip"
|
||||
gt_zip_path = "./eval_scripts/spotting_eval/gt.zip"
|
||||
for file_path in [submit_path, gt_path, submit_zip_path, gt_zip_path]:
|
||||
if "zip" in file_path:
|
||||
if os.path.exists(file_path):
|
||||
os.remove(file_path)
|
||||
else:
|
||||
if os.path.exists(file_path):
|
||||
shutil.rmtree(file_path)
|
||||
os.makedirs(file_path)
|
||||
|
||||
res_submit_list = []
|
||||
for item in prediction_list:
|
||||
if len(item) != 5:
|
||||
ipdb.set_trace()
|
||||
x1, y1, x2, y2, rec = item
|
||||
if x1 >= x2 or y1 >= y2:
|
||||
continue
|
||||
|
||||
res_submit_list.append(",".join([str(x1),str(y1),str(x2),str(y1),str(x2),str(y2),str(x1),str(y2),rec]))
|
||||
|
||||
res_gt_list = []
|
||||
for bbox, rec in zip(img_metas["bbox"], img_metas["content"]):
|
||||
x_coords = bbox[0::2]
|
||||
y_coords = bbox[1::2]
|
||||
|
||||
x1, y1 = min(x_coords), min(y_coords)
|
||||
x2, y2 = max(x_coords), max(y_coords)
|
||||
|
||||
res_gt_list.append(",".join([str(x1),str(y1),str(x2),str(y1),str(x2),str(y2),str(x1),str(y2),rec]))
|
||||
|
||||
if len(res_submit_list) == 0 or len(res_gt_list) == 0:
|
||||
return 0
|
||||
|
||||
with open(os.path.join(submit_path,"res_img_0.txt"), "w") as f:
|
||||
for item in res_submit_list[:-1]:
|
||||
f.write(item + "\n")
|
||||
f.write(res_submit_list[-1])
|
||||
|
||||
with open(os.path.join(gt_path,"gt_img_0.txt"), "w") as f:
|
||||
for item in res_gt_list[:-1]:
|
||||
f.write(item + "\n")
|
||||
f.write(res_gt_list[-1])
|
||||
|
||||
zip_folder(submit_path, submit_zip_path)
|
||||
zip_folder(gt_path, gt_zip_path)
|
||||
|
||||
command = {
|
||||
'g': gt_zip_path,
|
||||
's': submit_zip_path,
|
||||
'o': './',
|
||||
'p': '{"IOU_CONSTRAINT":0.5}'
|
||||
}
|
||||
|
||||
# run rrc_evaluation_funcs
|
||||
result = rrc_evaluation_funcs.main_evaluation(command,default_evaluation_params,validate_data,evaluate_method)
|
||||
score = result["method"]["hmean"]
|
||||
return score
|
Reference in New Issue
Block a user