IAM ReCTS

This commit is contained in:
echo840
2023-06-09 10:29:18 +08:00
parent 3c59897aa6
commit e22b12b169
185 changed files with 294244 additions and 22 deletions

0
datasets/__init__.py Normal file → Executable file
View File

0
datasets/__pycache__/__init__.cpython-310.pyc Normal file → Executable file
View File

Binary file not shown.

0
datasets/__pycache__/formula_dataset.cpython-310.pyc Normal file → Executable file
View File

Binary file not shown.

Binary file not shown.

Binary file not shown.

0
datasets/__pycache__/vqa_dataset.cpython-310.pyc Normal file → Executable file
View File

Binary file not shown.

0
datasets/formula_dataset.py Normal file → Executable file
View File

11
datasets/kie_dataset.py Normal file → Executable file
View File

@@ -7,35 +7,36 @@ class SROIEDataset(Dataset):
self,
dir_path= "./data/SROIE",
):
dir_path = dir_path+'/ann'
self.image_list = []
self.question_list = []
self.answer_list = []
for file_name in os.listdir(dir_path):
if file_name.endswith(".txt") and '(' not in file_name:
file_path = os.path.join(dir_path, file_name)
img_path = file_path.replace('.txt', '.jpg')
img_path = file_path.replace('.txt', '.jpg').replace('ann','image')
with open(file_path) as f:
content = f.read()
info = json.loads(content)
if 'company' in info.keys():
self.question_list.append("what is the name of the company that issued this invoice?")#llava 0.12
self.question_list.append("what is the name of the company that issued this receipt?")#llava 0.12
#self.question_list.append("what is the company information in the image?")#llava 0.08
self.answer_list.append(info['company'])
self.image_list.append(img_path)
if 'date' in info.keys():
self.question_list.append("when was this invoice issued?")
self.question_list.append("when was this receipt issued?")
#self.question_list.append("what is the date information in the image?")
self.answer_list.append(info['date'])
self.image_list.append(img_path)
if 'address' in info.keys():
self.question_list.append("where was this invoice issued?")
self.question_list.append("where was this receipt issued?")
#self.question_list.append("what is the address information in the image?")
self.answer_list.append(info['address'])
self.image_list.append(img_path)
if 'total' in info.keys():
self.question_list.append("what is the total amount of this invoice?")
self.question_list.append("what is the total amount of this receipt?")
#self.question_list.append("what is the total information in the image?")
self.answer_list.append(info['total'])
self.image_list.append(img_path)

68
datasets/ocr_dataset.py Normal file → Executable file
View File

@@ -1,5 +1,11 @@
from torch.utils.data import Dataset
import xml.etree.ElementTree as ET
import os
import re
def remove_special_chars(s):
    """Drop every character that is not an ASCII letter, a digit or whitespace.

    Args:
        s: The string to clean.

    Returns:
        A copy of ``s`` containing only ``a-z``, ``A-Z``, ``0-9`` and
        whitespace characters, in their original order.
    """
    return re.sub(r"[^a-zA-Z0-9\s]", "", s)
class ocrDataset(Dataset):
def __init__(
self,
@@ -19,4 +25,64 @@ class ocrDataset(Dataset):
answers = self.lines[idx].split()[1]
return {
"image_path": img_path,
"gt_answers": answers}
"gt_answers": answers}
class IAMDataset(Dataset):
    """Word-level samples built from the XML annotation files of an IAM folder.

    Every ``<word>`` element found in the ``*.xml`` files under
    ``<image_dir_path>/xml`` contributes one sample, provided its ``text``
    attribute is non-empty after ``remove_special_chars`` has been applied.
    The image path of a word is
    ``<image_dir_path>/<prefix>/<stem>/<word id>.png`` where ``<prefix>`` is
    the XML file name up to its first ``-`` and ``<stem>`` is the file name up
    to its first ``.``.
    """

    def __init__(self, image_dir_path='./data/IAM') -> None:
        """Scan the annotation folder and collect image paths and labels.

        Args:
            image_dir_path: Root folder that holds the ``xml`` sub-folder and
                the image sub-folders.
        """
        ann_path = image_dir_path + '/xml'
        self.images = []
        self.answers = []
        # os.listdir returns entries in arbitrary order; sorting keeps the
        # index -> sample mapping identical across runs and machines.
        for filename in sorted(os.listdir(ann_path)):
            if not filename.endswith('.xml'):
                continue
            # Read the XML annotation file.
            xml_file = os.path.join(ann_path, filename)
            root = ET.parse(xml_file).getroot()
            # These only depend on the file name, so compute them once per
            # file instead of once per word.
            prefix = filename.split('-')[0]
            stem = filename.split('.')[0]
            # Walk over every <word> element in the document.
            for word in root.iter('word'):
                text = word.get('text')
                img_id = word.get('id')
                # Element.get returns None for a missing attribute; such a
                # word can neither be labelled nor located, so skip it
                # rather than crash.
                if text is None or img_id is None:
                    continue
                text = remove_special_chars(text)
                if len(text) > 0:
                    img_path = image_dir_path + '/' + prefix + '/' + stem + '/' + img_id + '.png'
                    self.images.append(img_path)
                    self.answers.append(text)

    def __len__(self):
        """Return the number of collected word samples."""
        return len(self.images)

    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        Returns:
            A dict with ``"image_path"`` (path string of the word image) and
            ``"gt_answers"`` (the cleaned word text).
        """
        img_path = self.images[idx]
        answers = self.answers[idx]
        return {
            "image_path": img_path,
            "gt_answers": answers}
class ReCTSDataset(Dataset):
    """Samples read from the ``test_label.txt`` file of a ReCTS folder.

    Each non-blank line of ``<dir_path>/test_label.txt`` is one sample: its
    first whitespace-separated field is an image file name located under
    ``<dir_path>/crops`` and its second field is the ground-truth text.
    """

    def __init__(
        self,
        dir_path="./data/ReCTS",
    ):
        """Load the label file.

        Args:
            dir_path: Folder that contains ``crops`` and ``test_label.txt``.
        """
        self.image_dir_path = os.path.join(dir_path, 'crops')
        file_path = os.path.join(dir_path, 'test_label.txt')
        # Close the handle deterministically, and drop whitespace-only lines
        # (e.g. a trailing blank line) which would otherwise raise IndexError
        # in __getitem__.
        with open(file_path, "r") as file:
            self.lines = [line for line in file if line.strip()]

    def __len__(self):
        """Return the number of labelled samples."""
        return len(self.lines)

    def __getitem__(self, idx):
        """Return the sample at ``idx``.

        Returns:
            A dict with ``"image_path"`` (path of the cropped image) and
            ``"gt_answers"`` (second field of the label line).
        """
        # Split once and reuse both fields.
        fields = self.lines[idx].split()
        image_id = fields[0]
        answers = fields[1]
        img_path = os.path.join(self.image_dir_path, image_id)
        return {
            "image_path": img_path,
            "gt_answers": answers}
if __name__ == "__main__":
    # Manual smoke test: load the ReCTS data, then print the number of
    # samples and the first sample.
    #
    # A disabled IAM variant of the same check used to sit here:
    #     data = IAMDataset('/home/zhangli/GPT4/MutimodelOCR/data/IAM')
    #     print(len(data))
    #     data = iter(data)
    #     batch = next(data)
    #     import pdb;pdb.set_trace()
    dataset = ReCTSDataset('/home/zhangli/GPT4/MutimodelOCR/data/ReCTS')
    print(len(dataset))
    first_sample = next(iter(dataset))
    print(first_sample)

4
datasets/process/process_ESTVQA.py Normal file → Executable file
View File

@@ -5,8 +5,8 @@ def has_chinese_characters(string):
pattern = re.compile(r'[\u4e00-\u9fa5]')
return bool(pattern.search(string))
if __name__ == "__main__":
ann_file = "/home/zhangli/GPT4/MutimodelOCR/data/ESTVQA/annotations/train.json"
img_file = "/home/zhangli/GPT4/MutimodelOCR/data/ESTVQA/images/train"
ann_file = "/home/zhangli/OCRData/data/TextVQA/ESTVQA/annotations/train.json"
#img_file = "/home/zhangli/GPT4/MutimodelOCR/data/ESTVQA/images/train"
cn_list = []
en_list= []
with open(ann_file,'r') as f:

0
datasets/process/process_FUNSD.py Normal file → Executable file
View File

View File

@@ -0,0 +1,23 @@
import re
import os
def has_chinese_characters(string):
    """Tell whether ``string`` contains a character in U+4E00..U+9FA5.

    Args:
        string: Text to inspect.

    Returns:
        ``True`` if at least one character falls in that range, else ``False``.
    """
    return re.search(r'[\u4e00-\u9fa5]', string) is not None
def is_all_chinese(text):
    """Check whether a string consists solely of Chinese characters.

    Only characters in the range U+4E00..U+9FA5 count as Chinese here.

    Args:
        text: Text to inspect.

    Returns:
        ``True`` if ``text`` is non-empty and every character is in that
        range, else ``False``.
    """
    # fullmatch instead of '^...$': '$' also matches just before a trailing
    # newline, which would wrongly accept strings such as "中文\n".
    return re.fullmatch(r'[\u4e00-\u9fa5]+', text) is not None
if __name__ =='__main__':
    # Filter the ReCTS annotation list: keep only lines whose label (second
    # field) is purely Chinese and whose crop image (first field) exists on
    # disk, and write them to ann.txt.
    file_path = "/home/zhangli/GPT4/MutimodelOCR/data/ReCTS/annotation.txt"
    out_path = "/home/zhangli/GPT4/MutimodelOCR/data/ReCTS/ann.txt"
    crops_dir = "/home/zhangli/GPT4/MutimodelOCR/data/ReCTS/crops"
    # Both handles are managed by `with` so the output is flushed and closed
    # even if processing fails midway.
    with open(file_path, 'r') as file, open(out_path, 'w') as out:
        for line in file:
            line = line.strip()
            fields = line.split()
            # Blank or label-less lines cannot be evaluated; skip them
            # instead of raising IndexError.
            if len(fields) < 2:
                continue
            text = fields[1]
            path = os.path.join(crops_dir, fields[0])
            if is_all_chinese(text) and os.path.exists(path):
                out.write(line)
                out.write('\n')

0
datasets/vqa_dataset.py Normal file → Executable file
View File