Commit: IAM ReCTS (add IAM and ReCTS OCR dataset loaders plus a ReCTS label-filtering script)
datasets/__init__.py (0 changes, Normal file → Executable file)
datasets/__pycache__/__init__.cpython-310.pyc (0 changes, Normal file → Executable file)
datasets/__pycache__/__init__.cpython-38.pyc (BIN, new file, binary file not shown)
datasets/__pycache__/formula_dataset.cpython-310.pyc (0 changes, Normal file → Executable file)
datasets/__pycache__/formula_dataset.cpython-38.pyc (BIN, new file, binary file not shown)
datasets/__pycache__/kie_dataset.cpython-38.pyc (BIN, new file, binary file not shown)
datasets/__pycache__/ocr_dataset.cpython-38.pyc (BIN, new file, binary file not shown)
datasets/__pycache__/vqa_dataset.cpython-310.pyc (0 changes, Normal file → Executable file)
datasets/__pycache__/vqa_dataset.cpython-38.pyc (BIN, new file, binary file not shown)
datasets/formula_dataset.py (0 changes, Normal file → Executable file)
datasets/kie_dataset.py (11 changes, Normal file → Executable file)
@@ -7,35 +7,36 @@ class SROIEDataset(Dataset):
         self,
         dir_path= "./data/SROIE",
     ):
         dir_path = dir_path+'/ann'
         self.image_list = []
         self.question_list = []
         self.answer_list = []
         for file_name in os.listdir(dir_path):
             if file_name.endswith(".txt") and '(' not in file_name:
                 file_path = os.path.join(dir_path, file_name)
-                img_path = file_path.replace('.txt', '.jpg')
+                img_path = file_path.replace('.txt', '.jpg').replace('ann','image')
                 with open(file_path) as f:
                     content = f.read()
                 info = json.loads(content)
                 if 'company' in info.keys():
-                    self.question_list.append("what is the name of the company that issued this invoice?")#llava 0.12
+                    self.question_list.append("what is the name of the company that issued this receipt?")#llava 0.12
                     #self.question_list.append("what is the company information in the image?")#llava 0.08
                     self.answer_list.append(info['company'])
                     self.image_list.append(img_path)
                 if 'date' in info.keys():
-                    self.question_list.append("when was this invoice issued?")
+                    self.question_list.append("when was this receipt issued?")
                     #self.question_list.append("what is the date information in the image?")
                     self.answer_list.append(info['date'])
                     self.image_list.append(img_path)

                 if 'address' in info.keys():
-                    self.question_list.append("where was this invoice issued?")
+                    self.question_list.append("where was this receipt issued?")
                     #self.question_list.append("what is the address information in the image?")
                     self.answer_list.append(info['address'])
                     self.image_list.append(img_path)

                 if 'total' in info.keys():
-                    self.question_list.append("what is the total amount of this invoice?")
+                    self.question_list.append("what is the total amount of this receipt?")
                     #self.question_list.append("what is the total information in the image?")
                     self.answer_list.append(info['total'])
                     self.image_list.append(img_path)
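Taken together, this hunk renames "invoice" to "receipt" in the SROIE prompts and points img_path at the image/ directory instead of ann/. A minimal smoke-test sketch (the import path and direct list access are assumptions; only the class name, constructor argument, and the three lists appear in this commit):

    # Hypothetical usage; assumes the repo root is on PYTHONPATH and
    # ./data/SROIE/{ann,image} exist as the diff implies.
    from datasets.kie_dataset import SROIEDataset

    ds = SROIEDataset(dir_path="./data/SROIE")
    print(len(ds.question_list))  # one QA pair per annotated field
    print(ds.image_list[0], ds.question_list[0], ds.answer_list[0])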
datasets/ocr_dataset.py (68 changes, Normal file → Executable file)
@@ -1,5 +1,11 @@
 from torch.utils.data import Dataset
+import xml.etree.ElementTree as ET
 import os
+import re
+def remove_special_chars(s):
+    pattern = r"[^a-zA-Z0-9\s]"
+    s = re.sub(pattern, "", s)
+    return s
 class ocrDataset(Dataset):
     def __init__(
         self,
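The new remove_special_chars() helper strips every character outside [a-zA-Z0-9\s], which is what lets IAMDataset below keep only clean ground-truth words. A quick illustration (input string invented):

    # keeps letters, digits, and whitespace only
    print(remove_special_chars("it's 5-pm!"))  # -> "its 5pm"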
@@ -19,4 +25,64 @@ class ocrDataset(Dataset):
         answers = self.lines[idx].split()[1]
         return {
             "image_path": img_path,
-            "gt_answers": answers}
+            "gt_answers": answers}
+
+class IAMDataset(Dataset):
+    def __init__(self, image_dir_path = './data/IAM') -> None:
+        ann_path = image_dir_path + '/xml'
+        self.images = []
+        self.answers = []
+        for filename in os.listdir(ann_path):
+            if filename.endswith('.xml'):
+                # read the XML annotation file
+                xml_file = os.path.join(ann_path, filename)
+                tree = ET.parse(xml_file)
+                root = tree.getroot()
+                # walk the parsed XML,
+                # e.g. over every <word> element in the file
+                for word in root.iter('word'):
+                    text = word.get('text')
+                    img_id = word.get('id')
+                    img_path = image_dir_path+'/'+filename.split('-')[0]+'/'+filename.split('.')[0]+'/'+img_id+'.png'
+                    text = remove_special_chars(text)
+                    if len(text)>0:
+                        self.images.append(img_path)
+                        self.answers.append(text)
+    def __len__(self):
+        return len(self.images)
+    def __getitem__(self, idx):
+        img_path = self.images[idx]
+        answers = self.answers[idx]
+        return {
+            "image_path": img_path,
+            "gt_answers": answers}
+
+class ReCTSDataset(Dataset):
+    def __init__(
+        self,
+        dir_path= "./data/ReCTS",
+    ):
+        self.image_dir_path = os.path.join(dir_path, 'crops')
+        file_path = os.path.join(dir_path, 'test_label.txt')
+        file = open(file_path, "r")
+        self.lines = file.readlines()
+    def __len__(self):
+        return len(self.lines)
+    def __getitem__(self, idx):
+        image_id = self.lines[idx].split()[0]
+        img_path = os.path.join(self.image_dir_path, image_id)
+        answers = self.lines[idx].split()[1]
+        return {
+            "image_path": img_path,
+            "gt_answers": answers}
+if __name__ == "__main__":
+    '''data = IAMDataset('/home/zhangli/GPT4/MutimodelOCR/data/IAM')
+    print(len(data))
+    data = iter(data)
+    batch = next(data)
+    import pdb;pdb.set_trace()'''
+    data = ReCTSDataset('/home/zhangli/GPT4/MutimodelOCR/data/ReCTS')
+    print(len(data))
+    data = iter(data)
+    batch = next(data)
+    print(batch)
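Both new classes return {"image_path", "gt_answers"} dicts, so they can drop into the same evaluation loop. A sketch of that loop (the DataLoader wrapper and import path are assumptions; only ReCTSDataset and its output keys come from this commit):

    # Hypothetical evaluation skeleton over the ReCTS crops.
    from torch.utils.data import DataLoader
    from datasets.ocr_dataset import ReCTSDataset

    dataset = ReCTSDataset(dir_path="./data/ReCTS")
    loader = DataLoader(dataset, batch_size=1, shuffle=False)
    for batch in loader:
        img_path = batch["image_path"][0]  # path to one cropped text image
        gt = batch["gt_answers"][0]        # ground-truth transcription
        # run an OCR model on img_path and compare its output to gt here
        break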
datasets/process/process_ESTVQA.py (4 changes, Normal file → Executable file)
@@ -5,8 +5,8 @@ def has_chinese_characters(string):
     pattern = re.compile(r'[\u4e00-\u9fa5]')
     return bool(pattern.search(string))
 if __name__ == "__main__":
-    ann_file = "/home/zhangli/GPT4/MutimodelOCR/data/ESTVQA/annotations/train.json"
-    img_file = "/home/zhangli/GPT4/MutimodelOCR/data/ESTVQA/images/train"
+    ann_file = "/home/zhangli/OCRData/data/TextVQA/ESTVQA/annotations/train.json"
+    #img_file = "/home/zhangli/GPT4/MutimodelOCR/data/ESTVQA/images/train"
     cn_list = []
     en_list= []
     with open(ann_file,'r') as f:
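The hunk cuts off inside the __main__ block, but the helper that drives the cn_list/en_list split is fully visible; its behavior in isolation (sample strings invented):

    import re

    def has_chinese_characters(string):
        # True if the string contains at least one CJK character
        pattern = re.compile(r'[\u4e00-\u9fa5]')
        return bool(pattern.search(string))

    print(has_chinese_characters("stop sign"))  # False
    print(has_chinese_characters("深圳路123"))   # True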
datasets/process/process_FUNSD.py (0 changes, Normal file → Executable file)
datasets/process/process_ReCTS.py (23 changes, new file)
@@ -0,0 +1,23 @@
+import re
+import os
+def has_chinese_characters(string):
+    pattern = re.compile(r'[\u4e00-\u9fa5]')
+    return bool(pattern.search(string))
+def is_all_chinese(text):
+    """
+    Check whether a string consists solely of Chinese characters.
+    """
+    pattern = re.compile(r'^[\u4e00-\u9fa5]+$')
+    return pattern.match(text) is not None
+if __name__ =='__main__':
+    file_path = "/home/zhangli/GPT4/MutimodelOCR/data/ReCTS/annotation.txt"
+    out = open("/home/zhangli/GPT4/MutimodelOCR/data/ReCTS/ann.txt",'w')
+    with open(file_path, 'r') as file:
+        data = file.readlines()
+    for line in data:
+        text = line.strip().split()[1]
+        path = os.path.join("/home/zhangli/GPT4/MutimodelOCR/data/ReCTS/crops",line.strip().split()[0])
+        if is_all_chinese(text) and os.path.exists(path):
+            out.write(line.strip())
+            out.write('\n')
+    out.close()
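is_all_chinese() is stricter than has_chinese_characters(): the anchored regex rejects any label that mixes in Latin letters or digits, so the filtered ann.txt keeps only pure-Chinese crops that actually exist on disk. For example (illustrative labels):

    print(is_all_chinese("你好"))     # True: entirely CJK
    print(is_all_chinese("你好123"))  # False: mixed with digits
    print(is_all_chinese(""))         # False: '+' requires at least one char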
datasets/vqa_dataset.py (0 changes, Normal file → Executable file)